add constant folding and verification pass for pim host operations

better validation scripts output
big refactors
This commit is contained in:
NiccoloN
2026-03-20 12:08:12 +01:00
parent 4e50e056e3
commit 6e1de865bb
64 changed files with 1364 additions and 2265 deletions

View File

@@ -49,11 +49,11 @@ LogicalResult annotateReplication(mlir::func::FuncOp funcOp, mlir::IRRewriter& r
ShapedType xShape = mlir::cast<ShapedType>(X.getType());
ShapedType wShape = mlir::cast<ShapedType>(W.getType());
size_t input_w = GET_IMAGE_WIDTH(xShape);
size_t krn_h = GET_KERNEL_HEIGHT(wShape);
size_t krn_w = GET_KERNEL_WIDTH(wShape);
size_t input_w = getImageWidth(xShape);
size_t krn_h = getKernelHeight(wShape);
size_t krn_w = getKernelWidth(wShape);
size_t inputTileCount = ceilIntegerDivide(GET_IMAGE_CHANNEL(xShape), crossbarSize.getValue());
size_t inputTileCount = ceilIntegerDivide(getImageChannel(xShape), crossbarSize.getValue());
size_t outputTileCount = ceilIntegerDivide(wShape.getDimSize(0), crossbarSize.getValue());
auto neededXbars = krn_h * krn_w * inputTileCount * outputTileCount;

View File

@@ -15,21 +15,21 @@
namespace onnx_mlir {
llvm::SmallPtrSet<Operation*, 16> onnx_mlir::SpatialReducer::oldComputeOpsReplaced;
llvm::SmallPtrSet<mlir::Operation*, 16> onnx_mlir::SpatialReducer::oldComputeOpsReplaced;
ResNum SpatialReducer::applyResultProcessing(ComputeAndResNum computeOpAndResNum,
std::function<Value(const Value&)> processFun,
ConversionPatternRewriter& rewriter) {
std::function<mlir::Value(const mlir::Value&)> processFun,
mlir::ConversionPatternRewriter& rewriter) {
assert(processFun);
auto computeOp = GET_COMP(computeOpAndResNum);
auto resultNum = GET_RES_NUM(computeOpAndResNum);
spatial::SpatYieldOp yieldOp = cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
spatial::SpatYieldOp yieldOp = mlir::cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
Value result = yieldOp->getOperand(resultNum);
mlir::Value result = yieldOp->getOperand(resultNum);
rewriter.setInsertionPointAfterValue(result);
Value processedResult = processFun(result);
mlir::Value processedResult = processFun(result);
if (processedResult == result) {
// Sometimes we want processedResult to return the same value but do
// something else with it (e.g. in softmax we want to broadcast the value
@@ -42,10 +42,11 @@ ResNum SpatialReducer::applyResultProcessing(ComputeAndResNum computeOpAndResNum
return yieldOp.getNumOperands() - 1;
}
OpAndResNum SpatialReducer::applyReducePattern(SmallVector<ComputeAndResNum>& computeOpsAndResNum,
std::function<Value(const Value&, const Value&)> reduce,
std::function<Value(const Value&)> preprocess,
std::function<Value(const Value&)> postprocess) {
OpAndResNum
SpatialReducer::applyReducePattern(llvm::SmallVector<ComputeAndResNum>& computeOpsAndResNum,
std::function<mlir::Value(const mlir::Value&, const mlir::Value&)> reduce,
std::function<mlir::Value(const mlir::Value&)> preprocess,
std::function<mlir::Value(const mlir::Value&)> postprocess) {
if (preprocess)
for (auto& computeOpAndResNum : computeOpsAndResNum)
@@ -55,18 +56,18 @@ OpAndResNum SpatialReducer::applyReducePattern(SmallVector<ComputeAndResNum>& co
// computeOp. In this case, we need to apply the reduction within-compute
// Keep a map between a computeOp and the last Value for this reduction
std::unordered_map<Operation*, Value> lastValueForCompute;
std::unordered_map<mlir::Operation*, mlir::Value> lastValueForCompute;
for (auto& computeOpAndResNum : computeOpsAndResNum) {
auto computeOp = GET_COMP(computeOpAndResNum);
auto yieldOp = cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
Value valueWithinCompute = yieldOp->getOperand(GET_RES_NUM(computeOpAndResNum));
auto yieldOp = mlir::cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
mlir::Value valueWithinCompute = yieldOp->getOperand(GET_RES_NUM(computeOpAndResNum));
auto it = lastValueForCompute.find(computeOp.getOperation());
if (it != lastValueForCompute.end()) {
// If we have already seen this computeOp, apply the reduction
// within-compute
Value lastWithinComputeValue = it->second;
mlir::Value lastWithinComputeValue = it->second;
assert(valueWithinCompute.getDefiningOp() && lastWithinComputeValue.getDefiningOp());
@@ -85,12 +86,12 @@ OpAndResNum SpatialReducer::applyReducePattern(SmallVector<ComputeAndResNum>& co
computeOpsAndResNum.clear();
computeOpsAndResNum.reserve(lastValueForCompute.size());
for (auto& entry : lastValueForCompute) {
auto computeOp = cast<spatial::SpatWeightedCompute>(entry.first);
auto computeOp = mlir::cast<spatial::SpatWeightedCompute>(entry.first);
auto valueWithinCompute = entry.second;
// We check if `valueWithinCompute` is already used by the yieldOp, in that
// case no need to add it
auto yieldOp = cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
auto yieldOp = mlir::cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
bool yieldOpUseFound = false;
for (auto& use : valueWithinCompute.getUses()) {
if (use.getOwner() == yieldOp.getOperation()) {
@@ -110,7 +111,7 @@ OpAndResNum SpatialReducer::applyReducePattern(SmallVector<ComputeAndResNum>& co
computeOpsAndResNum.push_back({computeOp, resultNum});
}
Location loc = GET_COMP(computeOpsAndResNum[0])->getLoc();
mlir::Location loc = GET_COMP(computeOpsAndResNum[0])->getLoc();
// Recursive algorithm to reduce the inputs to a single one:
// - Take two inputs at a time, and reduce them into a single one, updating
@@ -118,7 +119,7 @@ OpAndResNum SpatialReducer::applyReducePattern(SmallVector<ComputeAndResNum>& co
// - Repeat until there is only one input left.
llvm::OwningArrayRef<ComputeAndResNum> computeOpsRef(computeOpsAndResNum);
while (computeOpsRef.size() > 1) {
SmallVector<ComputeAndResNum> nextComputeOps;
llvm::SmallVector<ComputeAndResNum> nextComputeOps;
nextComputeOps.reserve(computeOpsRef.size() / 2);
for (size_t i = 0; i < computeOpsRef.size() - 1; i += 2) {
auto [firstCompute, firstResultNum] = computeOpsRef[i];
@@ -135,23 +136,23 @@ OpAndResNum SpatialReducer::applyReducePattern(SmallVector<ComputeAndResNum>& co
// the number of results)
// See below `reducerChanges.push_back` and `finalizeReduceUpdates`
auto yieldOpFirstCompute = cast<spatial::SpatYieldOp>(firstCompute.getBody().front().getTerminator());
auto yieldOpFirstCompute = mlir::cast<spatial::SpatYieldOp>(firstCompute.getBody().front().getTerminator());
// Add a new operand to the block of the second computeOp
Block& secondBlock = secondCompute.getBody().front();
Value formerRes1 = secondBlock.addArgument(yieldOpFirstCompute->getOperand(firstResultNum).getType(), loc);
mlir::Block& secondBlock = secondCompute.getBody().front();
mlir::Value formerRes1 = secondBlock.addArgument(yieldOpFirstCompute->getOperand(firstResultNum).getType(), loc);
auto secondComputeWeightsNum =
secondCompute->getAttrOfType<DenseI32ArrayAttr>(secondCompute.getOperandSegmentSizesAttrName())[0];
secondCompute->getAttrOfType<mlir::DenseI32ArrayAttr>(secondCompute.getOperandSegmentSizesAttrName())[0];
auto secondComputeOperandNum = secondComputeWeightsNum + secondBlock.getNumArguments() - 1;
// Take the "former-result" from the second computeOp
spatial::SpatYieldOp secondYield = cast<spatial::SpatYieldOp>(secondBlock.getTerminator());
Value formerRes2 = secondYield.getOperand(secondResultNum);
spatial::SpatYieldOp secondYield = mlir::cast<spatial::SpatYieldOp>(secondBlock.getTerminator());
mlir::Value formerRes2 = secondYield.getOperand(secondResultNum);
// Apply reduction operation
rewriter.setInsertionPoint(secondYield);
Value reduced = reduce(formerRes2, formerRes1);
mlir::Value reduced = reduce(formerRes2, formerRes1);
// Unfortunately, it is not possible to update the result in place,
// because we may have already referenced it by <computeOp, resultNum>
@@ -219,7 +220,7 @@ void SpatialReducer::finalizeReduceUpdates() {
// `opToReplacedCompute`
auto toComputeOp = opToReplacedCompute[toOp];
if (!toComputeOp)
toComputeOp = cast<spatial::SpatWeightedCompute>(toOp);
toComputeOp = mlir::cast<spatial::SpatWeightedCompute>(toOp);
assert(toComputeOp != fromComputeOp && "Oops should have caught this earlier!");
@@ -234,31 +235,31 @@ void SpatialReducer::finalizeReduceUpdates() {
}
}
// Resolves an (operation, result index) pair to the corresponding result Value
// of a SpatWeightedCompute. Must only be called once `reducesFinalized` is set;
// this precondition is asserted.
Value SpatialReducer::resolveValueFromOpAndResNum(OpAndResNum& opAndResNum) {
mlir::Value SpatialReducer::resolveValueFromOpAndResNum(OpAndResNum& opAndResNum) {
assert(reducesFinalized && "Cannot create resolve values before finalizing the reduce updates.");
Operation* opToCast;
mlir::Operation* opToCast;
// If opToReplacedCompute has an entry for this op, use the mapped compute op;
// otherwise fall back to the op stored in the pair itself.
auto it = opToReplacedCompute.find(opAndResNum.first);
if (it != opToReplacedCompute.end())
opToCast = it->second;
else
opToCast = opAndResNum.first;
auto computeOp = cast<spatial::SpatWeightedCompute>(opToCast);
auto computeOp = mlir::cast<spatial::SpatWeightedCompute>(opToCast);
// The pair's second member selects which result of the compute op to return.
return computeOp.getResult(opAndResNum.second);
}
void SpatialReducer::updateResultsOfCompute(Operation* computeOp) {
void SpatialReducer::updateResultsOfCompute(mlir::Operation* computeOp) {
if (opToReplacedCompute.find(computeOp) != opToReplacedCompute.end()) {
// If we have already replaced the fromOp, we do not need to do it again
return;
}
auto oldComputeOp = cast<spatial::SpatWeightedCompute>(computeOp);
auto oldComputeOp = mlir::cast<spatial::SpatWeightedCompute>(computeOp);
auto oldComputeOpNum = oldComputeOp->getNumOperands();
auto yieldOp = cast<spatial::SpatYieldOp>(oldComputeOp.getBody().front().getTerminator());
auto yieldOp = mlir::cast<spatial::SpatYieldOp>(oldComputeOp.getBody().front().getTerminator());
if (yieldOp.getNumOperands() == oldComputeOp->getNumResults()) {
// No result was added, just add itself to the map
@@ -283,8 +284,8 @@ void SpatialReducer::updateResultsOfCompute(Operation* computeOp) {
// Since we replaced the old ComputeOp with a new one, we need to replace
// all its results' uses
for (size_t i = 0; i < oldComputeOp.getNumResults(); i++) {
Value oldResult = oldComputeOp.getResult(i);
Value newResult = newComputeOp.getResult(i);
mlir::Value oldResult = oldComputeOp.getResult(i);
mlir::Value newResult = newComputeOp.getResult(i);
// Replace the uses, except the uses of the compute ops which got deleted
// previously
@@ -298,9 +299,10 @@ void SpatialReducer::updateResultsOfCompute(Operation* computeOp) {
rewriter.eraseOp(oldComputeOp);
}
Value SpatialReducer::createImgConcatOp(SmallVector<SmallVector<SmallVector<OpAndResNum>>>& outputTiles,
Location& loc,
Type outputType) {
mlir::Value
SpatialReducer::createImgConcatOp(llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<OpAndResNum>>>& outputTiles,
mlir::Location& loc,
mlir::Type outputType) {
assert(reducesFinalized && "Cannot create ImgConcatOp before finalizing the reduce updates.");
@@ -309,8 +311,8 @@ Value SpatialReducer::createImgConcatOp(SmallVector<SmallVector<SmallVector<OpAn
auto width = outputTiles[0].size();
auto height = outputTiles[0][0].size();
SmallVector<SmallVector<SmallVector<Value>>> remappedOutputTiles(
tilesCount, SmallVector<SmallVector<Value>>(width, SmallVector<Value>(height)));
llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<mlir::Value>>> remappedOutputTiles(
tilesCount, llvm::SmallVector<llvm::SmallVector<mlir::Value>>(width, llvm::SmallVector<mlir::Value>(height)));
for (size_t t = 0; t < tilesCount; t++)
for (size_t x = 0; x < width; x++)
@@ -320,16 +322,16 @@ Value SpatialReducer::createImgConcatOp(SmallVector<SmallVector<SmallVector<OpAn
return ::onnx_mlir::createImgConcatOp(remappedOutputTiles, rewriter, loc, outputType);
}
OpAndResNum SpatialReducer::applyAddMapReduction(SmallVector<ComputeAndResNum>& computeOps,
ConversionPatternRewriter& rewriter,
Value biasTile,
OpAndResNum SpatialReducer::applyAddMapReduction(llvm::SmallVector<ComputeAndResNum>& computeOps,
mlir::ConversionPatternRewriter& rewriter,
mlir::Value biasTile,
MapOperations mapOp) {
std::function<Value(const Value&)> postprocessing = nullptr;
std::function<mlir::Value(const mlir::Value&)> postprocessing = nullptr;
if (mapOp != MapOperations::None) {
postprocessing = [&](const Value a) {
Value mapOperand = a;
postprocessing = [&](const mlir::Value a) {
mlir::Value mapOperand = a;
if (biasTile)
mapOperand = rewriter.create<spatial::SpatVAddOp>(a.getLoc(), a.getType(), a, biasTile);
return createMapOperation(rewriter, mapOp, mapOperand);
@@ -338,7 +340,7 @@ OpAndResNum SpatialReducer::applyAddMapReduction(SmallVector<ComputeAndResNum>&
return this->applyReducePattern(
computeOps,
[&](Value a, Value b) { return rewriter.create<spatial::SpatVAddOp>(a.getLoc(), a.getType(), a, b); },
[&](mlir::Value a, mlir::Value b) { return rewriter.create<spatial::SpatVAddOp>(a.getLoc(), a.getType(), a, b); },
/* preprocess = */ nullptr,
postprocessing);
}

View File

@@ -3,6 +3,10 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Casting.h"
#include <functional>
#include <unordered_map>
#include <utility>
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
@@ -13,28 +17,28 @@ using ResNum = unsigned int;
using ComputeAndResNum = std::pair<spatial::SpatWeightedCompute, ResNum>;
// One pending change recorded by SpatialReducer; instances are stored in its
// `reducerChanges` list, which is applied after the reduction is finalized.
// NOTE(review): presumably result `fromOpResNum` of `fromOp` is meant to feed
// operand `toOpOperandNum` of `toOp` — confirm against finalizeReduceUpdates.
struct SpatialReducerChange {
Operation* fromOp;
mlir::Operation* fromOp;
unsigned int fromOpResNum;
Operation* toOp;
mlir::Operation* toOp;
unsigned int toOpOperandNum;
};
using OpAndResNum = std::pair<Operation*, ResNum>;
using OpAndResNum = std::pair<mlir::Operation*, ResNum>;
class SpatialReducer {
public:
SpatialReducer(ConversionPatternRewriter& rewriter)
SpatialReducer(mlir::ConversionPatternRewriter& rewriter)
: rewriter(rewriter) {}
OpAndResNum applyReducePattern(SmallVector<ComputeAndResNum>& computeOpsAndResNum,
std::function<Value(const Value&, const Value&)> reduce,
std::function<Value(const Value&)> preprocess,
std::function<Value(const Value&)> postprocess);
OpAndResNum applyReducePattern(llvm::SmallVector<ComputeAndResNum>& computeOpsAndResNum,
std::function<mlir::Value(const mlir::Value&, const mlir::Value&)> reduce,
std::function<mlir::Value(const mlir::Value&)> preprocess,
std::function<mlir::Value(const mlir::Value&)> postprocess);
OpAndResNum applyAddMapReduction(SmallVector<ComputeAndResNum>& computeOps,
ConversionPatternRewriter& rewriter,
Value biasTile,
OpAndResNum applyAddMapReduction(llvm::SmallVector<ComputeAndResNum>& computeOps,
mlir::ConversionPatternRewriter& rewriter,
mlir::Value biasTile,
MapOperations mapOp);
void finalizeReduceUpdates();
@@ -44,17 +48,17 @@ public:
finalizeReduceUpdates();
}
Value createImgConcatOp(llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<OpAndResNum>>>& outputTiles,
Location& loc,
Type outputType);
mlir::Value createImgConcatOp(llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<OpAndResNum>>>& outputTiles,
mlir::Location& loc,
mlir::Type outputType);
Value resolveValueFromOpAndResNum(OpAndResNum& opAndResNum);
mlir::Value resolveValueFromOpAndResNum(OpAndResNum& opAndResNum);
private:
[[nodiscard("computeOp result number gets updated")]] ResNum
applyResultProcessing(ComputeAndResNum computeOpAndResNum,
std::function<Value(const Value&)> processFun,
ConversionPatternRewriter& rewriter);
std::function<mlir::Value(const mlir::Value&)> processFun,
mlir::ConversionPatternRewriter& rewriter);
/**
* @brief Update the results of a ComputeOp.
@@ -66,19 +70,19 @@ private:
*
* @param computeOp The ComputeOp to update the results of.
*/
void updateResultsOfCompute(Operation* computeOp);
void updateResultsOfCompute(mlir::Operation* computeOp);
ConversionPatternRewriter& rewriter;
mlir::ConversionPatternRewriter& rewriter;
bool reducesFinalized = false;
// List of changes to be applied after the reduction is finalized
SmallVector<SpatialReducerChange, 4> reducerChanges;
llvm::SmallVector<SpatialReducerChange, 4> reducerChanges;
// List of computeOps that need to be replaced with new results
SmallVector<spatial::SpatWeightedCompute> computeOpNeedingResUpdate;
llvm::SmallVector<spatial::SpatWeightedCompute> computeOpNeedingResUpdate;
std::unordered_map<Operation*, spatial::SpatWeightedCompute> opToReplacedCompute;
std::unordered_map<mlir::Operation*, spatial::SpatWeightedCompute> opToReplacedCompute;
static llvm::SmallPtrSet<Operation*, 16> oldComputeOpsReplaced;
static llvm::SmallPtrSet<mlir::Operation*, 16> oldComputeOpsReplaced;
};
} // namespace onnx_mlir

View File

@@ -4,7 +4,7 @@
namespace onnx_mlir {
// Constructs the subdivider from a nested map of weight values. The map is
// taken by value and moved into the `weights` member, so no extra copy is made
// beyond the one the caller may incur when passing the argument.
WeightSubdivider::WeightSubdivider(map<long, map<long, SmallVector<Value>>> weights)
WeightSubdivider::WeightSubdivider(std::map<long, std::map<long, llvm::SmallVector<mlir::Value>>> weights)
: weights(std::move(weights)) {}
// Returns true when the `weights` map holds no entries.
bool WeightSubdivider::isEmpty() const {
  const bool noEntriesLeft = weights.empty();
  return noEntriesLeft;
}
@@ -13,7 +13,7 @@ TaggedWeights WeightSubdivider::popGroup(size_t amount) {
assert(!weights.empty() && "No weights to extract.");
auto it = weights.begin();
SmallVector<Value>& values = it->second.begin()->second;
llvm::SmallVector<mlir::Value>& values = it->second.begin()->second;
long inputTile = it->first;
long outputTile = it->second.begin()->first;
@@ -21,7 +21,7 @@ TaggedWeights WeightSubdivider::popGroup(size_t amount) {
size_t n = std::min(amount, values.size());
crossbarsUsed += n;
SmallVector<Value> result;
llvm::SmallVector<mlir::Value> result;
result.assign(values.begin(), values.begin() + n);
if (n < values.size()) {
@@ -36,9 +36,9 @@ TaggedWeights WeightSubdivider::popGroup(size_t amount) {
return {inputTile, outputTile, crossbarsUsed - n, result};
}
SmallVector<TaggedWeights> WeightSubdivider::popGroups(size_t n) {
llvm::SmallVector<TaggedWeights> WeightSubdivider::popGroups(size_t n) {
crossbarsUsed = 0;
SmallVector<TaggedWeights> result;
llvm::SmallVector<TaggedWeights> result;
size_t remaining = n;
while (remaining > 0 && !weights.empty()) {

View File

@@ -4,11 +4,9 @@
#include "llvm/ADT/SmallVector.h"
#include <cstddef>
#include <map>
using namespace mlir;
using namespace std;
namespace onnx_mlir {
/**
@@ -19,7 +17,7 @@ struct TaggedWeights {
long inputTile;
long outputTile;
size_t startingCrossbarIndex;
SmallVector<Value> weights;
llvm::SmallVector<mlir::Value> weights;
};
/**
@@ -33,16 +31,16 @@ struct TaggedWeights {
*/
class WeightSubdivider {
private:
// Weight values keyed first by input tile (outer map) and then by output tile
// (inner map); popGroup reads the keys in that order.
map<long, map<long, SmallVector<Value>>> weights;
std::map<long, std::map<long, llvm::SmallVector<mlir::Value>>> weights;
// Running count of values handed out by popGroup; popGroups resets it to 0
// on entry.
size_t crossbarsUsed = 0;
// Takes at most `amount` values from the first remaining (input tile, output
// tile) group and returns them tagged with those tile indices.
TaggedWeights popGroup(size_t amount);
public:
// Takes the weight map by value; the definition moves it into `weights`.
WeightSubdivider(map<long, map<long, SmallVector<Value>>> weights);
WeightSubdivider(std::map<long, std::map<long, llvm::SmallVector<mlir::Value>>> weights);
// True when the `weights` map has no entries left.
bool isEmpty() const;
// Gathers tagged groups while a budget that starts at `n` stays positive and
// weights remain. NOTE(review): how the budget is decremented is not visible
// in this excerpt — confirm in the definition.
SmallVector<TaggedWeights> popGroups(size_t n);
llvm::SmallVector<TaggedWeights> popGroups(size_t n);
};
} // namespace onnx_mlir