add PIM accelerator

This commit is contained in:
NiccoloN
2026-02-24 15:09:18 +01:00
parent b24a0df8d7
commit a6e928bdd7
67 changed files with 9109 additions and 1 deletion

View File

@@ -0,0 +1,119 @@
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/AnnotateReplication.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
#include <queue>
using namespace mlir;
namespace onnx_mlir {
/**
* @brief Structure that describes the replication of a convolution operation,
* along the image height axis.
*/
/**
 * @brief Describes the replication of a convolution operation along the
 * image height axis.
 *
 * Ordering: entries compare by the integer ratio input_w / replicationFactor,
 * so a max-heap (e.g. std::priority_queue) of ConvReplication yields the
 * entry whose replicas currently cover the widest image slice first.
 */
struct ConvReplication {
  ONNXConvOp convOp;            ///< The convolution operation being replicated.
  size_t input_w;               ///< Width of the input image.
  size_t replicationFactor;     ///< Replication factor on the image height axis.
  size_t coresNeededPerReplica; ///< Cores required by a single replica.

  ConvReplication(ONNXConvOp convOp, size_t input_w, size_t replicationFactor,
      size_t coresNeededPerReplica)
      : convOp(convOp), input_w(input_w), replicationFactor(replicationFactor),
        coresNeededPerReplica(coresNeededPerReplica) {}

  /// Orders by per-replica image width (integer division), ascending.
  friend bool operator<(const ConvReplication &lhs, const ConvReplication &rhs) {
    const size_t lhsWidthPerReplica = lhs.input_w / lhs.replicationFactor;
    const size_t rhsWidthPerReplica = rhs.input_w / rhs.replicationFactor;
    return lhsWidthPerReplica < rhsWidthPerReplica;
  }
};
/**
 * @brief Decides, for every convolution in @p funcOp, how many times it can be
 * replicated along the image height axis given the available PIM cores, and
 * annotates each convolution with the chosen replication factor.
 *
 * First the minimum number of cores required to map every convolution and
 * fully-connected (Gemm) layer once is computed; it is an error if that
 * already exceeds the core budget. The spare cores are then handed out
 * greedily: the convolution whose replicas currently cover the widest image
 * slice (input_w / replicationFactor) is replicated first.
 *
 * @param funcOp   Function whose top-level (first-block) operations are
 *                 inspected.
 * @param rewriter Used only to build the I64 replication attribute.
 * @return failure() when the network needs more cores than are available,
 *         success() otherwise.
 */
LogicalResult annotateReplication(mlir::func::FuncOp funcOp, mlir::IRRewriter &rewriter) {
  if (coresCount == -1) {
    // Unconstrained core budget: leave the implicit replication factor of 1
    // and emit no attribute.
    return success();
  }

  std::priority_queue<ConvReplication> replicationQueue;
  size_t requiredCores = 0;

  for (auto &op : funcOp.getFunctionBody().begin()->getOperations()) {
    if (auto convOp = dyn_cast<ONNXConvOp>(op)) {
      // Convolution layer: one crossbar per (kernel element, input tile,
      // output tile) combination, then packed into cores.
      Value xVal = convOp.getX();
      Value wVal = convOp.getW();
      auto xType = mlir::cast<ShapedType>(xVal.getType());
      auto wType = mlir::cast<ShapedType>(wVal.getType());
      const size_t imageWidth = GET_IMAGE_WIDTH(xType);
      const size_t kernelHeight = GET_KERNEL_HEIGHT(wType);
      const size_t kernelWidth = GET_KERNEL_WIDTH(wType);
      const size_t inputTiles =
          ceilIntegerDivide(GET_IMAGE_CHANNEL(xType), crossbarSize.getValue());
      const size_t outputTiles =
          ceilIntegerDivide(wType.getDimSize(0), crossbarSize.getValue());
      const auto crossbarsNeeded =
          kernelHeight * kernelWidth * inputTiles * outputTiles;
      const auto coresNeeded =
          ceilIntegerDivide(crossbarsNeeded, crossbarCountInCore.getValue());
      requiredCores += coresNeeded;
      // Every conv starts with replication factor 1.
      replicationQueue.emplace(convOp, imageWidth, 1, coresNeeded);
    } else if (auto gemmOp = dyn_cast<ONNXGemmOp>(op)) {
      // Fully connected layer: never replicated, but it still consumes cores.
      auto bType = cast<ShapedType>(gemmOp.getB().getType());
      auto inputSize = bType.getDimSize(0);
      auto outputSize = bType.getDimSize(1);
      if (gemmOp.getTransB())
        std::swap(inputSize, outputSize);
      const size_t inputTiles =
          ceilIntegerDivide(inputSize, crossbarSize.getValue());
      const size_t outputTiles =
          ceilIntegerDivide(outputSize, crossbarSize.getValue());
      // Each output tile is computed by `coresPerOutputTile` cores. The
      // entire input is given to each of these cores.
      const size_t coresPerOutputTile =
          ceilIntegerDivide(inputTiles, crossbarCountInCore.getValue());
      requiredCores += coresPerOutputTile * outputTiles;
    }
  }

  if (static_cast<size_t>(coresCount) < requiredCores) {
    return funcOp->emitError("Not enough cores for this network: ")
           << requiredCores << " cores needed, but only "
           << static_cast<size_t>(coresCount) << " available.";
  }

  // Greedily distribute the spare cores, widest per-replica image first.
  size_t spareCores = static_cast<size_t>(coresCount) - requiredCores;
  while (!replicationQueue.empty()) {
    ConvReplication entry = replicationQueue.top();
    replicationQueue.pop();
    // NOTE(review): the guard demands coresNeededPerReplica *
    // (replicationFactor + 1) spare cores, yet only coresNeededPerReplica is
    // subtracted per extra replica — confirm this asymmetry is intentional.
    if (spareCores > entry.coresNeededPerReplica * (entry.replicationFactor + 1)) {
      // Enough budget: add one replica and reconsider this conv later.
      spareCores -= entry.coresNeededPerReplica;
      entry.replicationFactor++;
      replicationQueue.push(entry);
    } else {
      // No more budget for this conv: record the final factor on the op.
      entry.convOp->setAttr(REPLICATION_ATTR_NAME,
          rewriter.getI64IntegerAttr(entry.replicationFactor));
    }
  }
  return success();
}
} // namespace onnx_mlir