add PIM accelerator
This commit is contained in:
119
src/PIM/Conversion/ONNXToSpatial/Utils/AnnotateReplication.cpp
Normal file
119
src/PIM/Conversion/ONNXToSpatial/Utils/AnnotateReplication.cpp
Normal file
@@ -0,0 +1,119 @@
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/AnnotateReplication.hpp"
|
||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||
|
||||
#include <queue>
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
|
||||
/**
|
||||
* @brief Structure that describes the replication of a convolution operation,
|
||||
* along the image height axis.
|
||||
*/
|
||||
struct ConvReplication {
|
||||
ONNXConvOp convOp; // Convolution operation
|
||||
size_t input_w; // Width of the input image
|
||||
size_t replicationFactor; // Replication factor on the image height axis
|
||||
size_t coresNeededPerReplica; // Number of cores needed for each replica
|
||||
|
||||
friend bool operator<(const ConvReplication& a, const ConvReplication& b) {
|
||||
return a.input_w / a.replicationFactor < b.input_w / b.replicationFactor;
|
||||
}
|
||||
|
||||
ConvReplication(ONNXConvOp convOp, size_t input_w, size_t replicationFactor, size_t coresNeededPerReplica)
|
||||
: convOp(convOp),
|
||||
input_w(input_w),
|
||||
replicationFactor(replicationFactor),
|
||||
coresNeededPerReplica(coresNeededPerReplica) {}
|
||||
};
|
||||
|
||||
/**
 * @brief Annotates every convolution in @p funcOp with a replication factor
 * along the image height axis, greedily distributing the PIM cores that
 * remain once every layer has been assigned its minimum core budget.
 *
 * @param funcOp   Function whose first block is scanned for ONNXConvOp and
 *                 ONNXGemmOp operations.
 * @param rewriter Rewriter used to build the replication integer attribute.
 * @return failure() when the network needs more cores than are available,
 *         success() otherwise.
 */
LogicalResult annotateReplication(mlir::func::FuncOp funcOp, mlir::IRRewriter& rewriter) {

  if (coresCount == -1) {
    // No core budget was given: implicitly leave every replication at 1.
    return success();
  }

  std::priority_queue<ConvReplication> convOpsReplicationQueue;

  // Cores strictly required to map the network once (replication factor 1).
  size_t minimumCores = 0;

  for (auto& op : funcOp.getFunctionBody().begin()->getOperations()) {
    if (auto convOp = dyn_cast<ONNXConvOp>(op)) {
      // Convolution layer: one crossbar is needed per (kernel cell,
      // input tile, output tile) combination.

      Value X = convOp.getX(), W = convOp.getW();
      ShapedType xShape = mlir::cast<ShapedType>(X.getType());
      ShapedType wShape = mlir::cast<ShapedType>(W.getType());

      size_t input_w = GET_IMAGE_WIDTH(xShape);
      size_t krn_h = GET_KERNEL_HEIGHT(wShape);
      size_t krn_w = GET_KERNEL_WIDTH(wShape);

      size_t inputTileCount = ceilIntegerDivide(GET_IMAGE_CHANNEL(xShape), crossbarSize.getValue());
      size_t outputTileCount = ceilIntegerDivide(wShape.getDimSize(0), crossbarSize.getValue());

      auto neededXbars = krn_h * krn_w * inputTileCount * outputTileCount;
      auto neededCores = ceilIntegerDivide(neededXbars, crossbarCountInCore.getValue());

      minimumCores += neededCores;

      // Start every convolution at replication factor 1; extra replicas are
      // granted later from the leftover core budget.
      convOpsReplicationQueue.emplace(convOp, input_w, 1, neededCores);
    }
    else if (auto gemmOp = dyn_cast<ONNXGemmOp>(op)) {
      // Fully connected layer: contributes to the minimum core count only,
      // it is never replicated.
      auto matrixTensorShape = cast<ShapedType>(gemmOp.getB().getType());
      auto inputSize = matrixTensorShape.getDimSize(0);
      auto outputSize = matrixTensorShape.getDimSize(1);
      if (gemmOp.getTransB())
        std::swap(inputSize, outputSize);

      const size_t inputTilesCount = ceilIntegerDivide(inputSize, crossbarSize.getValue());
      const size_t outputTilesCount = ceilIntegerDivide(outputSize, crossbarSize.getValue());

      // Each output tile is computed by `coresPerOutputTile` cores. The
      // entire input is given to each of these cores.
      const size_t coresPerOutputTile = ceilIntegerDivide(inputTilesCount, crossbarCountInCore.getValue());

      auto neededCores = coresPerOutputTile * outputTilesCount;

      minimumCores += neededCores;
    }
  }

  if (static_cast<size_t>(coresCount) < minimumCores) {
    return funcOp->emitError("Not enough cores for this network: ")
        << minimumCores << " cores needed, but only " << static_cast<size_t>(coresCount) << " available.";
  }

  // Cores left over once every layer has its minimum allocation.
  size_t availableCores = static_cast<size_t>(coresCount) - minimumCores;

  // Greedily hand out extra replicas, always to the convolution currently
  // on top of the max-heap (largest per-replica width share).
  while (!convOpsReplicationQueue.empty()) {
    auto convOpReplication = convOpsReplicationQueue.top();
    convOpsReplicationQueue.pop();

    // One extra replica costs exactly `coresNeededPerReplica` cores.
    // BUGFIX: the previous guard compared `availableCores` against
    // coresNeededPerReplica * (replicationFactor + 1), even though
    // `availableCores` is already net of the replicas granted so far; that
    // double-counted the cost (and the strict `>` added an off-by-one),
    // leaving cores unused.
    if (availableCores >= convOpReplication.coresNeededPerReplica) {
      // We can replicate this convolution: pay for one replica, increment
      // replicationFactor and put it back in the queue with its new key.
      availableCores -= convOpReplication.coresNeededPerReplica;
      convOpReplication.replicationFactor++;

      convOpsReplicationQueue.push(convOpReplication);
    }
    else {
      // Cannot replicate this convolution anymore, annotate the operation
      // with the final replication factor.
      convOpReplication.convOp->setAttr(REPLICATION_ATTR_NAME,
          rewriter.getI64IntegerAttr(convOpReplication.replicationFactor));
    }
  }

  return success();
}
|
||||
|
||||
} // namespace onnx_mlir
|
||||
Reference in New Issue
Block a user