add PIM accelerator
This commit is contained in:
@@ -0,0 +1,493 @@
|
||||
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
|
||||
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
|
||||
#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
|
||||
#include "mlir/Dialect/MemRef/IR/MemRef.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
#include "mlir/IR/BuiltinTypeInterfaces.h"
|
||||
#include "mlir/IR/BuiltinTypes.h"
|
||||
#include "mlir/Support/LLVM.h"
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/LogicalResult.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
#include "src/Accelerators/PIM/Common/PIMCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/PIM/PimOps.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.hpp"
|
||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
using namespace bufferization;
|
||||
|
||||
namespace onnx_mlir {
|
||||
namespace spatial {
|
||||
|
||||
/// Allocates a new memref whose shape and element type are taken from
/// `resultType`.
///
/// `resultType` is cast to ShapedType (the cast asserts on any other type).
/// No explicit layout or memory space is passed to MemRefType::get. The
/// memref.alloc is created through `rewriter` at location `loc`.
memref::AllocOp createEmptyFromType(Type resultType, Location loc, RewriterBase& rewriter) {
  const auto shaped = cast<ShapedType>(resultType);
  const MemRefType bufferType = MemRefType::get(shaped.getShape(), shaped.getElementType());
  return rewriter.create<memref::AllocOp>(loc, bufferType);
}
|
||||
|
||||
// Name of the attribute used to cache, on a channel send/receive op, the core
// id of the op at the other end of its channel. It is written and read by
// getCoreIdOfOtherEndOfChannel.
const llvm::StringRef PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME("precomp_other_core_id");
|
||||
|
||||
/// Returns the core id of the pim core that contains the op at the other end
/// of the channel used by `op`.
///
/// Resolving the other end requires the ChannelNewOp and the matching
/// Receive/Send op to still exist. During bufferization the first of the two
/// ops that is processed gets removed, so when this function runs for one end
/// it also stores the core id of `op` as an attribute on the other end. When
/// the other end is processed later, that cached attribute is returned
/// directly.
///
/// \param op          the channel send or receive op being bufferized.
/// \param opIsReceive forwarded to getOtherEndOfChannel.
/// \param rewriter    forwarded to getOtherEndOfChannel.
/// \returns the core id, or failure if the other end cannot be found, the
///          cached attribute is not an integer, or either op is not nested
///          directly in a pim::PimCoreOp.
llvm::FailureOr<uint32_t> getCoreIdOfOtherEndOfChannel(Operation* op, bool opIsReceive, RewriterBase& rewriter) {
  // Fast path: the other end was processed first and left the answer here.
  if (Attribute cachedAttr = op->getAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME)) {
    auto cachedCoreId = dyn_cast<IntegerAttr>(cachedAttr);
    if (!cachedCoreId) {
      op->emitError("precomputed core id attribute is not an integer attribute");
      return failure();
    }
    return static_cast<uint32_t>(cachedCoreId.getInt());
  }

  auto otherEndOpt = getOtherEndOfChannel(op, opIsReceive, rewriter);
  if (failed(otherEndOpt))
    return failure();
  Operation* otherEnd = *otherEndOpt;
  if (!otherEnd)
    return failure();

  // Both ends must live directly inside a pim core; report a diagnostic
  // instead of asserting when that is not the case.
  auto thisCore = llvm::dyn_cast_if_present<pim::PimCoreOp>(op->getParentOp());
  auto otherCore = llvm::dyn_cast_if_present<pim::PimCoreOp>(otherEnd->getParentOp());
  if (!thisCore || !otherCore) {
    op->emitError("both ends of a channel must be nested directly in a pim core op");
    return failure();
  }

  // Save the core id of this op on the other op, so it is still available
  // once this op has been replaced.
  otherEnd->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, thisCore.getCoreIdAttr());

  return otherCore.getCoreId();
}
|
||||
|
||||
/// External bufferization model attached to SpatWeightedCompute.
struct WComputeOpInterface : BufferizableOpInterface::ExternalModel<WComputeOpInterface, SpatWeightedCompute> {

  /// Every tensor operand of the compute op is reported as read.
  bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
    return true;
  }

  /// No tensor operand of the compute op is reported as written.
  bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
    return false;
  }

  /// No aliasing between operands and results is reported.
  AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
    // TODO: Is it an empty list or a list of "UNKNOWN" values?
    return {};
  }

  /// Bufferizes the signature of the first block of the op's first region.
  LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
    Block& body = op->getRegion(0).front();
    return bufferizeBlockSignature(&body, rewriter, options, state);
  }
};
|
||||
|
||||
/*
|
||||
* This can be used for operation that have a single argument, which is a
|
||||
* variadic of tensors, and a single output with the same same shape
|
||||
* Example: VAdd, VSub, VExp
|
||||
*/
|
||||
template <typename InterfaceName, typename OpTy, typename ToTy>
|
||||
struct VariadicArgumentElementWiseOpInterface : BufferizableOpInterface::ExternalModel<InterfaceName, OpTy> {
|
||||
|
||||
// Input tensors to the OP are always read
|
||||
bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return true; }
|
||||
|
||||
// Input tensors to the OP are _never_ written
|
||||
bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// In general, no tensor is aliased with any other tensor in the OP
|
||||
AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Cast tensor values into memref values
|
||||
LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
|
||||
|
||||
// Turn Tensor Operands into Memref Operands
|
||||
SmallVector<Value> memrefOperands;
|
||||
memrefOperands.reserve(op->getNumOperands());
|
||||
for (auto operand : op->getOperands()) {
|
||||
auto memref = getBuffer(rewriter, operand, options, state);
|
||||
if (failed(memref))
|
||||
return failure();
|
||||
memrefOperands.push_back(*memref);
|
||||
}
|
||||
|
||||
// TODO: Support addiction with more than 2 operands
|
||||
if (memrefOperands.size() > 2) {
|
||||
op->emitError("VariadicArgumentElementWiseOpInterface only supports OPs "
|
||||
"with 1 or 2 operands, for now.");
|
||||
return failure();
|
||||
}
|
||||
|
||||
// Alloc an output memref
|
||||
Value outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||
|
||||
memrefOperands.push_back(outputTensor);
|
||||
|
||||
Value newValue = rewriter.create<ToTy>(op->getLoc(), outputTensor.getType(), memrefOperands).getOutRes();
|
||||
|
||||
replaceOpWithBufferizedValues(rewriter, op, newValue);
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
template <typename InterfaceName, typename OpTy, typename ToTy>
|
||||
struct WeightedMultiplicationsOpInterface : BufferizableOpInterface::ExternalModel<InterfaceName, OpTy> {
|
||||
|
||||
// Input tensors to the OP are always read
|
||||
bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return true; }
|
||||
|
||||
// Input tensors to the OP are _never_ written
|
||||
bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// In general, no tensor is aliased with any other tensor in the OP
|
||||
AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Cast tensor value into memref value
|
||||
LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
|
||||
auto memrefOperandOpt = getBuffer(rewriter, op->getOperand(0), options, state);
|
||||
if (failed(memrefOperandOpt))
|
||||
return failure();
|
||||
auto memrefOperand = *memrefOperandOpt;
|
||||
|
||||
// Alloc an output memref
|
||||
Value outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||
|
||||
Value newValue =
|
||||
rewriter
|
||||
.create<ToTy>(
|
||||
op->getLoc(), outputTensor.getType(), cast<OpTy>(op).getWeightIndexAttr(), memrefOperand, outputTensor)
|
||||
.getOutRes();
|
||||
|
||||
replaceOpWithBufferizedValues(rewriter, op, newValue);
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
struct ChannelReceiveOpInterface
|
||||
: BufferizableOpInterface::ExternalModel<ChannelReceiveOpInterface, SpatChannelReceiveOp> {
|
||||
|
||||
// Input value is the channel (not read/written, its more of an attribute)
|
||||
bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// See above
|
||||
bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// See above
|
||||
AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
// TODO: Is it an empty list or a list of "UNKNOWN" values?
|
||||
return {};
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn the channel receive to pim.recv
|
||||
*/
|
||||
LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
|
||||
|
||||
auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||
|
||||
auto numElements = cast<ShapedType>(outputTensor.getType()).getNumElements();
|
||||
auto elementSize = cast<ShapedType>(outputTensor.getType()).getElementTypeBitWidth() / 8;
|
||||
|
||||
auto srcCoreId = getCoreIdOfOtherEndOfChannel(op, true, rewriter);
|
||||
if (failed(srcCoreId))
|
||||
return failure();
|
||||
|
||||
Value newValue = rewriter
|
||||
.create<pim::PimReceiveOp>(op->getLoc(),
|
||||
outputTensor.getType(),
|
||||
outputTensor,
|
||||
rewriter.getI32IntegerAttr(numElements * elementSize),
|
||||
rewriter.getI32IntegerAttr(srcCoreId.value()))
|
||||
.getOut();
|
||||
|
||||
replaceOpWithBufferizedValues(rewriter, op, newValue);
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// External bufferization model attached to SpatChannelSendOp.
struct ChannelSendOpInterface : BufferizableOpInterface::ExternalModel<ChannelSendOpInterface, SpatChannelSendOp> {

  // Operand 0 is the channel; operand 1 is the tensor to send. Operand numbers
  // are zero-based, so the payload is operand 1 (the same index bufferize()
  // reads below).
  static constexpr unsigned kPayloadOperandIndex = 1;

  // The channel is neither read nor written; the tensor to send is read.
  bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
    return opOperand.getOperandNumber() == kPayloadOperandIndex;
  }

  // Neither operand is written.
  bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
    return false;
  }

  // No operand is reported as aliasing any result.
  AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
    // TODO: Is it an empty list or a list of "UNKNOWN" values?
    return {};
  }

  /*
   * Turn the channel send into a pim send op that reads the buffer of the
   * payload tensor. The size attribute is the element count multiplied by the
   * element bit width divided by 8.
   */
  LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
    auto srcTensor = op->getOperand(kPayloadOperandIndex);

    auto srcBufferOpt = getBuffer(rewriter, srcTensor, options, state);
    if (failed(srcBufferOpt))
      return failure();
    auto srcMemRef = *srcBufferOpt;

    auto srcShaped = cast<ShapedType>(srcTensor.getType());
    auto numElements = srcShaped.getNumElements();
    auto elementSize = srcShaped.getElementTypeBitWidth() / 8;

    // Core id of the receiver, i.e. the other end of the channel.
    auto dstCoreId = getCoreIdOfOtherEndOfChannel(op, false, rewriter);
    if (failed(dstCoreId))
      return failure();

    replaceOpWithNewBufferizedOp<pim::PimSendOp>(rewriter,
        op,
        srcMemRef,
        rewriter.getI32IntegerAttr(numElements * elementSize),
        rewriter.getI32IntegerAttr(dstCoreId.value()));

    return success();
  }
};
|
||||
|
||||
struct ChannelBroadcastReceiveOpInterface
|
||||
: BufferizableOpInterface::ExternalModel<ChannelBroadcastReceiveOpInterface, SpatChannelBroadcastReceiveOp> {
|
||||
|
||||
// Input value is the channel (not read/written, its more of an attribute)
|
||||
bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// See above
|
||||
bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// See above
|
||||
AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
// TODO: Is it an empty list or a list of "UNKNOWN" values?
|
||||
return {};
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn the channel receive to pim.load using by creating a new global buffer
|
||||
*/
|
||||
LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
|
||||
|
||||
auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||
|
||||
auto outputSize = cast<ShapedType>(outputTensor.getType()).getNumElements();
|
||||
|
||||
auto channelNewOp = op->getOperand(0).getDefiningOp<SpatChannelNewOp>();
|
||||
if (!channelNewOp) {
|
||||
op->emitError("ChannelBroadcastReceiveOp does not use a channel as operand");
|
||||
return failure();
|
||||
}
|
||||
|
||||
// The first 'broadcast' operation creates the buffer just after the
|
||||
// channelNewOp, while the other 'broadcast' operation need to find this
|
||||
// buffer allocation just after the channelNewOp
|
||||
Value bufferAllocation;
|
||||
if (auto allocOpAfterChannel = dyn_cast<memref::AllocOp>(channelNewOp->getNextNode())) {
|
||||
// Buffer already allocated, load from this buffer
|
||||
bufferAllocation = allocOpAfterChannel;
|
||||
}
|
||||
else {
|
||||
// Buffer was not allocated previously, allocate it after channelNewOp
|
||||
rewriter.setInsertionPointAfter(channelNewOp);
|
||||
bufferAllocation = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||
}
|
||||
|
||||
rewriter.setInsertionPoint(op);
|
||||
auto memCopyHostToDevOp = rewriter.create<pim::PimMemCopyHostToDevOp>(op->getLoc(),
|
||||
outputTensor.getType(),
|
||||
outputTensor,
|
||||
bufferAllocation,
|
||||
rewriter.getI32IntegerAttr(0),
|
||||
rewriter.getI32IntegerAttr(0),
|
||||
rewriter.getI32IntegerAttr(outputSize));
|
||||
|
||||
replaceOpWithBufferizedValues(rewriter, op, memCopyHostToDevOp.getDeviceDst());
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
struct ChannelBroadcastSendOpInterface
|
||||
: BufferizableOpInterface::ExternalModel<ChannelBroadcastSendOpInterface, SpatChannelBroadcastSendOp> {
|
||||
|
||||
// First input is channel (not read/writter) second input is Tensor to send,
|
||||
// which is read
|
||||
bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
return opOperand.getOperandNumber() == 2;
|
||||
}
|
||||
|
||||
// See above (both non-written)
|
||||
bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const { return false; }
|
||||
|
||||
// See above
|
||||
AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
// TODO: Is it an empty list or a list of "UNKNOWN" values?
|
||||
return {};
|
||||
}
|
||||
|
||||
/*
|
||||
* Turn the channel send to pim.send
|
||||
*/
|
||||
LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
|
||||
auto srcTensor = op->getOperand(1);
|
||||
|
||||
auto srcTensorOpt = getBuffer(rewriter, srcTensor, options, state);
|
||||
if (failed(srcTensorOpt))
|
||||
return failure();
|
||||
auto srcMemRef = *srcTensorOpt;
|
||||
|
||||
auto channelNewOp = op->getOperand(0).getDefiningOp<SpatChannelNewOp>();
|
||||
if (!channelNewOp) {
|
||||
op->emitError("SpatChannelBroadcastSendOp does not use a channel as operand");
|
||||
return failure();
|
||||
}
|
||||
|
||||
// The first 'broadcast' operation creates the buffer just after the
|
||||
// channelNewOp, while the other 'broadcast' operation need to find this
|
||||
// buffer allocation just after the channelNewOp
|
||||
Value bufferAllocation;
|
||||
if (auto allocOpAfterChannel = dyn_cast<memref::AllocOp>(channelNewOp->getNextNode())) {
|
||||
// Buffer already allocated, load from this buffer
|
||||
bufferAllocation = allocOpAfterChannel;
|
||||
}
|
||||
else {
|
||||
// Buffer was not allocated previously, allocate it after channelNewOp
|
||||
rewriter.setInsertionPointAfter(channelNewOp);
|
||||
bufferAllocation = createEmptyFromType(srcTensor.getType(), op->getLoc(), rewriter);
|
||||
}
|
||||
|
||||
rewriter.setInsertionPoint(op);
|
||||
replaceOpWithBufferizedValues(rewriter, op, {bufferAllocation, srcMemRef});
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
struct VAddOpInterfaceFromTemplate
|
||||
: VariadicArgumentElementWiseOpInterface<VAddOpInterfaceFromTemplate, SpatVAddOp, pim::PimVAddOp> {};
|
||||
|
||||
struct WVMMOpInterface : WeightedMultiplicationsOpInterface<WVMMOpInterface, SpatWeightedVMMOp, pim::PimVMMOp> {};
|
||||
|
||||
struct WMVMOpInterface : WeightedMultiplicationsOpInterface<WMVMOpInterface, SpatWeightedMVMOp, pim::PimMVMOp> {};
|
||||
|
||||
struct SumOpInterface : VariadicArgumentElementWiseOpInterface<SumOpInterface, SpatSumOp, pim::PimSumOp> {};
|
||||
|
||||
struct VSDivOpInterface : VariadicArgumentElementWiseOpInterface<VSDivOpInterface, SpatVSDivOp, pim::PimVSDivOp> {};
|
||||
|
||||
struct VMaxOpInterface : VariadicArgumentElementWiseOpInterface<VMaxOpInterface, SpatVMaxOp, pim::PimVMaxOp> {};
|
||||
|
||||
// Create a new bufferizable op interface for the apply filters operation.
|
||||
struct ApplyFiltersOpInterface : BufferizableOpInterface::ExternalModel<ApplyFiltersOpInterface, SpatApplyFiltersOp> {
|
||||
|
||||
// One operand ($input) is read from. All other inputs are only written to.
|
||||
bool bufferizesToMemoryRead(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
|
||||
// Operand 0: $input
|
||||
// Operand 1: $outBuf
|
||||
// Operand 2: $accumBuf
|
||||
return opOperand.getOperandNumber() == 0;
|
||||
}
|
||||
|
||||
// One input ($accumBuf) is written to. All other inputs are only read.
|
||||
bool bufferizesToMemoryWrite(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
|
||||
// Operand 0: $input
|
||||
// Operand 1: $outBuf
|
||||
// Operand 2: $accumBuf
|
||||
return opOperand.getOperandNumber() == 2;
|
||||
}
|
||||
|
||||
// No operands are aliased with any other operands.
|
||||
AliasingValueList getAliasingValues(Operation* op, OpOperand& opOperand, const AnalysisState& state) const {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Bufferize the operation.
|
||||
LogicalResult bufferize(Operation* op, RewriterBase& rewriter, const BufferizationOptions& options, BufferizationState &state) const {
|
||||
|
||||
// Get the input tensor buffer.
|
||||
auto inputBuffer = getBuffer(rewriter, op->getOperand(0), options, state);
|
||||
|
||||
if (failed(inputBuffer))
|
||||
return failure();
|
||||
|
||||
// Create a new buffer for the output tensor.
|
||||
auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||
|
||||
// Create a new buffer for the accumulation buffer.
|
||||
// To do this, create a new allocation operation. Size must be axbx1x1,
|
||||
// where axbxcxd is the size of the output tensor. Since the shape is
|
||||
// different, we can't immediately use createEmptyFromType, we first need to
|
||||
// create the shape of the accumulation buffer.
|
||||
auto accumShape = llvm::to_vector<4>(cast<ShapedType>(op->getResult(0).getType()).getShape());
|
||||
|
||||
// Set the last two dimensions to 1.
|
||||
accumShape[accumShape.size() - 1] = 1;
|
||||
accumShape[accumShape.size() - 2] = 1;
|
||||
|
||||
auto accumType = MemRefType::get(accumShape, cast<ShapedType>(op->getResult(0).getType()).getElementType());
|
||||
|
||||
auto accumBuffer = createEmptyFromType(accumType, op->getLoc(), rewriter);
|
||||
|
||||
// Bufferize the operation.
|
||||
auto weightIndices = cast<SpatApplyFiltersOp>(op).getWeightIndicesAttr();
|
||||
auto xKernelPositions = cast<SpatApplyFiltersOp>(op).getXKernelPositionsAttr();
|
||||
auto yKernelPositions = cast<SpatApplyFiltersOp>(op).getYKernelPositionsAttr();
|
||||
|
||||
Value bufferized = rewriter.create<pim::PimApplyFiltersOp>(op->getLoc(),
|
||||
outputTensor.getType(),
|
||||
weightIndices,
|
||||
xKernelPositions,
|
||||
yKernelPositions,
|
||||
*inputBuffer,
|
||||
outputTensor,
|
||||
accumBuffer);
|
||||
|
||||
// Replace the operation with the bufferized value.
|
||||
replaceOpWithBufferizedValues(rewriter, op, bufferized);
|
||||
|
||||
return success();
|
||||
}
|
||||
};
|
||||
|
||||
/// Adds an extension to `registry` that attaches the bufferization external
/// models defined in this file to the ops of the Spatial dialect.
void registerBufferizableOpInterfaceExternalModels(DialectRegistry& registry) {
  registry.addExtension(+[](MLIRContext* ctx, SpatialDialect* /*dialect*/) {
    MLIRContext& context = *ctx;

    // Compute region.
    SpatWeightedCompute::attachInterface<WComputeOpInterface>(context);

    // Element-wise and weighted multiplication ops.
    SpatVAddOp::attachInterface<VAddOpInterfaceFromTemplate>(context);
    SpatWeightedVMMOp::attachInterface<WVMMOpInterface>(context);
    SpatWeightedMVMOp::attachInterface<WMVMOpInterface>(context);
    SpatSumOp::attachInterface<SumOpInterface>(context);
    SpatVSDivOp::attachInterface<VSDivOpInterface>(context);
    SpatVMaxOp::attachInterface<VMaxOpInterface>(context);

    // Channel communication ops.
    SpatChannelReceiveOp::attachInterface<ChannelReceiveOpInterface>(context);
    SpatChannelSendOp::attachInterface<ChannelSendOpInterface>(context);
    SpatChannelBroadcastReceiveOp::attachInterface<ChannelBroadcastReceiveOpInterface>(context);
    SpatChannelBroadcastSendOp::attachInterface<ChannelBroadcastSendOpInterface>(context);

    // Filter application.
    SpatApplyFiltersOp::attachInterface<ApplyFiltersOpInterface>(context);
  });
}
|
||||
|
||||
// Bufferization models for ONNX ops, built from
// VariadicArgumentElementWiseOpInterface.

// ONNXReluOp -> pim::PimVReluOp
struct ONNXReluInterface
    : VariadicArgumentElementWiseOpInterface<ONNXReluInterface, ONNXReluOp, pim::PimVReluOp> {};

// ONNXExpOp -> pim::PimVExpOp
struct ONNXExpOpInterface
    : VariadicArgumentElementWiseOpInterface<ONNXExpOpInterface, ONNXExpOp, pim::PimVExpOp> {};
|
||||
|
||||
/// Adds an extension to `registry` that attaches the bufferization external
/// models for ONNXReluOp and ONNXExpOp to the ONNX dialect.
void registerONNXBufferizableOpInterfaceExternalModels(DialectRegistry& registry) {
  registry.addExtension(+[](MLIRContext* ctx, ONNXDialect* /*dialect*/) {
    MLIRContext& context = *ctx;
    ONNXReluOp::attachInterface<ONNXReluInterface>(context);
    ONNXExpOp::attachInterface<ONNXExpOpInterface>(context);
  });
}
|
||||
|
||||
} // namespace spatial
|
||||
} // namespace onnx_mlir
|
||||
@@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include "mlir/IR/DialectRegistry.h"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
namespace spatial {
|
||||
|
||||
// Registers, on `registry`, the extension that attaches the
// BufferizableOpInterface external models to the Spatial dialect ops.
void registerBufferizableOpInterfaceExternalModels(DialectRegistry& registry);
|
||||
|
||||
// Registers, on `registry`, the extension that attaches the
// BufferizableOpInterface external models to the supported ONNX dialect ops.
void registerONNXBufferizableOpInterfaceExternalModels(DialectRegistry& registry);
|
||||
|
||||
} // namespace spatial
|
||||
} // namespace onnx_mlir
|
||||
Reference in New Issue
Block a user