standardize spatial and pim dialects

remove old unused stuff
This commit is contained in:
NiccoloN
2026-03-23 21:21:31 +01:00
parent 0478d979ff
commit 93e20c1dfc
18 changed files with 693 additions and 1519 deletions

View File

@@ -194,45 +194,45 @@ void PimCodeGen::emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_
/// Generates the "ld" memory-copy instruction for a host-to-device copy op.
///
/// The op's device target (address + offset) is passed first and its host
/// source (address + offset) second, followed by the copy size. Addresses are
/// resolved through `memory.getValueAddress`; the instruction itself is
/// produced by `emitMemCopyOp`.
///
/// Only the standardized accessor names (`getDeviceTarget`, `getHostSource`)
/// are used; the pre-rename `getDeviceDst` / `getHostSrc` arguments were stale
/// duplicates and have been dropped.
void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp) const {
  emitMemCopyOp("ld",
                memory.getValueAddress(loadOp.getDeviceTarget()),
                loadOp.getDeviceTargetOffset(),
                memory.getValueAddress(loadOp.getHostSource()),
                loadOp.getHostSourceOffset(),
                loadOp.getSize());
}
/// Generates the "st" memory-copy instruction for a device-to-host copy op.
///
/// The op's host target (address + offset) is passed first and its device
/// source (address + offset) second, followed by the copy size. Addresses are
/// resolved through `memory.getValueAddress`; the instruction itself is
/// produced by `emitMemCopyOp`.
///
/// Only the standardized accessor names (`getHostTarget`, `getDeviceSource`)
/// are used; the pre-rename `getHostDst` / `getDeviceSrc` arguments were stale
/// duplicates and have been dropped.
void PimCodeGen::codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp) const {
  emitMemCopyOp("st",
                memory.getValueAddress(storeOp.getHostTarget()),
                storeOp.getHostTargetOffset(),
                memory.getValueAddress(storeOp.getDeviceSource()),
                storeOp.getDeviceSourceOffset(),
                storeOp.getSize());
}
/// Generates the "lmv" memory-copy instruction for a PIM memory copy op.
///
/// Passes the op's target (address + offset), then its source
/// (address + offset), then the copy size to `emitMemCopyOp`. The trailing
/// "len" argument is forwarded unchanged; presumably it names the size field
/// of the emitted instruction -- TODO confirm against `emitMemCopyOp`.
///
/// Only the standardized accessor names (`getTarget`, `getSource`) are used;
/// the pre-rename `getDst` / `getSrc` arguments were stale duplicates and have
/// been dropped.
void PimCodeGen::codeGenLmvOp(pim::PimMemCopyOp lmvOp) const {
  emitMemCopyOp("lmv",
                memory.getValueAddress(lmvOp.getTarget()),
                lmvOp.getTargetOffset(),
                memory.getValueAddress(lmvOp.getSource()),
                lmvOp.getSourceOffset(),
                lmvOp.getSize(),
                "len");
}
/// Generates the "recv" communication instruction for a receive op.
///
/// Forwards the address of the op's output buffer, the source core id and the
/// transfer size to `emitCommunicationOp`.
///
/// Only the standardized accessors (`getOutputBuffer`, `getSourceCoreId`) are
/// used; the pre-rename `getDst` / `getSrcCoreId` argument line was a stale
/// duplicate and has been dropped.
void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp) const {
  emitCommunicationOp("recv",
                      memory.getValueAddress(receiveOp.getOutputBuffer()),
                      receiveOp.getSourceCoreId(),
                      receiveOp.getSize());
}
/// Generates the "send" communication instruction for a send op.
///
/// Forwards the address of the op's input, the target core id and the
/// transfer size to `emitCommunicationOp`.
///
/// The instruction is emitted exactly once, using the standardized `getInput`
/// accessor; the additional call through the pre-rename `getSrc` accessor was
/// a stale duplicate and has been dropped.
void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp) const {
  emitCommunicationOp("send",
                      memory.getValueAddress(sendOp.getInput()),
                      sendOp.getTargetCoreId(),
                      sendOp.getSize());
}
/// Generates an MVM instruction for any op type exposing `getOutputBuffer()`
/// and `getInput()`.
///
/// @tparam MVMTy          Op type of `mvmLikeOp`.
/// @param mvmId           Identifier forwarded as the first argument of
///                        `emitMvmOp`.
/// @param mvmLikeOp       Op whose output buffer and input addresses are
///                        resolved through `memory.getValueAddress`.
/// @param transposeMatrix Currently unused by this function (see TODO below).
///
/// Both offsets passed to `emitMvmOp` are 0. Only the standardized accessors
/// are used; the pre-rename `getOutBuf` / `getVectorInput` argument line was a
/// stale duplicate and has been dropped.
template <typename MVMTy>
void PimCodeGen::codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix) {
  emitMvmOp(mvmId,
            memory.getValueAddress(mvmLikeOp.getOutputBuffer()),
            0,
            memory.getValueAddress(mvmLikeOp.getInput()),
            0);
  // TODO: save weights somewhere (if transposeMatrix=true, transpose the weight matrix)
}
@@ -243,10 +243,10 @@ static size_t getValueSizeInBytes(mlir::Value value) {
}
void PimCodeGen::codeGenVVAddOp(pim::PimVVAddOp vvaddOp) const {
auto outBufAddr = memory.getValueAddress(vvaddOp.getOutBuf());
auto aAddr = memory.getValueAddress(vvaddOp.getA());
auto bAddr = memory.getValueAddress(vvaddOp.getB());
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
auto outputBufferAddr = memory.getValueAddress(vvaddOp.getOutputBuffer());
auto lhsAddr = memory.getValueAddress(vvaddOp.getLhs());
auto rhsAddr = memory.getValueAddress(vvaddOp.getRhs());
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
json::Object json;
json["op"] = "vvadd";
@@ -254,15 +254,15 @@ void PimCodeGen::codeGenVVAddOp(pim::PimVVAddOp vvaddOp) const {
json["rs1"] = 1;
json["rs2"] = 2;
json["offset"] = createEmptyOffset();
json["len"] = getValueSizeInBytes(vvaddOp.getA());
json["len"] = getValueSizeInBytes(vvaddOp.getLhs());
emitInstruction(std::move(json));
}
void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp) const {
auto outBufAddr = memory.getValueAddress(vvsubOp.getOutBuf());
auto aAddr = memory.getValueAddress(vvsubOp.getA());
auto bAddr = memory.getValueAddress(vvsubOp.getB());
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
auto outputBufferAddr = memory.getValueAddress(vvsubOp.getOutputBuffer());
auto lhsAddr = memory.getValueAddress(vvsubOp.getLhs());
auto rhsAddr = memory.getValueAddress(vvsubOp.getRhs());
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
json::Object json;
json["op"] = "vvsub";
@@ -270,15 +270,15 @@ void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp) const {
json["rs1"] = 1;
json["rs2"] = 2;
json["offset"] = createEmptyOffset();
json["len"] = getValueSizeInBytes(vvsubOp.getA());
json["len"] = getValueSizeInBytes(vvsubOp.getLhs());
emitInstruction(std::move(json));
}
void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp) const {
auto outBufAddr = memory.getValueAddress(vvmulOp.getOutBuf());
auto aAddr = memory.getValueAddress(vvmulOp.getA());
auto bAddr = memory.getValueAddress(vvmulOp.getB());
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
auto outputBufferAddr = memory.getValueAddress(vvmulOp.getOutputBuffer());
auto lhsAddr = memory.getValueAddress(vvmulOp.getLhs());
auto rhsAddr = memory.getValueAddress(vvmulOp.getRhs());
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
json::Object json;
json["op"] = "vvmul";
@@ -286,15 +286,15 @@ void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp) const {
json["rs1"] = 1;
json["rs2"] = 2;
json["offset"] = createEmptyOffset();
json["len"] = getValueSizeInBytes(vvmulOp.getA());
json["len"] = getValueSizeInBytes(vvmulOp.getLhs());
emitInstruction(std::move(json));
}
void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp) const {
auto outBufAddr = memory.getValueAddress(vvmaxOp.getOutBuf());
auto aAddr = memory.getValueAddress(vvmaxOp.getA());
auto bAddr = memory.getValueAddress(vvmaxOp.getB());
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
auto outputBufferAddr = memory.getValueAddress(vvmaxOp.getOutputBuffer());
auto lhsAddr = memory.getValueAddress(vvmaxOp.getLhs());
auto rhsAddr = memory.getValueAddress(vvmaxOp.getRhs());
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
json::Object json;
json["op"] = "vvmax";
@@ -302,15 +302,15 @@ void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp) const {
json["rs1"] = 1;
json["rs2"] = 2;
json["offset"] = createEmptyOffset();
json["len"] = getValueSizeInBytes(vvmaxOp.getA());
json["len"] = getValueSizeInBytes(vvmaxOp.getLhs());
emitInstruction(std::move(json));
}
void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp) const {
auto outBufAddr = memory.getValueAddress(vvdmulOp.getOutBuf());
auto aAddr = memory.getValueAddress(vvdmulOp.getA());
auto bAddr = memory.getValueAddress(vvdmulOp.getB());
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
auto outputBufferAddr = memory.getValueAddress(vvdmulOp.getOutputBuffer());
auto lhsAddr = memory.getValueAddress(vvdmulOp.getLhs());
auto rhsAddr = memory.getValueAddress(vvdmulOp.getRhs());
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
json::Object json;
json["op"] = "vvdmul";
@@ -318,132 +318,71 @@ void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp) const {
json["rs1"] = 1;
json["rs2"] = 2;
json["offset"] = createEmptyOffset();
json["len"] = getValueSizeInBytes(vvdmulOp.getA());
json["len"] = getValueSizeInBytes(vvdmulOp.getLhs());
emitInstruction(std::move(json));
}
/// Generates the "vavg" instruction for a vector-average op.
///
/// Resolves the output buffer and input addresses, hands them to `setupRdRs1`
/// with zero offsets, then emits a JSON instruction with "rd" = 0, "rs1" = 1,
/// an empty offset and "len" taken from `getValueSizeInBytes` of the input.
///
/// The setup and the "len" field are each written exactly once, using the
/// standardized accessors (`getOutputBuffer`, `getInput`); the duplicated
/// statements going through the pre-rename `getOutBuf` / `getA` accessors were
/// stale and have been dropped.
void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp) const {
  auto outputBufferAddr = memory.getValueAddress(vavgOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vavgOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vavg";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vavgOp.getInput());
  emitInstruction(std::move(json));
}
/// Generates the "vrelu" instruction for a vector ReLU op.
///
/// Resolves the output buffer and input addresses, hands them to `setupRdRs1`
/// with zero offsets, then emits a JSON instruction with "rd" = 0, "rs1" = 1,
/// an empty offset and "len" taken from `getValueSizeInBytes` of the input.
///
/// The setup and the "len" field are each written exactly once, using the
/// standardized accessors (`getOutputBuffer`, `getInput`); the duplicated
/// statements going through the pre-rename `getOutBuf` / `getA` accessors were
/// stale and have been dropped.
void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp) const {
  auto outputBufferAddr = memory.getValueAddress(vreluOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vreluOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vrelu";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vreluOp.getInput());
  emitInstruction(std::move(json));
}
/// Generates the "vtanh" instruction for a vector tanh op.
///
/// Resolves the output buffer and input addresses, hands them to `setupRdRs1`
/// with zero offsets, then emits a JSON instruction with "rd" = 0, "rs1" = 1,
/// an empty offset and "len" taken from `getValueSizeInBytes` of the input.
///
/// The setup and the "len" field are each written exactly once, using the
/// standardized accessors (`getOutputBuffer`, `getInput`); the duplicated
/// statements going through the pre-rename `getOutBuf` / `getA` accessors were
/// stale and have been dropped.
void PimCodeGen::codeGenVTanhOp(pim::PimVTanhOp vtanhOp) const {
  auto outputBufferAddr = memory.getValueAddress(vtanhOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vtanhOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vtanh";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vtanhOp.getInput());
  emitInstruction(std::move(json));
}
/// Generates the "vsigm" instruction for a vector sigmoid op.
///
/// Resolves the output buffer and input addresses, hands them to `setupRdRs1`
/// with zero offsets, then emits a JSON instruction with "rd" = 0, "rs1" = 1,
/// an empty offset and "len" taken from `getValueSizeInBytes` of the input.
///
/// The setup and the "len" field are each written exactly once, using the
/// standardized accessors (`getOutputBuffer`, `getInput`); the duplicated
/// statements going through the pre-rename `getOutBuf` / `getA` accessors were
/// stale and have been dropped.
void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp) const {
  auto outputBufferAddr = memory.getValueAddress(vsigmOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vsigmOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vsigm";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vsigmOp.getInput());
  emitInstruction(std::move(json));
}
// Generates the instruction sequence for an apply-filters op.
//
// For every (outY, outX) position and every entry of the op's weight indices,
// one MVM instruction is emitted via emitMvmOp; for every entry except the
// first, a "vvadd" instruction then adds the accumulation buffer into the
// output buffer at the current output offset.
//
// NOTE(review): the factor 32 used in the offset and length computations is
// not explained in this block -- TODO confirm what it represents.
void PimCodeGen::codeGenApplyFiltersOp(pim::PimApplyFiltersOp applyFiltersOp) const {
// Resolve the addresses of the three buffers the op works on.
auto outBufAddr = memory.getValueAddress(applyFiltersOp.getOutBuf());
auto inBufAddr = memory.getValueAddress(applyFiltersOp.getInput());
auto accumBufAddr = memory.getValueAddress(applyFiltersOp.getAccumBuf());
auto weightIndices = applyFiltersOp.getWeightIndices();
auto inputType = cast<MemRefType>(applyFiltersOp.getInput().getType());
auto outputType = cast<MemRefType>(applyFiltersOp.getOutBuf().getType());
auto inShape = inputType.getShape();
auto outShape = outputType.getShape();
// Channel counts come from dimension 1 of the input and output shapes.
// NOTE(review): dimension 1 is indexed without a rank check -- presumably the
// op guarantees rank >= 2; verify.
size_t inChannels = inShape[1];
size_t outChannels = outShape[1];
// Spatial extents come from input dimensions 2 and 3, defaulting to 1 when
// the input has fewer dimensions.
size_t dimX = inShape.size() > 2 ? inShape[2] : 1;
size_t dimY = inShape.size() > 3 ? inShape[3] : 1;
for (size_t outY = 0; outY < dimY; outY++) {
for (size_t outX = 0; outX < dimX; outX++) {
// Position inside weightIndices; also indexes the kernel position arrays.
size_t weightIndex = 0;
for (Attribute weight : weightIndices) {
// --- STEP 1: Perform MVMUL operation ---
auto weightId = cast<IntegerAttr>(weight).getInt();
size_t xKer = cast<IntegerAttr>(applyFiltersOp.getXKernelPositions()[weightIndex]).getInt();
size_t yKer = cast<IntegerAttr>(applyFiltersOp.getYKernelPositions()[weightIndex]).getInt();
weightIndex++;
// Skip kernel positions whose input element would lie outside dimX x dimY.
// NOTE(review): if the first weight is skipped here, later weights still
// add onto an output region this function has not written -- confirm that
// this cannot happen (e.g. first kernel position always (0, 0)).
if (outX + xKer >= dimX || outY + yKer >= dimY)
continue;
size_t outputOffset = (outY * dimX + outX) * 32 * outChannels;
size_t inputOffset = ((outY + yKer) * dimX + (outX + xKer)) * 32 * inChannels;
// NOTE(review): "first" is decided by comparing the attribute with entry 0,
// not by position; a later entry equal to the first one would also be
// treated as first -- confirm weight indices are unique.
bool isFirstWeight = (weightIndices[0] == weight);
// For the first weight, store directly in output buffer; otherwise use accumulator.
size_t rdAddr = isFirstWeight ? outBufAddr : accumBufAddr;
size_t rdOffset = isFirstWeight ? outputOffset : 0;
emitMvmOp(weightId, rdAddr, rdOffset, inBufAddr, inputOffset);
// --- STEP 2: Perform VADD operation (skip for first weight) ---
if (isFirstWeight)
continue;
// Sum accumulator with output buffer, store result in output buffer.
setupRdRs1Rs2(outBufAddr, outputOffset, accumBufAddr, 0, outBufAddr, outputOffset);
json::Object vaddJson;
vaddJson["op"] = "vvadd";
vaddJson["rd"] = 0;
vaddJson["rs1"] = 1;
vaddJson["rs2"] = 2;
vaddJson["offset"] = createEmptyOffset();
vaddJson["len"] = 32 * outChannels;
emitInstruction(std::move(vaddJson));
}
}
}
}
void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp) const {
auto srcAddr = memory.getValueAddress(transposeOp.getData());
auto dstAddr = memory.getValueAddress(transposeOp.getOutBuf());
auto srcAddr = memory.getValueAddress(transposeOp.getInput());
auto dstAddr = memory.getValueAddress(transposeOp.getOutputBuffer());
auto srcType = cast<ShapedType>(transposeOp.getData().getType());
auto srcType = cast<ShapedType>(transposeOp.getInput().getType());
auto srcShape = srcType.getShape();
size_t rank = srcShape.size();
size_t elementSize = srcType.getElementTypeBitWidth() / 8;
@@ -451,7 +390,7 @@ void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp) const {
// Read permutation. Destination dim i corresponds to source dim perm[i].
SmallVector<int64_t> perm =
map_to_vector(transposeOp.getPerms().getAsRange<IntegerAttr>(), [](auto attr) -> int64_t { return attr.getInt(); });
map_to_vector(transposeOp.getPermutation().getAsRange<IntegerAttr>(), [](auto attr) -> int64_t { return attr.getInt(); });
// Destination shape: dstShape[i] = srcShape[perm[i]]
SmallVector<int64_t> dstShape(rank);
@@ -570,8 +509,6 @@ static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
coreCodeGen.codeGenMVMLikeOp<pim::PimVMMOp>(vmmOp.getWeightIndex(), vmmOp, true);
else if (auto mvmOp = dyn_cast<pim::PimMVMOp>(op))
coreCodeGen.codeGenMVMLikeOp<pim::PimMVMOp>(mvmOp.getWeightIndex(), mvmOp, false);
else if (auto applyFiltersOp = dyn_cast<pim::PimApplyFiltersOp>(op))
coreCodeGen.codeGenApplyFiltersOp(applyFiltersOp);
else if (auto transposeOp = dyn_cast<pim::PimTransposeOp>(op))
coreCodeGen.codeGenTransposeOp(transposeOp);
else if (auto vvaddOp = dyn_cast<pim::PimVVAddOp>(op))
@@ -592,11 +529,6 @@ static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
coreCodeGen.codeGenVTanhOp(vtanhOp);
else if (auto vsigmOp = dyn_cast<pim::PimVSigmOp>(op))
coreCodeGen.codeGenVSigmOp(vsigmOp);
else if (isa<pim::PimSumOp>(op)) {
// TODO: Implement somehow?
op.emitWarning("Operation is not yet supported in code generation");
continue;
}
else {
op.emitError("Unsupported codegen for this operation");
op.dump();