standardize spatial and pim dialects
remove old unused stuff
This commit is contained in:
@@ -194,45 +194,45 @@ void PimCodeGen::emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_
|
||||
|
||||
/// Emits an "ld" memory-copy instruction for a host-to-device copy op.
///
/// The destination is the op's device target (its address is resolved through
/// `memory`) together with the device target offset; the source is the op's
/// host source together with the host source offset. The copy length is the
/// op's size attribute.
void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp) const {
  emitMemCopyOp("ld",
                memory.getValueAddress(loadOp.getDeviceTarget()),
                loadOp.getDeviceTargetOffset(),
                memory.getValueAddress(loadOp.getHostSource()),
                loadOp.getHostSourceOffset(),
                loadOp.getSize());
}
|
||||
|
||||
/// Emits an "st" memory-copy instruction for a device-to-host copy op.
///
/// The destination is the op's host target (address resolved through `memory`)
/// with the host target offset; the source is the op's device source with the
/// device source offset. The copy length is the op's size attribute.
void PimCodeGen::codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp) const {
  emitMemCopyOp("st",
                memory.getValueAddress(storeOp.getHostTarget()),
                storeOp.getHostTargetOffset(),
                memory.getValueAddress(storeOp.getDeviceSource()),
                storeOp.getDeviceSourceOffset(),
                storeOp.getSize());
}
|
||||
|
||||
/// Emits an "lmv" memory-copy instruction for a generic memory-copy op.
///
/// The destination is the op's target (address resolved through `memory`) with
/// the target offset; the source is the op's source with the source offset.
/// The copy length is the op's size attribute.
void PimCodeGen::codeGenLmvOp(pim::PimMemCopyOp lmvOp) const {
  emitMemCopyOp("lmv",
                memory.getValueAddress(lmvOp.getTarget()),
                lmvOp.getTargetOffset(),
                memory.getValueAddress(lmvOp.getSource()),
                lmvOp.getSourceOffset(),
                lmvOp.getSize(),
                // NOTE(review): presumably the JSON field name under which the
                // size is emitted for this opcode; confirm against emitMemCopyOp.
                "len");
}
|
||||
|
||||
/// Emits a "recv" communication instruction.
///
/// Passes the address of the op's output buffer (resolved through `memory`),
/// the op's source core id, and the op's size to `emitCommunicationOp`.
void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp) const {
  emitCommunicationOp(
      "recv", memory.getValueAddress(receiveOp.getOutputBuffer()), receiveOp.getSourceCoreId(), receiveOp.getSize());
}
|
||||
|
||||
/// Emits a "send" communication instruction.
///
/// Passes the address of the op's input value (resolved through `memory`),
/// the op's target core id, and the op's size to `emitCommunicationOp`.
void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp) const {
  emitCommunicationOp("send", memory.getValueAddress(sendOp.getInput()), sendOp.getTargetCoreId(), sendOp.getSize());
}
|
||||
|
||||
template <typename MVMTy>
|
||||
void PimCodeGen::codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix) {
|
||||
emitMvmOp(
|
||||
mvmId, memory.getValueAddress(mvmLikeOp.getOutBuf()), 0, memory.getValueAddress(mvmLikeOp.getVectorInput()), 0);
|
||||
mvmId, memory.getValueAddress(mvmLikeOp.getOutputBuffer()), 0, memory.getValueAddress(mvmLikeOp.getInput()), 0);
|
||||
|
||||
// TODO: save weights somewhere (if transposeMatrix=true, transpose the weight matrix)
|
||||
}
|
||||
@@ -243,10 +243,10 @@ static size_t getValueSizeInBytes(mlir::Value value) {
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVAddOp(pim::PimVVAddOp vvaddOp) const {
|
||||
auto outBufAddr = memory.getValueAddress(vvaddOp.getOutBuf());
|
||||
auto aAddr = memory.getValueAddress(vvaddOp.getA());
|
||||
auto bAddr = memory.getValueAddress(vvaddOp.getB());
|
||||
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
||||
auto outputBufferAddr = memory.getValueAddress(vvaddOp.getOutputBuffer());
|
||||
auto lhsAddr = memory.getValueAddress(vvaddOp.getLhs());
|
||||
auto rhsAddr = memory.getValueAddress(vvaddOp.getRhs());
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvadd";
|
||||
@@ -254,15 +254,15 @@ void PimCodeGen::codeGenVVAddOp(pim::PimVVAddOp vvaddOp) const {
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvaddOp.getA());
|
||||
json["len"] = getValueSizeInBytes(vvaddOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp) const {
|
||||
auto outBufAddr = memory.getValueAddress(vvsubOp.getOutBuf());
|
||||
auto aAddr = memory.getValueAddress(vvsubOp.getA());
|
||||
auto bAddr = memory.getValueAddress(vvsubOp.getB());
|
||||
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
||||
auto outputBufferAddr = memory.getValueAddress(vvsubOp.getOutputBuffer());
|
||||
auto lhsAddr = memory.getValueAddress(vvsubOp.getLhs());
|
||||
auto rhsAddr = memory.getValueAddress(vvsubOp.getRhs());
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvsub";
|
||||
@@ -270,15 +270,15 @@ void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp) const {
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvsubOp.getA());
|
||||
json["len"] = getValueSizeInBytes(vvsubOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp) const {
|
||||
auto outBufAddr = memory.getValueAddress(vvmulOp.getOutBuf());
|
||||
auto aAddr = memory.getValueAddress(vvmulOp.getA());
|
||||
auto bAddr = memory.getValueAddress(vvmulOp.getB());
|
||||
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
||||
auto outputBufferAddr = memory.getValueAddress(vvmulOp.getOutputBuffer());
|
||||
auto lhsAddr = memory.getValueAddress(vvmulOp.getLhs());
|
||||
auto rhsAddr = memory.getValueAddress(vvmulOp.getRhs());
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvmul";
|
||||
@@ -286,15 +286,15 @@ void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp) const {
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvmulOp.getA());
|
||||
json["len"] = getValueSizeInBytes(vvmulOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp) const {
|
||||
auto outBufAddr = memory.getValueAddress(vvmaxOp.getOutBuf());
|
||||
auto aAddr = memory.getValueAddress(vvmaxOp.getA());
|
||||
auto bAddr = memory.getValueAddress(vvmaxOp.getB());
|
||||
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
||||
auto outputBufferAddr = memory.getValueAddress(vvmaxOp.getOutputBuffer());
|
||||
auto lhsAddr = memory.getValueAddress(vvmaxOp.getLhs());
|
||||
auto rhsAddr = memory.getValueAddress(vvmaxOp.getRhs());
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvmax";
|
||||
@@ -302,15 +302,15 @@ void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp) const {
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvmaxOp.getA());
|
||||
json["len"] = getValueSizeInBytes(vvmaxOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp) const {
|
||||
auto outBufAddr = memory.getValueAddress(vvdmulOp.getOutBuf());
|
||||
auto aAddr = memory.getValueAddress(vvdmulOp.getA());
|
||||
auto bAddr = memory.getValueAddress(vvdmulOp.getB());
|
||||
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
||||
auto outputBufferAddr = memory.getValueAddress(vvdmulOp.getOutputBuffer());
|
||||
auto lhsAddr = memory.getValueAddress(vvdmulOp.getLhs());
|
||||
auto rhsAddr = memory.getValueAddress(vvdmulOp.getRhs());
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvdmul";
|
||||
@@ -318,132 +318,71 @@ void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp) const {
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvdmulOp.getA());
|
||||
json["len"] = getValueSizeInBytes(vvdmulOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
}
|
||||
|
||||
/// Emits a "vavg" instruction for a vector-average op.
///
/// Calls `setupRdRs1` with the output buffer address and the input address
/// (both at offset 0), then emits a JSON instruction that refers to them as
/// `rd` = 0 and `rs1` = 1. `len` is the byte size of the input value.
void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp) const {
  auto outputBufferAddr = memory.getValueAddress(vavgOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vavgOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vavg";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vavgOp.getInput());
  emitInstruction(std::move(json));
}
|
||||
|
||||
/// Emits a "vrelu" instruction for a vector ReLU op.
///
/// Calls `setupRdRs1` with the output buffer address and the input address
/// (both at offset 0), then emits a JSON instruction that refers to them as
/// `rd` = 0 and `rs1` = 1. `len` is the byte size of the input value.
void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp) const {
  auto outputBufferAddr = memory.getValueAddress(vreluOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vreluOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vrelu";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vreluOp.getInput());
  emitInstruction(std::move(json));
}
|
||||
|
||||
/// Emits a "vtanh" instruction for a vector tanh op.
///
/// Calls `setupRdRs1` with the output buffer address and the input address
/// (both at offset 0), then emits a JSON instruction that refers to them as
/// `rd` = 0 and `rs1` = 1. `len` is the byte size of the input value.
void PimCodeGen::codeGenVTanhOp(pim::PimVTanhOp vtanhOp) const {
  auto outputBufferAddr = memory.getValueAddress(vtanhOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vtanhOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vtanh";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vtanhOp.getInput());
  emitInstruction(std::move(json));
}
|
||||
|
||||
/// Emits a "vsigm" instruction for a vector sigmoid op.
///
/// Calls `setupRdRs1` with the output buffer address and the input address
/// (both at offset 0), then emits a JSON instruction that refers to them as
/// `rd` = 0 and `rs1` = 1. `len` is the byte size of the input value.
void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp) const {
  auto outputBufferAddr = memory.getValueAddress(vsigmOp.getOutputBuffer());
  auto inputAddr = memory.getValueAddress(vsigmOp.getInput());
  setupRdRs1(outputBufferAddr, 0, inputAddr, 0);

  json::Object json;
  json["op"] = "vsigm";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  json["len"] = getValueSizeInBytes(vsigmOp.getInput());
  emitInstruction(std::move(json));
}
|
||||
|
||||
/// Emits the instruction sequence for an apply-filters op.
///
/// For every output position (outX, outY) and every entry of the op's weight
/// index list, an MVM instruction is emitted on the input slice shifted by
/// that entry's kernel position. The first entry of the list writes straight
/// into the output buffer; every later entry writes into the accumulator
/// buffer and is then summed into the output buffer with a "vvadd".
///
/// Entries whose shifted position falls outside the input extent are skipped.
void PimCodeGen::codeGenApplyFiltersOp(pim::PimApplyFiltersOp applyFiltersOp) const {
  // Stride multiplied by the channel count when computing offsets and lengths.
  // NOTE(review): the meaning of 32 is not visible in this file — confirm.
  constexpr size_t kPerChannelStride = 32;

  auto outBufAddr = memory.getValueAddress(applyFiltersOp.getOutBuf());
  auto inBufAddr = memory.getValueAddress(applyFiltersOp.getInput());
  auto accumBufAddr = memory.getValueAddress(applyFiltersOp.getAccumBuf());

  // Loop-invariant attribute lists, fetched once.
  auto weightIndices = applyFiltersOp.getWeightIndices();
  auto xKernelPositions = applyFiltersOp.getXKernelPositions();
  auto yKernelPositions = applyFiltersOp.getYKernelPositions();

  auto inputType = cast<MemRefType>(applyFiltersOp.getInput().getType());
  auto outputType = cast<MemRefType>(applyFiltersOp.getOutBuf().getType());
  auto inShape = inputType.getShape();
  auto outShape = outputType.getShape();

  // Dimension 1 is read as the channel count; dimensions 2 and 3, when
  // present, are the X and Y extents (defaulting to 1 otherwise).
  size_t inChannels = inShape[1];
  size_t outChannels = outShape[1];
  size_t dimX = inShape.size() > 2 ? inShape[2] : 1;
  size_t dimY = inShape.size() > 3 ? inShape[3] : 1;

  for (size_t outY = 0; outY < dimY; outY++) {
    for (size_t outX = 0; outX < dimX; outX++) {

      size_t weightIndex = 0;
      for (Attribute weight : weightIndices) {
        // --- STEP 1: Perform MVMUL operation ---
        const size_t position = weightIndex++;
        auto weightId = cast<IntegerAttr>(weight).getInt();
        size_t xKer = cast<IntegerAttr>(xKernelPositions[position]).getInt();
        size_t yKer = cast<IntegerAttr>(yKernelPositions[position]).getInt();

        if (outX + xKer >= dimX || outY + yKer >= dimY)
          continue;

        size_t outputOffset = (outY * dimX + outX) * kPerChannelStride * outChannels;
        size_t inputOffset = ((outY + yKer) * dimX + (outX + xKer)) * kPerChannelStride * inChannels;

        // "First" is decided by list position, not by attribute value: two
        // entries carrying the same weight id compare equal as attributes, and
        // a value comparison would let a later duplicate overwrite the sum
        // already accumulated in the output buffer.
        // NOTE(review): if the first entry is skipped by the bounds check
        // above, later entries are added onto whatever the output buffer
        // already holds — confirm the buffer is initialised in that case.
        const bool isFirstWeight = (position == 0);

        // For the first weight, store directly in output buffer; otherwise use accumulator.
        size_t rdAddr = isFirstWeight ? outBufAddr : accumBufAddr;
        size_t rdOffset = isFirstWeight ? outputOffset : 0;
        emitMvmOp(weightId, rdAddr, rdOffset, inBufAddr, inputOffset);

        // --- STEP 2: Perform VADD operation (skip for first weight) ---
        if (isFirstWeight)
          continue;

        // Sum accumulator with output buffer, store result in output buffer.
        setupRdRs1Rs2(outBufAddr, outputOffset, accumBufAddr, 0, outBufAddr, outputOffset);

        json::Object vaddJson;
        vaddJson["op"] = "vvadd";
        vaddJson["rd"] = 0;
        vaddJson["rs1"] = 1;
        vaddJson["rs2"] = 2;
        vaddJson["offset"] = createEmptyOffset();
        vaddJson["len"] = kPerChannelStride * outChannels;
        emitInstruction(std::move(vaddJson));
      }
    }
  }
}
|
||||
|
||||
void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp) const {
|
||||
auto srcAddr = memory.getValueAddress(transposeOp.getData());
|
||||
auto dstAddr = memory.getValueAddress(transposeOp.getOutBuf());
|
||||
auto srcAddr = memory.getValueAddress(transposeOp.getInput());
|
||||
auto dstAddr = memory.getValueAddress(transposeOp.getOutputBuffer());
|
||||
|
||||
auto srcType = cast<ShapedType>(transposeOp.getData().getType());
|
||||
auto srcType = cast<ShapedType>(transposeOp.getInput().getType());
|
||||
auto srcShape = srcType.getShape();
|
||||
size_t rank = srcShape.size();
|
||||
size_t elementSize = srcType.getElementTypeBitWidth() / 8;
|
||||
@@ -451,7 +390,7 @@ void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp) const {
|
||||
|
||||
// Read permutation. Destination dim i corresponds to source dim perm[i].
|
||||
SmallVector<int64_t> perm =
|
||||
map_to_vector(transposeOp.getPerms().getAsRange<IntegerAttr>(), [](auto attr) -> int64_t { return attr.getInt(); });
|
||||
map_to_vector(transposeOp.getPermutation().getAsRange<IntegerAttr>(), [](auto attr) -> int64_t { return attr.getInt(); });
|
||||
|
||||
// Destination shape: dstShape[i] = srcShape[perm[i]]
|
||||
SmallVector<int64_t> dstShape(rank);
|
||||
@@ -570,8 +509,6 @@ static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
|
||||
coreCodeGen.codeGenMVMLikeOp<pim::PimVMMOp>(vmmOp.getWeightIndex(), vmmOp, true);
|
||||
else if (auto mvmOp = dyn_cast<pim::PimMVMOp>(op))
|
||||
coreCodeGen.codeGenMVMLikeOp<pim::PimMVMOp>(mvmOp.getWeightIndex(), mvmOp, false);
|
||||
else if (auto applyFiltersOp = dyn_cast<pim::PimApplyFiltersOp>(op))
|
||||
coreCodeGen.codeGenApplyFiltersOp(applyFiltersOp);
|
||||
else if (auto transposeOp = dyn_cast<pim::PimTransposeOp>(op))
|
||||
coreCodeGen.codeGenTransposeOp(transposeOp);
|
||||
else if (auto vvaddOp = dyn_cast<pim::PimVVAddOp>(op))
|
||||
@@ -592,11 +529,6 @@ static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
|
||||
coreCodeGen.codeGenVTanhOp(vtanhOp);
|
||||
else if (auto vsigmOp = dyn_cast<pim::PimVSigmOp>(op))
|
||||
coreCodeGen.codeGenVSigmOp(vsigmOp);
|
||||
else if (isa<pim::PimSumOp>(op)) {
|
||||
// TODO: Implement somehow?
|
||||
op.emitWarning("Operation is not yet supported in code generation");
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
op.emitError("Unsupported codegen for this operation");
|
||||
op.dump();
|
||||
|
||||
Reference in New Issue
Block a user