|
|
|
@@ -12,20 +12,16 @@
|
|
|
|
#include <algorithm>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cassert>
|
|
|
|
#include <cmath>
|
|
|
|
#include <cmath>
|
|
|
|
#include <cstddef>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
|
|
|
|
#include "Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
|
|
|
|
#include "Conversion/SpatialToPIM/SpatialToPIMCommon.hpp"
|
|
|
|
#include "Conversion/SpatialToPIM/SpatialToPIMCommon.hpp"
|
|
|
|
#include "Dialect/Spatial/SpatialOps.hpp"
|
|
|
|
|
|
|
|
#include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp"
|
|
|
|
#include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp"
|
|
|
|
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
|
|
|
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
|
|
|
#include "src/Accelerators/PIM/Compiler/PimCompilerUtils.hpp"
|
|
|
|
|
|
|
|
#include "src/Accelerators/PIM/Dialect/PIM/PimOps.hpp"
|
|
|
|
#include "src/Accelerators/PIM/Dialect/PIM/PimOps.hpp"
|
|
|
|
#include "src/Accelerators/PIM/Pass/PimPasses.hpp"
|
|
|
|
|
|
|
|
#include "src/Compiler/CompilerPasses.hpp"
|
|
|
|
#include "src/Compiler/CompilerPasses.hpp"
|
|
|
|
#include "src/Compiler/CompilerUtils.hpp"
|
|
|
|
#include "src/Compiler/CompilerUtils.hpp"
|
|
|
|
|
|
|
|
|
|
|
|
namespace onnx_mlir {
|
|
|
|
using namespace onnx_mlir;
|
|
|
|
|
|
|
|
|
|
|
|
MemEntry* PimMemory::gatherMemEntry(Value value) {
|
|
|
|
MemEntry* PimMemory::gatherMemEntry(Value value) {
|
|
|
|
auto type = cast<ShapedType>(value.getType());
|
|
|
|
auto type = cast<ShapedType>(value.getType());
|
|
|
|
@@ -49,7 +45,7 @@ void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
|
|
|
|
// More than one SSA value per single global constant:
|
|
|
|
// More than one SSA value per single global constant:
|
|
|
|
// Cannot call gatherMemEntry for each of them, otherwise memory will be allocated multiple times
|
|
|
|
// Cannot call gatherMemEntry for each of them, otherwise memory will be allocated multiple times
|
|
|
|
// Thus, call gatherMemEntry only for the first SSA value and assign the same memEntry to all others
|
|
|
|
// Thus, call gatherMemEntry only for the first SSA value and assign the same memEntry to all others
|
|
|
|
llvm::SmallDenseMap<memref::GlobalOp, MemEntry*, 8> globalConstants;
|
|
|
|
SmallDenseMap<memref::GlobalOp, MemEntry*, 8> globalConstants;
|
|
|
|
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
|
|
|
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
|
|
|
if (!getGlobalOp->hasAttr("weightAlways")) {
|
|
|
|
if (!getGlobalOp->hasAttr("weightAlways")) {
|
|
|
|
auto globalMemrefOp = moduleOp.lookupSymbol<memref::GlobalOp>(getGlobalOp.getName());
|
|
|
|
auto globalMemrefOp = moduleOp.lookupSymbol<memref::GlobalOp>(getGlobalOp.getName());
|
|
|
|
@@ -113,310 +109,221 @@ size_t PimAcceleratorMemory::getValueAddress(Value value) const {
|
|
|
|
return memEntriesMap.at(value).address;
|
|
|
|
return memEntriesMap.at(value).address;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object PimCodeGen::createSetImmediate(size_t targetRegister, size_t immediate) {
|
|
|
|
json::Object PimCodeGen::createEmptyOffset() {
|
|
|
|
llvm::json::Object returnValue;
|
|
|
|
json::Object offset;
|
|
|
|
returnValue["op"] = "sldi";
|
|
|
|
offset["offset_select"] = 0;
|
|
|
|
returnValue["rd"] = targetRegister;
|
|
|
|
offset["offset_value"] = 0;
|
|
|
|
returnValue["imm"] = immediate;
|
|
|
|
return offset;
|
|
|
|
return returnValue;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object PimCodeGen::createEmptyOffset() {
|
|
|
|
void PimCodeGen::emitInstruction(json::Object instruction) const {
|
|
|
|
llvm::json::Object returnValue;
|
|
|
|
coreFileStream << json::Value(std::move(instruction)) << ',';
|
|
|
|
returnValue["offset_select"] = 0;
|
|
|
|
|
|
|
|
returnValue["offset_value"] = 0;
|
|
|
|
|
|
|
|
return returnValue;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) {
|
|
|
|
void PimCodeGen::genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const {
|
|
|
|
llvm::json::Object setRegisterJson = createSetImmediate(registerNumber, immediate);
|
|
|
|
json::Object json;
|
|
|
|
coreFileStream << llvm::json::Value(std::move(setRegisterJson)) << ',';
|
|
|
|
json["op"] = "sldi";
|
|
|
|
|
|
|
|
json["rd"] = registerNumber;
|
|
|
|
|
|
|
|
json["imm"] = immediate;
|
|
|
|
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::createRd(size_t rdAddress, size_t rdOffset) {
|
|
|
|
void PimCodeGen::setupRd(size_t rdAddress, size_t rdOffset) const {
|
|
|
|
// rd on register 0
|
|
|
|
|
|
|
|
genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
|
|
|
|
genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::createRdRs1(size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset) {
|
|
|
|
void PimCodeGen::setupRdRs1(size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset) const {
|
|
|
|
// rd on register 0
|
|
|
|
|
|
|
|
genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
|
|
|
|
genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
|
|
|
|
// rs1 on register 1
|
|
|
|
|
|
|
|
genSetRegisterImmediateUnsigned(1, rs1Address + rs1Offset);
|
|
|
|
genSetRegisterImmediateUnsigned(1, rs1Address + rs1Offset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::createRdRs1Rs2(
|
|
|
|
void PimCodeGen::setupRdRs1Rs2(
|
|
|
|
size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset, size_t rs2Address, size_t rs2Offset) {
|
|
|
|
size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset, size_t rs2Address, size_t rs2Offset) const {
|
|
|
|
// rd on register 0
|
|
|
|
|
|
|
|
genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
|
|
|
|
genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
|
|
|
|
// rs1 on register 1
|
|
|
|
|
|
|
|
genSetRegisterImmediateUnsigned(1, rs1Address + rs1Offset);
|
|
|
|
genSetRegisterImmediateUnsigned(1, rs1Address + rs1Offset);
|
|
|
|
// rs2 on register 2
|
|
|
|
|
|
|
|
genSetRegisterImmediateUnsigned(2, rs2Address + rs2Offset);
|
|
|
|
genSetRegisterImmediateUnsigned(2, rs2Address + rs2Offset);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp) {
|
|
|
|
void PimCodeGen::emitMemCopyOp(
|
|
|
|
auto deviceDst = loadOp.getDeviceDst();
|
|
|
|
StringRef opName, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset, size_t size) const {
|
|
|
|
auto hostSrc = loadOp.getHostSrc();
|
|
|
|
setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset);
|
|
|
|
auto deviceDstOffset = loadOp.getDeviceDstOffset();
|
|
|
|
|
|
|
|
auto hostSrcOffset = loadOp.getHostSrcOffset();
|
|
|
|
|
|
|
|
auto size = loadOp.getSize();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto deviceDstAlloc = memory.getValueAddress(deviceDst);
|
|
|
|
json::Object json;
|
|
|
|
auto hostSrcAlloc = memory.getValueAddress(hostSrc);
|
|
|
|
json["op"] = opName;
|
|
|
|
|
|
|
|
json["rd"] = 0;
|
|
|
|
// Set load rd register (reg 0)
|
|
|
|
json["rs1"] = 1;
|
|
|
|
createRdRs1(deviceDstAlloc, deviceDstOffset, hostSrcAlloc, hostSrcOffset);
|
|
|
|
json["size"] = size;
|
|
|
|
|
|
|
|
json["offset"] = createEmptyOffset();
|
|
|
|
llvm::json::Object loadOpJson;
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
loadOpJson["op"] = "ld";
|
|
|
|
|
|
|
|
loadOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
loadOpJson["rs1"] = 1;
|
|
|
|
|
|
|
|
loadOpJson["size"] = size;
|
|
|
|
|
|
|
|
loadOpJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(loadOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp) {
|
|
|
|
void PimCodeGen::emitCommunicationOp(StringRef opName, size_t bufferAddr, size_t coreId, size_t size) const {
|
|
|
|
auto hostDst = storeOp.getHostDst();
|
|
|
|
setupRd(bufferAddr, 0);
|
|
|
|
auto deviceSrc = storeOp.getDeviceSrc();
|
|
|
|
|
|
|
|
auto hostDstOffset = storeOp.getHostDstOffset();
|
|
|
|
|
|
|
|
auto deviceSrcOffset = storeOp.getDeviceSrcOffset();
|
|
|
|
|
|
|
|
auto size = storeOp.getSize();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto deviceSrcAlloc = memory.getValueAddress(deviceSrc);
|
|
|
|
json::Object json;
|
|
|
|
auto hostDstAlloc = memory.getValueAddress(hostDst);
|
|
|
|
json["op"] = opName;
|
|
|
|
|
|
|
|
json["rd"] = 0;
|
|
|
|
|
|
|
|
json["core"] = coreId;
|
|
|
|
|
|
|
|
json["size"] = size;
|
|
|
|
|
|
|
|
json["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Set load rd register (reg 0)
|
|
|
|
void PimCodeGen::emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const {
|
|
|
|
createRdRs1(hostDstAlloc, hostDstOffset, deviceSrcAlloc, deviceSrcOffset);
|
|
|
|
setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset);
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object storeOpJson;
|
|
|
|
json::Object json;
|
|
|
|
storeOpJson["op"] = "st";
|
|
|
|
json["op"] = "mvmul";
|
|
|
|
storeOpJson["rd"] = 0;
|
|
|
|
json["rd"] = 0;
|
|
|
|
storeOpJson["rs1"] = 1;
|
|
|
|
json["rs1"] = 1;
|
|
|
|
storeOpJson["size"] = size;
|
|
|
|
json["group"] = groupId;
|
|
|
|
storeOpJson["offset"] = createEmptyOffset();
|
|
|
|
json["relu"] = 0;
|
|
|
|
|
|
|
|
json["mbiw"] = 8;
|
|
|
|
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(storeOpJson)) << ',';
|
|
|
|
void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp) const {
|
|
|
|
|
|
|
|
emitMemCopyOp("ld",
|
|
|
|
|
|
|
|
memory.getValueAddress(loadOp.getDeviceDst()),
|
|
|
|
|
|
|
|
loadOp.getDeviceDstOffset(),
|
|
|
|
|
|
|
|
memory.getValueAddress(loadOp.getHostSrc()),
|
|
|
|
|
|
|
|
loadOp.getHostSrcOffset(),
|
|
|
|
|
|
|
|
loadOp.getSize());
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp) const {
|
|
|
|
|
|
|
|
emitMemCopyOp("st",
|
|
|
|
|
|
|
|
memory.getValueAddress(storeOp.getHostDst()),
|
|
|
|
|
|
|
|
storeOp.getHostDstOffset(),
|
|
|
|
|
|
|
|
memory.getValueAddress(storeOp.getDeviceSrc()),
|
|
|
|
|
|
|
|
storeOp.getDeviceSrcOffset(),
|
|
|
|
|
|
|
|
storeOp.getSize());
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp) const {
|
|
|
|
|
|
|
|
emitCommunicationOp(
|
|
|
|
|
|
|
|
"recv", memory.getValueAddress(receiveOp.getDst()), receiveOp.getSrcCoreId(), receiveOp.getSize());
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp) const {
|
|
|
|
|
|
|
|
emitCommunicationOp("send", memory.getValueAddress(sendOp.getSrc()), sendOp.getTargetCoreId(), sendOp.getSize());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename MVMTy>
|
|
|
|
template <typename MVMTy>
|
|
|
|
void PimCodeGen::codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix) {
|
|
|
|
void PimCodeGen::codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix) {
|
|
|
|
auto outBufAlloc = memory.getValueAddress(mvmLikeOp.getOutBuf());
|
|
|
|
emitMvmOp(
|
|
|
|
auto vectorAlloc = memory.getValueAddress(mvmLikeOp.getVectorInput());
|
|
|
|
mvmId, memory.getValueAddress(mvmLikeOp.getOutBuf()), 0, memory.getValueAddress(mvmLikeOp.getVectorInput()), 0);
|
|
|
|
|
|
|
|
|
|
|
|
createRdRs1(outBufAlloc, 0, vectorAlloc, 0);
|
|
|
|
// TODO: save weights somewhere (if transposeMatrix=true, transpose the weight matrix)
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object mvmOpJson;
|
|
|
|
|
|
|
|
mvmOpJson["op"] = "mvmul";
|
|
|
|
|
|
|
|
mvmOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["rs1"] = 1;
|
|
|
|
|
|
|
|
mvmOpJson["group"] = mvmId;
|
|
|
|
|
|
|
|
mvmOpJson["relu"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["mbiw"] = 8;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(mvmOpJson)) << ',';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: save weights somewhere (if transposeMatrix=true, then transpose the
|
|
|
|
|
|
|
|
// weight matrix)
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenApplyFiltersOp(pim::PimApplyFiltersOp applyFiltersOp) {
|
|
|
|
void PimCodeGen::codeGenVAddOp(pim::PimVAddOp vaddOp) const {
|
|
|
|
|
|
|
|
auto outBufAddr = memory.getValueAddress(vaddOp.getOutBuf());
|
|
|
|
|
|
|
|
auto aAddr = memory.getValueAddress(vaddOp.getA());
|
|
|
|
|
|
|
|
auto bAddr = memory.getValueAddress(vaddOp.getB());
|
|
|
|
|
|
|
|
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
|
|
|
|
|
|
|
|
|
|
|
auto outBuff = memory.getValueAddress(applyFiltersOp.getOutBuf());
|
|
|
|
auto outputType = cast<MemRefType>(vaddOp.getOutBuf().getType());
|
|
|
|
auto inBuff = memory.getValueAddress(applyFiltersOp.getInput());
|
|
|
|
size_t totalBytes = outputType.getNumElements() * vaddOp.getOutRes().getType().getElementTypeBitWidth() / 8;
|
|
|
|
auto accumBuff = memory.getValueAddress(applyFiltersOp.getAccumBuf());
|
|
|
|
|
|
|
|
|
|
|
|
json::Object json;
|
|
|
|
|
|
|
|
json["op"] = "vvadd";
|
|
|
|
|
|
|
|
json["rd"] = 0;
|
|
|
|
|
|
|
|
json["rs1"] = 1;
|
|
|
|
|
|
|
|
json["rs2"] = 2;
|
|
|
|
|
|
|
|
json["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
json["len"] = totalBytes;
|
|
|
|
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenVMaxOp(pim::PimVMaxOp vmaxOp) const {
|
|
|
|
|
|
|
|
auto outBufAddr = memory.getValueAddress(vmaxOp.getOutBuf());
|
|
|
|
|
|
|
|
auto aAddr = memory.getValueAddress(vmaxOp.getA());
|
|
|
|
|
|
|
|
auto bAddr = memory.getValueAddress(vmaxOp.getB());
|
|
|
|
|
|
|
|
setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json::Object json;
|
|
|
|
|
|
|
|
json["op"] = "vvmax";
|
|
|
|
|
|
|
|
json["rd"] = 0;
|
|
|
|
|
|
|
|
json["rs1"] = 1;
|
|
|
|
|
|
|
|
json["rs2"] = 2;
|
|
|
|
|
|
|
|
json["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp) const {
|
|
|
|
|
|
|
|
auto outBufAddr = memory.getValueAddress(vreluOp.getOutBuf());
|
|
|
|
|
|
|
|
auto aAddr = memory.getValueAddress(vreluOp.getA());
|
|
|
|
|
|
|
|
setupRdRs1(outBufAddr, 0, aAddr, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json::Object json;
|
|
|
|
|
|
|
|
json["op"] = "vrelu";
|
|
|
|
|
|
|
|
json["rd"] = 0;
|
|
|
|
|
|
|
|
json["rs1"] = 1;
|
|
|
|
|
|
|
|
json["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
emitInstruction(std::move(json));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenApplyFiltersOp(pim::PimApplyFiltersOp applyFiltersOp) const {
|
|
|
|
|
|
|
|
auto outBufAddr = memory.getValueAddress(applyFiltersOp.getOutBuf());
|
|
|
|
|
|
|
|
auto inBufAddr = memory.getValueAddress(applyFiltersOp.getInput());
|
|
|
|
|
|
|
|
auto accumBufAddr = memory.getValueAddress(applyFiltersOp.getAccumBuf());
|
|
|
|
|
|
|
|
|
|
|
|
// Get weight indices from the operation attribute.
|
|
|
|
|
|
|
|
auto weightIndices = applyFiltersOp.getWeightIndices();
|
|
|
|
auto weightIndices = applyFiltersOp.getWeightIndices();
|
|
|
|
|
|
|
|
|
|
|
|
// Get shape of the input tensor.
|
|
|
|
|
|
|
|
auto inputType = cast<MemRefType>(applyFiltersOp.getInput().getType());
|
|
|
|
auto inputType = cast<MemRefType>(applyFiltersOp.getInput().getType());
|
|
|
|
auto outputType = cast<MemRefType>(applyFiltersOp.getOutBuf().getType());
|
|
|
|
auto outputType = cast<MemRefType>(applyFiltersOp.getOutBuf().getType());
|
|
|
|
auto in_shape = inputType.getShape();
|
|
|
|
auto inShape = inputType.getShape();
|
|
|
|
auto out_shape = outputType.getShape();
|
|
|
|
auto outShape = outputType.getShape();
|
|
|
|
|
|
|
|
|
|
|
|
// Extract the relevant dimensions.
|
|
|
|
size_t inChannels = inShape[1];
|
|
|
|
size_t in_channels = in_shape[1]; // Number of input channels.
|
|
|
|
size_t outChannels = outShape[1];
|
|
|
|
size_t out_channels = out_shape[1]; // Number of output channels.
|
|
|
|
size_t dimX = inShape.size() > 2 ? inShape[2] : 1;
|
|
|
|
|
|
|
|
size_t dimY = inShape.size() > 3 ? inShape[3] : 1;
|
|
|
|
|
|
|
|
|
|
|
|
size_t dim2 = in_shape.size() > 2 ? in_shape[2] : 1; // Image width.
|
|
|
|
for (size_t outY = 0; outY < dimY; outY++) {
|
|
|
|
size_t dim3 = in_shape.size() > 3 ? in_shape[3] : 1; // Image height.
|
|
|
|
for (size_t outX = 0; outX < dimX; outX++) {
|
|
|
|
|
|
|
|
|
|
|
|
// Iterate through pixels.
|
|
|
|
|
|
|
|
for (size_t out_y = 0; out_y < dim3; out_y++) {
|
|
|
|
|
|
|
|
for (size_t out_x = 0; out_x < dim2; out_x++) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// For each crossbar, perform the MVMUL operation.
|
|
|
|
|
|
|
|
size_t weightIndex = 0;
|
|
|
|
size_t weightIndex = 0;
|
|
|
|
for (Attribute weight : weightIndices) {
|
|
|
|
for (Attribute weight : weightIndices) {
|
|
|
|
|
|
|
|
// --- STEP 1: Perform MVMUL operation ---
|
|
|
|
// --------------------------------------
|
|
|
|
|
|
|
|
// --- STEP 1: Perform MVUL operation ---
|
|
|
|
|
|
|
|
// --------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Get the weight matrix ID for this position.
|
|
|
|
|
|
|
|
auto weightId = cast<IntegerAttr>(weight).getInt();
|
|
|
|
auto weightId = cast<IntegerAttr>(weight).getInt();
|
|
|
|
|
|
|
|
|
|
|
|
size_t xKer = cast<IntegerAttr>(applyFiltersOp.getXKernelPositions()[weightIndex]).getInt();
|
|
|
|
size_t xKer = cast<IntegerAttr>(applyFiltersOp.getXKernelPositions()[weightIndex]).getInt();
|
|
|
|
size_t yKer = cast<IntegerAttr>(applyFiltersOp.getYKernelPositions()[weightIndex]).getInt();
|
|
|
|
size_t yKer = cast<IntegerAttr>(applyFiltersOp.getYKernelPositions()[weightIndex]).getInt();
|
|
|
|
|
|
|
|
|
|
|
|
weightIndex++;
|
|
|
|
weightIndex++;
|
|
|
|
|
|
|
|
|
|
|
|
if (out_x + xKer >= dim2 || out_y + yKer >= dim3)
|
|
|
|
if (outX + xKer >= dimX || outY + yKer >= dimY)
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
|
|
// Calculate the offset for the input (and output) tensor.
|
|
|
|
size_t outputOffset = (outY * dimX + outX) * 32 * outChannels;
|
|
|
|
size_t output_offset = (out_y * dim2 + out_x) * 32 * out_channels;
|
|
|
|
size_t inputOffset = ((outY + yKer) * dimX + (outX + xKer)) * 32 * inChannels;
|
|
|
|
size_t input_offset = ((out_y + yKer) * dim2 + (out_x + xKer)) * 32 * in_channels;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Read from the input tensor and store the partial result in the
|
|
|
|
bool isFirstWeight = (weightIndices[0] == weight);
|
|
|
|
// accumulator buffer, if this is not the first weight matrix.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Note that rs1 is the input tensor, and rd is the output tensor.
|
|
|
|
// For the first weight, store directly in output buffer; otherwise use accumulator.
|
|
|
|
// TODO: This order of arguments is confusing, check if the correct
|
|
|
|
size_t rdAddr = isFirstWeight ? outBufAddr : accumBufAddr;
|
|
|
|
// order is being used in the WMVUL operation. The order below is
|
|
|
|
size_t rdOffset = isFirstWeight ? outputOffset : 0;
|
|
|
|
// correct.
|
|
|
|
emitMvmOp(weightId, rdAddr, rdOffset, inBufAddr, inputOffset);
|
|
|
|
if (weightIndices[0] != weight) {
|
|
|
|
|
|
|
|
createRdRs1(accumBuff, 0, inBuff, input_offset);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
|
|
|
// Otherwise store directly in the output buffer.
|
|
|
|
|
|
|
|
createRdRs1(outBuff, output_offset, inBuff, input_offset);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Create the MVMUL JSON object
|
|
|
|
// --- STEP 2: Perform VADD operation (skip for first weight) ---
|
|
|
|
llvm::json::Object mvmOpJson;
|
|
|
|
if (isFirstWeight)
|
|
|
|
mvmOpJson["op"] = "mvmul";
|
|
|
|
|
|
|
|
mvmOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["rs1"] = 1;
|
|
|
|
|
|
|
|
mvmOpJson["group"] = weightId;
|
|
|
|
|
|
|
|
mvmOpJson["relu"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["mbiw"] = 8;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Write the JSON to the output stream
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(mvmOpJson)) << ',';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// --------------------------------------
|
|
|
|
|
|
|
|
// --- STEP 2: Perform VADD operation ---
|
|
|
|
|
|
|
|
// --------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// If this is the first weight matrix, we don't need to perform a VADD.
|
|
|
|
|
|
|
|
if (weightIndices[0] == weight)
|
|
|
|
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
|
|
// We now need to sum the value in the accumulator buffer with the value
|
|
|
|
// Sum accumulator with output buffer, store result in output buffer.
|
|
|
|
// in the output buffer, and store the result in the output buffer.
|
|
|
|
setupRdRs1Rs2(outBufAddr, outputOffset, accumBufAddr, 0, outBufAddr, outputOffset);
|
|
|
|
createRdRs1Rs2(outBuff, output_offset, accumBuff, 0, outBuff, output_offset);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object vaddOpJson;
|
|
|
|
json::Object vaddJson;
|
|
|
|
vaddOpJson["op"] = "vvadd";
|
|
|
|
vaddJson["op"] = "vvadd";
|
|
|
|
vaddOpJson["rd"] = 0;
|
|
|
|
vaddJson["rd"] = 0;
|
|
|
|
vaddOpJson["rs1"] = 1;
|
|
|
|
vaddJson["rs1"] = 1;
|
|
|
|
vaddOpJson["rs2"] = 2;
|
|
|
|
vaddJson["rs2"] = 2;
|
|
|
|
vaddOpJson["offset"] = createEmptyOffset();
|
|
|
|
vaddJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
emitInstruction(std::move(vaddJson));
|
|
|
|
coreFileStream << llvm::json::Value(std::move(vaddOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenVAddOp(pim::PimVAddOp vaddOp) {
|
|
|
|
|
|
|
|
auto outBufAlloc = memory.getValueAddress(vaddOp.getOutBuf());
|
|
|
|
|
|
|
|
auto rs1BufferOp = memory.getValueAddress(vaddOp.getA());
|
|
|
|
|
|
|
|
auto rs2BufferOp = memory.getValueAddress(vaddOp.getB());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
createRdRs1Rs2(outBufAlloc, 0, rs1BufferOp, 0, rs2BufferOp, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Get the size of the output buffer.
|
|
|
|
|
|
|
|
auto outputType = cast<MemRefType>(vaddOp.getOutBuf().getType());
|
|
|
|
|
|
|
|
auto out_shape = outputType.getShape();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Multiply all dimension lengths to get the total number of elements.
|
|
|
|
|
|
|
|
size_t totalElements = 1;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < out_shape.size(); i++)
|
|
|
|
|
|
|
|
totalElements *= out_shape[i];
|
|
|
|
|
|
|
|
auto elementSize = vaddOp.getOutRes().getType().getElementTypeBitWidth() / 8;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object mvmOpJson;
|
|
|
|
|
|
|
|
mvmOpJson["op"] = "vvadd";
|
|
|
|
|
|
|
|
mvmOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["rs1"] = 1;
|
|
|
|
|
|
|
|
mvmOpJson["rs2"] = 2;
|
|
|
|
|
|
|
|
mvmOpJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
mvmOpJson["len"] = totalElements * elementSize;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(mvmOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenVMaxOp(pim::PimVMaxOp vmaxOp) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto outBufAlloc = memory.getValueAddress(vmaxOp.getOutBuf());
|
|
|
|
|
|
|
|
auto rs1BufferOp = memory.getValueAddress(vmaxOp.getA());
|
|
|
|
|
|
|
|
auto rs2BufferOp = memory.getValueAddress(vmaxOp.getB());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
createRdRs1Rs2(outBufAlloc, 0, rs1BufferOp, 0, rs2BufferOp, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object mvmOpJson;
|
|
|
|
|
|
|
|
mvmOpJson["op"] = "vvmax";
|
|
|
|
|
|
|
|
mvmOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["rs1"] = 1;
|
|
|
|
|
|
|
|
mvmOpJson["rs2"] = 2;
|
|
|
|
|
|
|
|
mvmOpJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(mvmOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp) {
|
|
|
|
|
|
|
|
auto outBufAlloc = memory.getValueAddress(vreluOp.getOutBuf());
|
|
|
|
|
|
|
|
auto rs1BufferOp = memory.getValueAddress(vreluOp.getA());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
createRdRs1(outBufAlloc, 0, rs1BufferOp, 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object mvmOpJson;
|
|
|
|
|
|
|
|
mvmOpJson["op"] = "vrelu";
|
|
|
|
|
|
|
|
mvmOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
mvmOpJson["rs1"] = 1;
|
|
|
|
|
|
|
|
mvmOpJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(mvmOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto destAlloc = memory.getValueAddress(receiveOp.getDst());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
createRd(destAlloc, /* dest_offset = */ 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object recvOpJson;
|
|
|
|
|
|
|
|
recvOpJson["op"] = "recv";
|
|
|
|
|
|
|
|
recvOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
recvOpJson["core"] = receiveOp.getSrcCoreId();
|
|
|
|
|
|
|
|
recvOpJson["size"] = receiveOp.getSize();
|
|
|
|
|
|
|
|
recvOpJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(recvOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto srcAlloc = memory.getValueAddress(sendOp.getSrc());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Technically a RS1 register, but its just a name..
|
|
|
|
|
|
|
|
createRd(srcAlloc, /* dest_offset = */ 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llvm::json::Object sendOpJson;
|
|
|
|
|
|
|
|
sendOpJson["op"] = "send";
|
|
|
|
|
|
|
|
sendOpJson["rd"] = 0;
|
|
|
|
|
|
|
|
sendOpJson["core"] = sendOp.getTargetCoreId();
|
|
|
|
|
|
|
|
sendOpJson["size"] = sendOp.getSize();
|
|
|
|
|
|
|
|
sendOpJson["offset"] = createEmptyOffset();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << llvm::json::Value(std::move(sendOpJson)) << ',';
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t getMatrixSize(ShapedType matrixShape) {
|
|
|
|
size_t getMatrixSize(ShapedType matrixShape) {
|
|
|
|
if (matrixShape.getRank() != 2 && matrixShape.getRank() != 4)
|
|
|
|
if (matrixShape.getRank() != 2 && matrixShape.getRank() != 4)
|
|
|
|
assert(false && "Unsupported matrix shape");
|
|
|
|
assert(false && "Unsupported matrix shape");
|
|
|
|
@@ -433,36 +340,20 @@ std::string getMemorySizeAsString(size_t size) {
|
|
|
|
return std::to_string(size) + " Bytes";
|
|
|
|
return std::to_string(size) + " Bytes";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
OnnxMlirCompilerErrorCodes compileModuleToPIMJSON(ModuleOp& moduleOp, std::string& outputDirPath) {
|
|
|
|
/// Write global constant data into a binary memory image at their allocated addresses.
|
|
|
|
if (!outputDirPath.empty()) {
|
|
|
|
static OnnxMlirCompilerErrorCodes
|
|
|
|
if (auto error = llvm::sys::fs::create_directory(outputDirPath)) {
|
|
|
|
writeMemoryBinary(ModuleOp moduleOp, func::FuncOp funcOp, PimAcceleratorMemory& memory, StringRef outputDirPath) {
|
|
|
|
llvm::errs() << "Error creating output directory: " << outputDirPath << ": " << error.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// For each core, specify the number of crossbar per array group
|
|
|
|
auto memoryFilePath = (outputDirPath + "/memory.bin").str();
|
|
|
|
// This implementation always assigns one crossbar per group
|
|
|
|
|
|
|
|
llvm::json::Object xbarsPerArrayGroup;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto funcOps = moduleOp.getOps<func::FuncOp>();
|
|
|
|
|
|
|
|
assert(!funcOps.empty() && "No function found in the module");
|
|
|
|
|
|
|
|
auto funcOp = *funcOps.begin();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PimAcceleratorMemory memory;
|
|
|
|
|
|
|
|
memory.hostMem.allocateHost(moduleOp, funcOp);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Write memory binary file
|
|
|
|
|
|
|
|
auto memoryFilePath = outputDirPath + "/memory.bin";
|
|
|
|
|
|
|
|
std::error_code errorCode;
|
|
|
|
std::error_code errorCode;
|
|
|
|
llvm::raw_fd_ostream memoryFileStream(memoryFilePath, errorCode, llvm::sys::fs::OF_None);
|
|
|
|
raw_fd_ostream memoryFileStream(memoryFilePath, errorCode, sys::fs::OF_None);
|
|
|
|
if (errorCode) {
|
|
|
|
if (errorCode) {
|
|
|
|
llvm::errs() << "Error while opening memory file " << memoryFilePath << ": " << errorCode.message() << '\n';
|
|
|
|
errs() << "Error while opening memory file " << memoryFilePath << ": " << errorCode.message() << '\n';
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Zero-initialized buffer
|
|
|
|
|
|
|
|
std::vector<char> memoryBuffer(memory.hostMem.getFirstAvailableAddress(), 0);
|
|
|
|
std::vector<char> memoryBuffer(memory.hostMem.getFirstAvailableAddress(), 0);
|
|
|
|
// Write global values at their allocated addresses
|
|
|
|
|
|
|
|
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
|
|
|
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
|
|
|
if (getGlobalOp->hasAttr("weightAlways"))
|
|
|
|
if (getGlobalOp->hasAttr("weightAlways"))
|
|
|
|
return;
|
|
|
|
return;
|
|
|
|
@@ -491,116 +382,67 @@ OnnxMlirCompilerErrorCodes compileModuleToPIMJSON(ModuleOp& moduleOp, std::strin
|
|
|
|
std::memcpy(dst, rawData.data(), rawData.size());
|
|
|
|
std::memcpy(dst, rawData.data(), rawData.size());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
});
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
memoryFileStream.write(memoryBuffer.data(), memoryBuffer.size());
|
|
|
|
memoryFileStream.write(memoryBuffer.data(), memoryBuffer.size());
|
|
|
|
memoryFileStream.close();
|
|
|
|
memoryFileStream.close();
|
|
|
|
|
|
|
|
return CompilerSuccess;
|
|
|
|
auto outputHostCorePath = outputDirPath + "/core_0.json";
|
|
|
|
|
|
|
|
llvm::raw_fd_ostream hostFileStream(outputHostCorePath, errorCode);
|
|
|
|
|
|
|
|
if (errorCode) {
|
|
|
|
|
|
|
|
llvm::errs() << "Error while opening host core file `" << outputHostCorePath << "`: " << errorCode.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
hostFileStream << "[]";
|
|
|
|
|
|
|
|
hostFileStream.close();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t coreCount = 0;
|
|
|
|
|
|
|
|
for (auto coreOp : funcOp.getOps<pim::PimCoreOp>()) {
|
|
|
|
|
|
|
|
auto coreId = coreOp.getCoreId();
|
|
|
|
|
|
|
|
coreCount++;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::error_code errorCode;
|
|
|
|
|
|
|
|
auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".json";
|
|
|
|
|
|
|
|
llvm::raw_fd_ostream coreFileStream(outputCorePath, errorCode);
|
|
|
|
|
|
|
|
if (errorCode) {
|
|
|
|
|
|
|
|
llvm::errs() << "Error while opening core file `" << outputCorePath << "`: " << errorCode.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
coreFileStream << '[';
|
|
|
|
/// Dispatch all operations in a core region to the appropriate code generator.
|
|
|
|
auto coreNameString = "core" + std::to_string(coreId);
|
|
|
|
/// Returns the number of emitted instructions, or -1 on failure.
|
|
|
|
|
|
|
|
static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
|
|
|
|
PimCodeGen coreCodeGen(memory, coreFileStream);
|
|
|
|
|
|
|
|
memory.getOrCreateDeviceMem(coreId).allocateCore(coreOp);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t processedOperations = 0;
|
|
|
|
size_t processedOperations = 0;
|
|
|
|
for (auto& op : coreOp.getBody().front()) {
|
|
|
|
for (auto& op : coreOp.getBody().front()) {
|
|
|
|
if (isa<memref::AllocOp>(op))
|
|
|
|
if (isa<memref::AllocOp, pim::PimHaltOp, memref::SubViewOp>(op))
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
if (isa<pim::PimHaltOp>(op))
|
|
|
|
|
|
|
|
continue;
|
|
|
|
if (auto loadOp = dyn_cast<pim::PimMemCopyHostToDevOp>(op))
|
|
|
|
if (auto loadOp = dyn_cast<pim::PimMemCopyHostToDevOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenLoadOp(loadOp);
|
|
|
|
coreCodeGen.codeGenLoadOp(loadOp);
|
|
|
|
}
|
|
|
|
else if (auto storeOp = dyn_cast<pim::PimMemCopyDevToHostOp>(op))
|
|
|
|
else if (auto storeOp = dyn_cast<pim::PimMemCopyDevToHostOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenStoreOp(storeOp);
|
|
|
|
coreCodeGen.codeGenStoreOp(storeOp);
|
|
|
|
}
|
|
|
|
else if (auto vmmOp = dyn_cast<pim::PimVMMOp>(op))
|
|
|
|
else if (auto vmmOp = dyn_cast<pim::PimVMMOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenMVMLikeOp<pim::PimVMMOp>(vmmOp.getWeightIndex(), vmmOp, true);
|
|
|
|
coreCodeGen.codeGenMVMLikeOp<pim::PimVMMOp>(vmmOp.getWeightIndex(), vmmOp, true);
|
|
|
|
}
|
|
|
|
else if (auto mvmOp = dyn_cast<pim::PimMVMOp>(op))
|
|
|
|
else if (auto mvmOp = dyn_cast<pim::PimMVMOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenMVMLikeOp<pim::PimMVMOp>(mvmOp.getWeightIndex(), mvmOp, false);
|
|
|
|
coreCodeGen.codeGenMVMLikeOp<pim::PimMVMOp>(mvmOp.getWeightIndex(), mvmOp, false);
|
|
|
|
}
|
|
|
|
else if (auto applyFiltersOp = dyn_cast<pim::PimApplyFiltersOp>(op))
|
|
|
|
else if (auto applyFiltersOp = dyn_cast<pim::PimApplyFiltersOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenApplyFiltersOp(applyFiltersOp);
|
|
|
|
coreCodeGen.codeGenApplyFiltersOp(applyFiltersOp);
|
|
|
|
}
|
|
|
|
else if (auto vaddOp = dyn_cast<pim::PimVAddOp>(op))
|
|
|
|
else if (auto vaddOp = dyn_cast<pim::PimVAddOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenVAddOp(vaddOp);
|
|
|
|
coreCodeGen.codeGenVAddOp(vaddOp);
|
|
|
|
}
|
|
|
|
else if (auto vmaxOp = dyn_cast<pim::PimVMaxOp>(op))
|
|
|
|
else if (auto vmaxOp = dyn_cast<pim::PimVMaxOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenVMaxOp(vmaxOp);
|
|
|
|
coreCodeGen.codeGenVMaxOp(vmaxOp);
|
|
|
|
}
|
|
|
|
else if (auto vreluOp = dyn_cast<pim::PimVReluOp>(op))
|
|
|
|
else if (auto vreluOp = dyn_cast<pim::PimVReluOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenVReluOp(vreluOp);
|
|
|
|
coreCodeGen.codeGenVReluOp(vreluOp);
|
|
|
|
}
|
|
|
|
else if (auto receiveOp = dyn_cast<pim::PimReceiveOp>(op))
|
|
|
|
else if (auto receiveOp = dyn_cast<pim::PimReceiveOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenReceiveOp(receiveOp);
|
|
|
|
coreCodeGen.codeGenReceiveOp(receiveOp);
|
|
|
|
}
|
|
|
|
else if (auto sendOp = dyn_cast<pim::PimSendOp>(op))
|
|
|
|
else if (auto sendOp = dyn_cast<pim::PimSendOp>(op)) {
|
|
|
|
|
|
|
|
coreCodeGen.codeGenSendOp(sendOp);
|
|
|
|
coreCodeGen.codeGenSendOp(sendOp);
|
|
|
|
}
|
|
|
|
else if (isa<pim::PimSumOp, pim::PimVSDivOp, pim::PimVExpOp>(op)) {
|
|
|
|
else if (auto sumOp = dyn_cast<pim::PimSumOp>(op)) {
|
|
|
|
|
|
|
|
// TODO: Implement somehow?
|
|
|
|
// TODO: Implement somehow?
|
|
|
|
op.emitWarning("Sum operation is not supported");
|
|
|
|
op.emitWarning("Operation is not yet supported in code generation");
|
|
|
|
continue;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (auto vsDivOp = dyn_cast<pim::PimVSDivOp>(op)) {
|
|
|
|
|
|
|
|
// TODO: Implement somehow?
|
|
|
|
|
|
|
|
op.emitWarning("VSDiv operation is not supported");
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (auto vexpOp = dyn_cast<pim::PimVExpOp>(op)) {
|
|
|
|
|
|
|
|
// TODO: Implement somehow?
|
|
|
|
|
|
|
|
op.emitWarning("VExp operation is not supported");
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (isa<memref::SubViewOp>(op)) {
|
|
|
|
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
else {
|
|
|
|
op.emitError("Unsupported codegen for this operation");
|
|
|
|
op.emitError("Unsupported codegen for this operation");
|
|
|
|
op.dump();
|
|
|
|
op.dump();
|
|
|
|
return CompilerFailure;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
processedOperations++;
|
|
|
|
processedOperations++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(processedOperations > 0);
|
|
|
|
return processedOperations;
|
|
|
|
// Remove trailing comma
|
|
|
|
|
|
|
|
coreFileStream.seek(coreFileStream.tell() - 1);
|
|
|
|
|
|
|
|
coreFileStream << ']';
|
|
|
|
|
|
|
|
coreFileStream.close();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Create output directory for this core's crossbar weights
|
|
|
|
|
|
|
|
auto coreWeightsDirPath = outputDirPath + "/core_" + std::to_string(coreId);
|
|
|
|
|
|
|
|
if (auto error = llvm::sys::fs::create_directory(coreWeightsDirPath)) {
|
|
|
|
|
|
|
|
llvm::errs() << "Error creating core directory: " << coreWeightsDirPath << ": " << error.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Write crossbar weight matrices as padded binary files for a single core.
|
|
|
|
|
|
|
|
static OnnxMlirCompilerErrorCodes writeCrossbarWeights(ModuleOp moduleOp,
|
|
|
|
|
|
|
|
pim::PimCoreOp coreOp,
|
|
|
|
|
|
|
|
StringRef coreWeightsDirPath,
|
|
|
|
|
|
|
|
json::Array& xbarsPerGroup) {
|
|
|
|
int64_t xbarSize = crossbarSize.getValue();
|
|
|
|
int64_t xbarSize = crossbarSize.getValue();
|
|
|
|
|
|
|
|
std::error_code errorCode;
|
|
|
|
size_t weightIndex = 0;
|
|
|
|
size_t weightIndex = 0;
|
|
|
|
llvm::json::Array xbarsPerGroup;
|
|
|
|
|
|
|
|
for (auto weight : coreOp.getWeights()) {
|
|
|
|
for (auto weight : coreOp.getWeights()) {
|
|
|
|
xbarsPerGroup.push_back(weightIndex);
|
|
|
|
xbarsPerGroup.push_back(weightIndex);
|
|
|
|
|
|
|
|
|
|
|
|
auto getGlobalOp = weight.getDefiningOp<memref::GetGlobalOp>();
|
|
|
|
auto getGlobalOp = weight.getDefiningOp<memref::GetGlobalOp>();
|
|
|
|
if (!getGlobalOp) {
|
|
|
|
if (!getGlobalOp) {
|
|
|
|
coreOp.emitWarning("Weight is not from a memref.get_global at index " + std::to_string(weightIndex));
|
|
|
|
coreOp.emitWarning("Weight is not from a memref.get_global at index " + std::to_string(weightIndex));
|
|
|
|
@@ -636,14 +478,12 @@ OnnxMlirCompilerErrorCodes compileModuleToPIMJSON(ModuleOp& moduleOp, std::strin
|
|
|
|
int64_t numCols = shape[1];
|
|
|
|
int64_t numCols = shape[1];
|
|
|
|
assert(numRows <= xbarSize && numCols <= xbarSize && "Weight dimensions must not exceed crossbar size");
|
|
|
|
assert(numRows <= xbarSize && numCols <= xbarSize && "Weight dimensions must not exceed crossbar size");
|
|
|
|
|
|
|
|
|
|
|
|
auto elementType = type.getElementType();
|
|
|
|
size_t elementByteWidth = type.getElementType().getIntOrFloatBitWidth() / 8;
|
|
|
|
size_t elementByteWidth = elementType.getIntOrFloatBitWidth() / 8;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Write crossbar weights as binary, padded to crossbarSize x crossbarSize
|
|
|
|
auto weightFilePath = (coreWeightsDirPath + "/crossbar_" + std::to_string(weightIndex) + ".bin").str();
|
|
|
|
auto weightFilePath = coreWeightsDirPath + "/crossbar_" + std::to_string(weightIndex) + ".bin";
|
|
|
|
raw_fd_ostream weightFileStream(weightFilePath, errorCode, sys::fs::OF_None);
|
|
|
|
llvm::raw_fd_ostream weightFileStream(weightFilePath, errorCode, llvm::sys::fs::OF_None);
|
|
|
|
|
|
|
|
if (errorCode) {
|
|
|
|
if (errorCode) {
|
|
|
|
llvm::errs() << "Error while opening weight file `" << weightFilePath << "`: " << errorCode.message() << '\n';
|
|
|
|
errs() << "Error while opening weight file `" << weightFilePath << "`: " << errorCode.message() << '\n';
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
@@ -665,52 +505,129 @@ OnnxMlirCompilerErrorCodes compileModuleToPIMJSON(ModuleOp& moduleOp, std::strin
|
|
|
|
weightFileStream.close();
|
|
|
|
weightFileStream.close();
|
|
|
|
weightIndex++;
|
|
|
|
weightIndex++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
xbarsPerArrayGroup[coreNameString] = std::move(xbarsPerGroup);
|
|
|
|
|
|
|
|
|
|
|
|
return CompilerSuccess;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Step 3: Write configuration to JSON
|
|
|
|
/// Write the top-level PIM configuration JSON (core count, crossbar config, I/O addresses).
|
|
|
|
llvm::json::Object configJson;
|
|
|
|
static OnnxMlirCompilerErrorCodes writeConfigJson(func::FuncOp funcOp,
|
|
|
|
|
|
|
|
PimAcceleratorMemory& memory,
|
|
|
|
|
|
|
|
size_t coreCount,
|
|
|
|
|
|
|
|
json::Object xbarsPerArrayGroup,
|
|
|
|
|
|
|
|
StringRef outputDirPath) {
|
|
|
|
|
|
|
|
json::Object configJson;
|
|
|
|
configJson["core_cnt"] = coreCount;
|
|
|
|
configJson["core_cnt"] = coreCount;
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: Should this be based on the floating point type used in the model?
|
|
|
|
// TODO: Should this be based on the floating point type used in the model?
|
|
|
|
//// The 2 following values determine the bitwidth of the vectors' elements:
|
|
|
|
// The 2 following values determine the bitwidth of the vectors' elements: bitwidth = adc_count * cell_precision
|
|
|
|
//// bitwidth = adc_count * cell_precision
|
|
|
|
|
|
|
|
// Number of ADC for MVM units
|
|
|
|
// Number of ADC for MVM units
|
|
|
|
configJson["adc_count"] = 16;
|
|
|
|
configJson["adc_count"] = 16;
|
|
|
|
// Bit precision of each ADC
|
|
|
|
// The bit precision of each ADC
|
|
|
|
configJson["cell_precision"] = 2;
|
|
|
|
configJson["cell_precision"] = 2;
|
|
|
|
|
|
|
|
|
|
|
|
//// Crossbar configuration
|
|
|
|
// Crossbar configuration
|
|
|
|
configJson["xbar_array_count"] = crossbarCountInCore.getValue();
|
|
|
|
configJson["xbar_array_count"] = crossbarCountInCore.getValue();
|
|
|
|
configJson["xbar_size"] = {crossbarSize.getValue(), crossbarSize.getValue()};
|
|
|
|
configJson["xbar_size"] = {crossbarSize.getValue(), crossbarSize.getValue()};
|
|
|
|
|
|
|
|
|
|
|
|
// Store the crossbar sizes
|
|
|
|
|
|
|
|
configJson["array_group_map"] = std::move(xbarsPerArrayGroup);
|
|
|
|
configJson["array_group_map"] = std::move(xbarsPerArrayGroup);
|
|
|
|
|
|
|
|
|
|
|
|
// Store the memory layout of inputs and outputs
|
|
|
|
// Memory layout of inputs and outputs
|
|
|
|
llvm::json::Array inputsAddresses;
|
|
|
|
json::Array inputsAddresses;
|
|
|
|
for (BlockArgument input : funcOp.getArguments())
|
|
|
|
for (BlockArgument input : funcOp.getArguments())
|
|
|
|
inputsAddresses.push_back(memory.getValueAddress(input));
|
|
|
|
inputsAddresses.push_back(memory.getValueAddress(input));
|
|
|
|
configJson["inputs_addresses"] = std::move(inputsAddresses);
|
|
|
|
configJson["inputs_addresses"] = std::move(inputsAddresses);
|
|
|
|
llvm::json::Array outputsAddresses;
|
|
|
|
|
|
|
|
|
|
|
|
json::Array outputsAddresses;
|
|
|
|
for (func::ReturnOp returnOp : funcOp.getOps<func::ReturnOp>())
|
|
|
|
for (func::ReturnOp returnOp : funcOp.getOps<func::ReturnOp>())
|
|
|
|
for (Value output : returnOp.getOperands())
|
|
|
|
for (Value output : returnOp.getOperands())
|
|
|
|
outputsAddresses.push_back(memory.getValueAddress(output));
|
|
|
|
outputsAddresses.push_back(memory.getValueAddress(output));
|
|
|
|
configJson["outputs_addresses"] = std::move(outputsAddresses);
|
|
|
|
configJson["outputs_addresses"] = std::move(outputsAddresses);
|
|
|
|
|
|
|
|
|
|
|
|
// Step 4: Write config JSON
|
|
|
|
auto configPath = (outputDirPath + "/config.json").str();
|
|
|
|
std::string openOutputErrorMsg;
|
|
|
|
std::error_code errorCode;
|
|
|
|
auto configPath = outputDirPath + "/config.json";
|
|
|
|
raw_fd_ostream jsonOS(configPath, errorCode);
|
|
|
|
std::error_code EC;
|
|
|
|
if (errorCode) {
|
|
|
|
llvm::raw_fd_ostream jsonOS(configPath, EC);
|
|
|
|
errs() << "Error while opening config file: " << errorCode.message() << '\n';
|
|
|
|
if (EC) {
|
|
|
|
|
|
|
|
llvm::errs() << "Error while opening config file: " << EC.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
jsonOS << llvm::json::Value(std::move(configJson)) << '\n';
|
|
|
|
jsonOS << json::Value(std::move(configJson)) << '\n';
|
|
|
|
jsonOS.close();
|
|
|
|
jsonOS.close();
|
|
|
|
|
|
|
|
|
|
|
|
return CompilerSuccess;
|
|
|
|
return CompilerSuccess;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
} // namespace onnx_mlir
|
|
|
|
OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::string& outputDirPath) {
|
|
|
|
|
|
|
|
if (!outputDirPath.empty()) {
|
|
|
|
|
|
|
|
if (auto error = sys::fs::create_directory(outputDirPath)) {
|
|
|
|
|
|
|
|
errs() << "Error creating output directory: " << outputDirPath << ": " << error.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auto funcOps = moduleOp.getOps<func::FuncOp>();
|
|
|
|
|
|
|
|
assert(!funcOps.empty() && "No function found in the module");
|
|
|
|
|
|
|
|
auto funcOp = *funcOps.begin();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PimAcceleratorMemory memory;
|
|
|
|
|
|
|
|
memory.hostMem.allocateHost(moduleOp, funcOp);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath))
|
|
|
|
|
|
|
|
return err;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Write empty host core file
|
|
|
|
|
|
|
|
std::error_code errorCode;
|
|
|
|
|
|
|
|
auto outputHostCorePath = outputDirPath + "/core_0.json";
|
|
|
|
|
|
|
|
raw_fd_ostream hostFileStream(outputHostCorePath, errorCode);
|
|
|
|
|
|
|
|
if (errorCode) {
|
|
|
|
|
|
|
|
errs() << "Error while opening host core file `" << outputHostCorePath << "`: " << errorCode.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
hostFileStream << "[]";
|
|
|
|
|
|
|
|
hostFileStream.close();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// For each core, specify the number of crossbar per array group.
|
|
|
|
|
|
|
|
// This implementation always assigns one crossbar per group.
|
|
|
|
|
|
|
|
json::Object xbarsPerArrayGroup;
|
|
|
|
|
|
|
|
size_t coreCount = 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (auto coreOp : funcOp.getOps<pim::PimCoreOp>()) {
|
|
|
|
|
|
|
|
auto coreId = coreOp.getCoreId();
|
|
|
|
|
|
|
|
coreCount++;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::error_code errorCode;
|
|
|
|
|
|
|
|
auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".json";
|
|
|
|
|
|
|
|
raw_fd_ostream coreFileStream(outputCorePath, errorCode);
|
|
|
|
|
|
|
|
if (errorCode) {
|
|
|
|
|
|
|
|
errs() << "Error while opening core file `" << outputCorePath << "`: " << errorCode.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
coreFileStream << '[';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
PimCodeGen coreCodeGen(memory, coreFileStream);
|
|
|
|
|
|
|
|
memory.getOrCreateDeviceMem(coreId).allocateCore(coreOp);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int64_t processedOperations = codeGenCoreOps(coreOp, coreCodeGen);
|
|
|
|
|
|
|
|
if (processedOperations < 0)
|
|
|
|
|
|
|
|
return CompilerFailure;
|
|
|
|
|
|
|
|
assert(processedOperations > 0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Remove trailing comma, close JSON array
|
|
|
|
|
|
|
|
coreFileStream.seek(coreFileStream.tell() - 1);
|
|
|
|
|
|
|
|
coreFileStream << ']';
|
|
|
|
|
|
|
|
coreFileStream.close();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Write crossbar weights for this core
|
|
|
|
|
|
|
|
auto coreWeightsDirPath = outputDirPath + "/core_" + std::to_string(coreId);
|
|
|
|
|
|
|
|
if (auto error = sys::fs::create_directory(coreWeightsDirPath)) {
|
|
|
|
|
|
|
|
errs() << "Error creating core directory: " << coreWeightsDirPath << ": " << error.message() << '\n';
|
|
|
|
|
|
|
|
return InvalidOutputFileAccess;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
json::Array xbarsPerGroup;
|
|
|
|
|
|
|
|
if (auto err = writeCrossbarWeights(moduleOp, coreOp, coreWeightsDirPath, xbarsPerGroup))
|
|
|
|
|
|
|
|
return err;
|
|
|
|
|
|
|
|
xbarsPerArrayGroup["core" + std::to_string(coreId)] = std::move(xbarsPerGroup);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return writeConfigJson(funcOp, memory, coreCount, std::move(xbarsPerArrayGroup), outputDirPath);
|
|
|
|
|
|
|
|
}
|
|
|
|
|