// NOTE(review): this file appears to have lost every `<...>` token sequence: the three bare
// `#include` directives below and the argument-less `cast(...)`, `dyn_cast(...)`, `isa(...)`,
// `SmallVector`, `std::vector`, `ArrayRef`, `getAsRange()` and `template` uses further down.
// They are reproduced exactly as found because the missing names cannot all be recovered from
// this file alone; restore them before building.
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferViewFlowAnalysis.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinTypes.h"

#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"

#include
#include
#include

#include "Common/PimCommon.hpp"
#include "Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
#include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp"
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
#include "src/Compiler/CompilerPasses.hpp"
#include "src/Compiler/CompilerUtils.hpp"

using namespace llvm;
using namespace mlir;
using namespace onnx_mlir;

/// Registers `value` as needing storage and returns a pointer to its (not yet placed) entry.
///
/// The entry size is the element count times the element bit width, divided by 8; the address
/// is left at 0 until allocateMemoryForValue() runs.
///
/// The returned pointer refers to an element of `memEntries`; it must not be kept across later
/// calls that append to, or reorder, that container.
MemEntry* PimMemory::gatherMemEntry(mlir::Value value) {
  auto type = cast(value.getType());
  assert("Only static shape is supported" && type.hasStaticShape());
  size_t allocSize = type.getNumElements() * type.getElementType().getIntOrFloatBitWidth() / 8;
  MemEntry memEntry = {0, allocSize};
  return &memEntries.emplace_back(memEntry, value).first;
}

/// Places `memEntry` at the next free address, advances the free pointer (rounded up to
/// `minAlignment`) and publishes the placed entry for `value` in `globalMemEntriesMap`.
void PimMemory::allocateMemoryForValue(mlir::Value value, MemEntry& memEntry) {
  memEntry.address = firstAvailableAddress;
  firstAvailableAddress += memEntry.size;
  // Alignment
  if (size_t remainder = firstAvailableAddress % minAlignment)
    firstAvailableAddress += minAlignment - remainder;
  globalMemEntriesMap[value] = memEntry;
}

/// Lays out host memory for `funcOp`: global constants (except those for which
/// hasWeightAlways() is true), the function arguments, and every memref.alloc found by
/// allocateCore().
void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
  // More than one SSA value may refer to a single global constant. Gathering an entry for each
  // of them would reserve the constant's storage multiple times, so only the first SSA value
  // seen for a global gets an entry; the others are recorded as aliases of that first value.
  struct GlobalAlias {
    mlir::Value alias;
    mlir::Value canonical;
  };
  SmallDenseMap<Operation*, mlir::Value, 8> firstValueOfGlobal;
  SmallVector<GlobalAlias, 8> aliases;

  funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
    if (hasWeightAlways(getGlobalOp))
      return;
    auto globalMemrefOp = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
    mlir::Value result = getGlobalOp.getResult();
    auto [iter, inserted] = firstValueOfGlobal.try_emplace(globalMemrefOp.getOperation(), result);
    if (inserted)
      gatherMemEntry(result);
    else
      aliases.push_back({result, iter->second});
  });

  for (mlir::Value arg : funcOp.getArguments())
    gatherMemEntry(arg);

  allocateCore(funcOp);

  // Addresses only become final inside allocateCore(). Copying the entry any earlier would
  // record address 0 for every alias (and would read through a pointer into `memEntries` that
  // later insertions and the sort may have invalidated), so aliases are resolved here.
  for (const GlobalAlias& entry : aliases)
    globalMemEntriesMap[entry.alias] = getMemEntry(entry.canonical);
}

/// Gathers an entry for every memref.alloc nested under `op`, then assigns addresses to all
/// gathered entries in order of decreasing size.
void PimMemory::allocateCore(Operation* op) {
  op->walk([&](memref::AllocOp allocOp) { gatherMemEntry(allocOp); });

  llvm::sort(memEntries, [](const auto& a, const auto& b) -> bool { return a.first.size > b.first.size; });

  for (auto& [memEntry, value] : memEntries)
    allocateMemoryForValue(value, memEntry);
}

/// Returns the placed entry for `value`; asserts that one exists.
MemEntry PimMemory::getMemEntry(mlir::Value value) const {
  auto iter = globalMemEntriesMap.find(value);
  assert("Missing memEntry for value" && iter != globalMemEntriesMap.end());
  return iter->second;
}

/// Returns the memory object of device `id`, creating it (bound to `memEntriesMap`) on first use.
PimMemory& PimAcceleratorMemory::getOrCreateDeviceMem(size_t id) {
  return deviceMem.try_emplace(id, memEntriesMap).first->second;
}

/// Resolves `value` to a byte address.
///
/// Walks backwards through the defining ops that do not own storage themselves (tied
/// destination-style results, subviews, casts, collapse/expand shape), accumulating the byte
/// offset introduced by subviews, until a value with a recorded entry is reached. Aborts with a
/// diagnostic if no entry exists for the value the walk ends on.
size_t PimAcceleratorMemory::getValueAddress(mlir::Value value) const {
  size_t offset = 0;
  while (true) {
    auto definingOp = value.getDefiningOp();
    if (!definingOp)
      break;
    if (auto dpsDefiningOp = dyn_cast(definingOp)) {
      OpOperand* tiedOperand = dpsDefiningOp.getTiedOpOperand(cast(value));
      if (!tiedOperand)
        break;
      value = tiedOperand->get();
    }
    else if (auto subviewDefiningOp = dyn_cast(definingOp)) {
      auto source = subviewDefiningOp.getSource();
      auto srcShape = source.getType().getShape();
      auto subviewOffsets = subviewDefiningOp.getStaticOffsets();
      auto subviewSizes = subviewDefiningOp.getStaticSizes();
      auto subviewStrides = subviewDefiningOp.getStaticStrides();
      assert(isMemoryContiguous(srcShape, subviewOffsets, subviewSizes, subviewStrides));
      for (unsigned i = 0; i < subviewOffsets.size(); i++) {
        // An offset along dimension i advances by the row-major stride of the *source* in that
        // dimension, i.e. the product of the source extents of all inner dimensions. Using the
        // subview's own sizes here under-counts whenever an inner dimension is only partially
        // covered by the subview.
        size_t localOffset = subviewOffsets[i];
        for (unsigned j = i + 1; j < srcShape.size(); j++)
          localOffset *= srcShape[j];
        offset += localOffset * subviewDefiningOp.getType().getElementTypeBitWidth() / 8;
      }
      value = source;
    }
    else if (auto castOp = dyn_cast(definingOp)) {
      value = castOp.getSource();
    }
    else if (auto collapseOp = dyn_cast(definingOp)) {
      value = collapseOp.getSrc();
    }
    else if (auto expandOp = dyn_cast(definingOp)) {
      value = expandOp.getSrc();
    }
    else
      break;
  }

  auto iter = memEntriesMap.find(value);
  if (iter == memEntriesMap.end()) {
    errs() << "Missing mem entry for value: ";
    value.print(errs());
    errs() << "\n";
    if (auto* definingOp = value.getDefiningOp()) {
      errs() << "Defining op:\n";
      definingOp->print(errs());
      errs() << "\n";
    }
    llvm_unreachable("Missing mem entry");
  }

  return iter->second.address + offset;
}

/// Builds the `offset` object attached to instructions, with both fields set to 0.
json::Object PimCodeGen::createEmptyOffset() {
  json::Object offset;
  offset["offset_select"] = 0;
  offset["offset_value"] = 0;
  return offset;
}

/// Appends one instruction to the core file, followed by a comma separator.
void PimCodeGen::emitInstruction(json::Object instruction) const {
  coreFileStream << json::Value(std::move(instruction)) << ',';
}

/// Emits an `sldi` instruction loading `immediate` into register `registerNumber`.
void PimCodeGen::genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const {
  json::Object json;
  json["op"] = "sldi";
  json["rd"] = registerNumber;
  json["imm"] = immediate;
  emitInstruction(std::move(json));
}

/// Loads register 0 with `rdAddress + rdOffset`.
void PimCodeGen::setupRd(size_t rdAddress, size_t rdOffset) const {
  genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
}

/// Loads registers 0 and 1 with the destination and first-source addresses.
void PimCodeGen::setupRdRs1(size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset) const {
  genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
  genSetRegisterImmediateUnsigned(1, rs1Address + rs1Offset);
}

/// Loads registers 0, 1 and 2 with the destination and both source addresses.
void PimCodeGen::setupRdRs1Rs2(
    size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset, size_t rs2Address, size_t rs2Offset) const {
  genSetRegisterImmediateUnsigned(0, rdAddress + rdOffset);
  genSetRegisterImmediateUnsigned(1, rs1Address + rs1Offset);
  genSetRegisterImmediateUnsigned(2, rs2Address + rs2Offset);
}

/// Emits a two-operand copy instruction `opName` whose size is stored under `sizeFieldName`.
void PimCodeGen::emitMemCopyOp(StringRef opName,
    size_t rdAddr,
    size_t rdOffset,
    size_t rs1Addr,
    size_t rs1Offset,
    size_t size,
    StringRef sizeFieldName) const {
  setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset);

  json::Object json;
  json["op"] = opName;
  json["rd"] = 0;
  json["rs1"] = 1;
  json[sizeFieldName] = size;
  json["offset"] = createEmptyOffset();
  emitInstruction(std::move(json));
}

/// Emits a send/receive style instruction `opName` between `bufferAddr` and core `coreId`.
void PimCodeGen::emitCommunicationOp(StringRef opName, size_t bufferAddr, size_t coreId, size_t size) const {
  setupRd(bufferAddr, 0);

  json::Object json;
  json["op"] = opName;
  json["rd"] = 0;
  json["core"] = coreId;
  json["size"] = size;
  json["offset"] = createEmptyOffset();
  emitInstruction(std::move(json));
}

/// Emits an `mvmul` instruction for crossbar group `groupId` (with `relu` 0 and `mbiw` 8).
void PimCodeGen::emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const {
  setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset);

  json::Object json;
  json["op"] = "mvmul";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["group"] = groupId;
  json["relu"] = 0;
  json["mbiw"] = 8;
  emitInstruction(std::move(json));
}

/// Emits `ld` for a host-to-device copy.
void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp) const {
  emitMemCopyOp("ld",
      memory.getValueAddress(loadOp.getDeviceDst()),
      loadOp.getDeviceDstOffset(),
      memory.getValueAddress(loadOp.getHostSrc()),
      loadOp.getHostSrcOffset(),
      loadOp.getSize());
}

/// Emits `st` for a device-to-host copy.
void PimCodeGen::codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp) const {
  emitMemCopyOp("st",
      memory.getValueAddress(storeOp.getHostDst()),
      storeOp.getHostDstOffset(),
      memory.getValueAddress(storeOp.getDeviceSrc()),
      storeOp.getDeviceSrcOffset(),
      storeOp.getSize());
}

/// Emits `lmv` for a copy op; its size is stored in the `len` field.
void PimCodeGen::codeGenLmvOp(pim::PimMemCopyOp lmvOp) const {
  emitMemCopyOp("lmv",
      memory.getValueAddress(lmvOp.getDst()),
      lmvOp.getDstOffset(),
      memory.getValueAddress(lmvOp.getSrc()),
      lmvOp.getSrcOffset(),
      lmvOp.getSize(),
      "len");
}

/// Emits `recv` into the op's destination buffer from its source core.
void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp) const {
  emitCommunicationOp(
      "recv", memory.getValueAddress(receiveOp.getDst()), receiveOp.getSrcCoreId(), receiveOp.getSize());
}

/// Emits `send` from the op's source buffer to its target core.
void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp) const {
  emitCommunicationOp("send", memory.getValueAddress(sendOp.getSrc()), sendOp.getTargetCoreId(), sendOp.getSize());
}

/// Emits the `mvmul` for an MVM-like op using crossbar group `mvmId`.
/// `transposeMatrix` is currently unused (see the TODO below).
template
void PimCodeGen::codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix) {
  emitMvmOp(
      mvmId, memory.getValueAddress(mvmLikeOp.getOutBuf()), 0, memory.getValueAddress(mvmLikeOp.getVectorInput()), 0);

  // TODO: save weights somewhere (if transposeMatrix=true, transpose the weight matrix)
}

/// Emits `vvadd` over the whole output buffer; `len` is the output size in bytes.
void PimCodeGen::codeGenVAddOp(pim::PimVAddOp vaddOp) const {
  auto outBufAddr = memory.getValueAddress(vaddOp.getOutBuf());
  auto aAddr = memory.getValueAddress(vaddOp.getA());
  auto bAddr = memory.getValueAddress(vaddOp.getB());
  setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);

  auto outputType = cast(vaddOp.getOutBuf().getType());
  size_t totalBytes = outputType.getNumElements() * vaddOp.getOutRes().getType().getElementTypeBitWidth() / 8;

  json::Object json;
  json["op"] = "vvadd";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["rs2"] = 2;
  json["offset"] = createEmptyOffset();
  json["len"] = totalBytes;
  emitInstruction(std::move(json));
}

/// Emits `vvmax` for the op's two inputs. No `len` field is written for this instruction.
void PimCodeGen::codeGenVMaxOp(pim::PimVMaxOp vmaxOp) const {
  auto outBufAddr = memory.getValueAddress(vmaxOp.getOutBuf());
  auto aAddr = memory.getValueAddress(vmaxOp.getA());
  auto bAddr = memory.getValueAddress(vmaxOp.getB());
  setupRdRs1Rs2(outBufAddr, 0, aAddr, 0, bAddr, 0);

  json::Object json;
  json["op"] = "vvmax";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["rs2"] = 2;
  json["offset"] = createEmptyOffset();
  emitInstruction(std::move(json));
}

/// Emits `vrelu` for the op's input. No `len` field is written for this instruction.
void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp) const {
  auto outBufAddr = memory.getValueAddress(vreluOp.getOutBuf());
  auto aAddr = memory.getValueAddress(vreluOp.getA());
  setupRdRs1(outBufAddr, 0, aAddr, 0);

  json::Object json;
  json["op"] = "vrelu";
  json["rd"] = 0;
  json["rs1"] = 1;
  json["offset"] = createEmptyOffset();
  emitInstruction(std::move(json));
}

/// Expands an apply-filters op into one `mvmul` per (output position, in-bounds kernel tap),
/// accumulating the taps of each output position with `vvadd`.
///
/// Dimension 1 of the input/output shapes is read as the channel count and dimensions 2 and 3
/// (when present) as the spatial extents.
/// NOTE(review): the factor 32 in the offsets below looks like a per-channel byte stride —
/// confirm against the memory layout used by the rest of the pipeline.
void PimCodeGen::codeGenApplyFiltersOp(pim::PimApplyFiltersOp applyFiltersOp) const {
  auto outBufAddr = memory.getValueAddress(applyFiltersOp.getOutBuf());
  auto inBufAddr = memory.getValueAddress(applyFiltersOp.getInput());
  auto accumBufAddr = memory.getValueAddress(applyFiltersOp.getAccumBuf());

  auto weightIndices = applyFiltersOp.getWeightIndices();

  auto inputType = cast(applyFiltersOp.getInput().getType());
  auto outputType = cast(applyFiltersOp.getOutBuf().getType());
  auto inShape = inputType.getShape();
  auto outShape = outputType.getShape();

  size_t inChannels = inShape[1];
  size_t outChannels = outShape[1];
  size_t dimX = inShape.size() > 2 ? inShape[2] : 1;
  size_t dimY = inShape.size() > 3 ? inShape[3] : 1;

  for (size_t outY = 0; outY < dimY; outY++) {
    for (size_t outX = 0; outX < dimX; outX++) {
      const size_t outputOffset = (outY * dimX + outX) * 32 * outChannels;

      // Whether an MVM result has already been stored for this output position. Tracking this
      // explicitly (instead of comparing each weight attribute with weightIndices[0]) keeps the
      // accumulation correct when a weight id occurs more than once and when the first tap is
      // skipped by the bounds check below.
      bool outputInitialized = false;

      size_t weightIndex = 0;
      for (Attribute weight : weightIndices) {
        // --- STEP 1: Perform MVMUL operation ---
        auto weightId = cast(weight).getInt();
        size_t xKer = cast(applyFiltersOp.getXKernelPositions()[weightIndex]).getInt();
        size_t yKer = cast(applyFiltersOp.getYKernelPositions()[weightIndex]).getInt();
        weightIndex++;

        // Taps that would read past the input extents contribute nothing.
        if (outX + xKer >= dimX || outY + yKer >= dimY)
          continue;

        size_t inputOffset = ((outY + yKer) * dimX + (outX + xKer)) * 32 * inChannels;

        // The first emitted tap is stored directly in the output buffer.
        if (!outputInitialized) {
          emitMvmOp(weightId, outBufAddr, outputOffset, inBufAddr, inputOffset);
          outputInitialized = true;
          continue;
        }

        // Every later tap goes through the accumulator.
        emitMvmOp(weightId, accumBufAddr, 0, inBufAddr, inputOffset);

        // --- STEP 2: Perform VADD operation ---
        // Sum accumulator with output buffer, store result in output buffer.
        setupRdRs1Rs2(outBufAddr, outputOffset, accumBufAddr, 0, outBufAddr, outputOffset);

        json::Object vaddJson;
        vaddJson["op"] = "vvadd";
        vaddJson["rd"] = 0;
        vaddJson["rs1"] = 1;
        vaddJson["rs2"] = 2;
        vaddJson["offset"] = createEmptyOffset();
        emitInstruction(std::move(vaddJson));
      }
    }
  }
}

/// Lowers a transpose to one `lmv` per element, copying each source element to its permuted
/// position in the destination buffer.
void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp) const {
  auto srcAddr = memory.getValueAddress(transposeOp.getData());
  auto dstAddr = memory.getValueAddress(transposeOp.getOutBuf());

  auto srcType = cast(transposeOp.getData().getType());
  auto srcShape = srcType.getShape();
  size_t rank = srcShape.size();
  size_t elementSize = srcType.getElementTypeBitWidth() / 8;
  size_t totalElements = srcType.getNumElements();

  // Read permutation. Destination dim i corresponds to source dim perm[i].
  SmallVector perm =
      map_to_vector(transposeOp.getPerms().getAsRange(), [](auto attr) -> int64_t { return attr.getInt(); });

  // Destination shape: dstShape[i] = srcShape[perm[i]]
  SmallVector dstShape(rank);
  for (size_t i = 0; i < rank; i++)
    dstShape[i] = srcShape[perm[i]];

  // Row-major strides for source and destination
  SmallVector srcStrides(rank, 1);
  SmallVector dstStrides(rank, 1);
  for (int64_t i = rank - 2; i >= 0; i--) {
    srcStrides[i] = srcStrides[i + 1] * srcShape[i + 1];
    dstStrides[i] = dstStrides[i + 1] * dstShape[i + 1];
  }

  // Emit element-by-element copy with transposed addressing
  for (size_t srcFlat = 0; srcFlat < totalElements; srcFlat++) {
    // Decompose flat source index into multi-dimensional index
    SmallVector srcIdx(rank);
    size_t remaining = srcFlat;
    for (size_t d = 0; d < rank; d++) {
      srcIdx[d] = remaining / srcStrides[d];
      remaining %= srcStrides[d];
    }

    // Compute flat destination index: dstIdx[d] = srcIdx[perm[d]]
    size_t dstFlat = 0;
    for (size_t d = 0; d < rank; d++)
      dstFlat += srcIdx[perm[d]] * dstStrides[d];

    emitMemCopyOp("lmv", dstAddr, dstFlat * elementSize, srcAddr, srcFlat * elementSize, elementSize, "len");
  }
}

/// Returns the larger of the first two dimensions of `matrixShape`.
/// Only rank-2 and rank-4 shapes are accepted (checked by assertion).
size_t getMatrixSize(ShapedType matrixShape) {
  if (matrixShape.getRank() != 2 && matrixShape.getRank() != 4)
    assert(false && "Unsupported matrix shape");
  return std::max(matrixShape.getDimSize(0), matrixShape.getDimSize(1));
}

/// Formats `size` (in bytes) using the largest binary unit that is not bigger than the value;
/// the result is truncated to a whole number of that unit.
std::string getMemorySizeAsString(size_t size) {
  constexpr size_t kKiB = 1024;
  constexpr size_t kMiB = kKiB * 1024;
  constexpr size_t kGiB = kMiB * 1024;

  // `>=` so that an exact multiple switches unit (1024 bytes is "1 KB", not "1024 Bytes").
  if (size >= kGiB)
    return std::to_string(size / kGiB) + " GB";
  if (size >= kMiB)
    return std::to_string(size / kMiB) + " MB";
  if (size >= kKiB)
    return std::to_string(size / kKiB) + " KB";
  return std::to_string(size) + " Bytes";
}

/// Write global constant data into a binary memory image at their allocated addresses.
///
/// The image is `memory.hostMem.getFirstAvailableAddress()` bytes long and zero-filled; globals
/// that have no dense initial value are left as zeros. Returns InvalidOutputFileAccess if
/// `memory.bin` cannot be opened under `outputDirPath`.
static OnnxMlirCompilerErrorCodes
writeMemoryBinary(ModuleOp moduleOp, func::FuncOp funcOp, PimAcceleratorMemory& memory, StringRef outputDirPath) {
  auto memoryFilePath = (outputDirPath + "/memory.bin").str();
  std::error_code errorCode;
  raw_fd_ostream memoryFileStream(memoryFilePath, errorCode, sys::fs::OF_None);
  if (errorCode) {
    errs() << "Error while opening memory file " << memoryFilePath << ": " << errorCode.message() << '\n';
    return InvalidOutputFileAccess;
  }

  std::vector memoryBuffer(memory.hostMem.getFirstAvailableAddress(), 0);

  funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
    if (hasWeightAlways(getGlobalOp))
      return;
    auto globalOp = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
    if (!globalOp)
      return;
    auto initialValue = globalOp.getInitialValue();
    if (!initialValue)
      return;
    auto denseAttr = dyn_cast(*initialValue);
    if (!denseAttr)
      return;

    MemEntry memEntry = memory.hostMem.getMemEntry(getGlobalOp.getResult());
    ArrayRef rawData = denseAttr.getRawData();
    char* dst = memoryBuffer.data() + memEntry.address;

    if (denseAttr.isSplat()) {
      // For a splat the raw data is treated as a single element, replicated over the entry.
      size_t elementSize = rawData.size();
      assert(elementSize * getGlobalOp.getType().getNumElements() == memEntry.size && "Data size mismatch");
      for (size_t offset = 0; offset < memEntry.size; offset += elementSize)
        std::memcpy(dst + offset, rawData.data(), std::min(elementSize, memEntry.size - offset));
    }
    else {
      assert(rawData.size() == memEntry.size && "Data size mismatch");
      std::memcpy(dst, rawData.data(), rawData.size());
    }
  });

  memoryFileStream.write(memoryBuffer.data(), memoryBuffer.size());
  memoryFileStream.close();
  return CompilerSuccess;
}

/// Dispatch all operations in a core region to the appropriate code generator.
/// Returns the number of processed operations, or -1 on failure.
static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
  size_t processedOperations = 0;
  for (auto& op : coreOp.getBody().front()) {
    // NOTE(review): the op list of this `isa` is missing from the file; these ops are skipped
    // without being counted.
    if (isa(op))
      continue;

    if (auto loadOp = dyn_cast(op))
      coreCodeGen.codeGenLoadOp(loadOp);
    else if (auto storeOp = dyn_cast(op))
      coreCodeGen.codeGenStoreOp(storeOp);
    else if (auto lmvOp = dyn_cast(op))
      coreCodeGen.codeGenLmvOp(lmvOp);
    else if (auto receiveOp = dyn_cast(op))
      coreCodeGen.codeGenReceiveOp(receiveOp);
    else if (auto sendOp = dyn_cast(op))
      coreCodeGen.codeGenSendOp(sendOp);
    else if (auto vmmOp = dyn_cast(op))
      coreCodeGen.codeGenMVMLikeOp(vmmOp.getWeightIndex(), vmmOp, true);
    else if (auto mvmOp = dyn_cast(op))
      coreCodeGen.codeGenMVMLikeOp(mvmOp.getWeightIndex(), mvmOp, false);
    else if (auto applyFiltersOp = dyn_cast(op))
      coreCodeGen.codeGenApplyFiltersOp(applyFiltersOp);
    else if (auto transposeOp = dyn_cast(op))
      coreCodeGen.codeGenTransposeOp(transposeOp);
    else if (auto vaddOp = dyn_cast(op))
      coreCodeGen.codeGenVAddOp(vaddOp);
    else if (auto vmaxOp = dyn_cast(op))
      coreCodeGen.codeGenVMaxOp(vmaxOp);
    else if (auto vreluOp = dyn_cast(op))
      coreCodeGen.codeGenVReluOp(vreluOp);
    // NOTE(review): the op list of this `isa` is missing from the file as well.
    else if (isa(op)) {
      // TODO: Implement somehow?
      op.emitWarning("Operation is not yet supported in code generation");
      continue;
    }
    else {
      op.emitError("Unsupported codegen for this operation");
      op.dump();
      return -1;
    }
    processedOperations++;
  }
  return processedOperations;
}

/// Write crossbar weight matrices as padded binary files for a single core.
///
/// One entry is appended to `xbarsPerGroup` per weight operand of `coreOp`, whether or not a
/// file could be produced for it. Weights that do not come from a memref.get_global with a
/// dense initial value only produce a warning. Each written file holds `crossbarSize` x
/// `crossbarSize` elements in row-major order, zero-padded outside the weight matrix.
/// Returns InvalidOutputFileAccess if a weight file cannot be opened.
///
/// NOTE(review): several `<...>` argument lists (`getDefiningOp`, `dyn_cast`, `getValues`,
/// `reinterpret_cast`) are missing from this file; they are reproduced as found.
static OnnxMlirCompilerErrorCodes writeCrossbarWeights(
    ModuleOp moduleOp, pim::PimCoreOp coreOp, StringRef coreWeightsDirPath, json::Array& xbarsPerGroup) {
  const int64_t xbarSize = crossbarSize.getValue();

  size_t nextWeightIndex = 0;
  for (auto weight : coreOp.getWeights()) {
    // Take this weight's index and advance the counter in one place, so that every early
    // `continue` below keeps the numbering consistent.
    const size_t weightIndex = nextWeightIndex++;
    xbarsPerGroup.push_back(weightIndex);

    auto getGlobalOp = weight.getDefiningOp();
    if (!getGlobalOp) {
      coreOp.emitWarning("Weight is not from a memref.get_global at index " + std::to_string(weightIndex));
      continue;
    }

    auto globalOp = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
    if (!globalOp) {
      coreOp.emitWarning("Could not find memref.global for weight at index " + std::to_string(weightIndex));
      continue;
    }

    auto initialValue = globalOp.getInitialValue();
    if (!initialValue) {
      coreOp.emitWarning("memref.global has no initial value at index " + std::to_string(weightIndex));
      continue;
    }

    auto denseAttr = dyn_cast(*initialValue);
    if (!denseAttr) {
      coreOp.emitWarning("memref.global initial value is not dense at index " + std::to_string(weightIndex));
      continue;
    }

    auto type = denseAttr.getType();
    auto shape = type.getShape();
    assert(isMatrixShape(shape) && "Weight matrix must be 2-dimensional");
    int64_t numRows = shape[0];
    int64_t numCols = shape[1];
    assert(numRows <= xbarSize && numCols <= xbarSize && "Weight dimensions must not exceed crossbar size");

    size_t elementByteWidth = type.getElementType().getIntOrFloatBitWidth() / 8;

    auto weightFilePath = (coreWeightsDirPath + "/crossbar_" + std::to_string(weightIndex) + ".bin").str();
    std::error_code errorCode;
    raw_fd_ostream weightFileStream(weightFilePath, errorCode, sys::fs::OF_None);
    if (errorCode) {
      errs() << "Error while opening weight file `" << weightFilePath << "`: " << errorCode.message() << '\n';
      return InvalidOutputFileAccess;
    }

    // Fetched once per weight instead of once per element.
    auto values = denseAttr.getValues();

    // Only the first `elementByteWidth` bytes of each 64-bit word are written.
    // NOTE(review): this yields the element's bytes only on a little-endian host — confirm that
    // is the only supported build host.
    uint64_t zero = 0;
    for (int64_t row = 0; row < xbarSize; row++) {
      for (int64_t col = 0; col < xbarSize; col++) {
        if (row < numRows && col < numCols) {
          int64_t index = row * numCols + col;
          APInt bits = values[index].bitcastToAPInt();
          uint64_t word = bits.getZExtValue();
          weightFileStream.write(reinterpret_cast(&word), elementByteWidth);
        }
        else {
          // Padding outside the weight matrix.
          weightFileStream.write(reinterpret_cast(&zero), elementByteWidth);
        }
      }
    }

    weightFileStream.close();
  }
  return CompilerSuccess;
}

/// Write the top-level PIM configuration JSON (core count, crossbar config, I/O addresses).
///
/// The file is written to `config.json` under `outputDirPath`. Input addresses come from the
/// arguments of `funcOp`, output addresses from the operands of its return ops.
/// Returns InvalidOutputFileAccess if the file cannot be opened.
static OnnxMlirCompilerErrorCodes writeConfigJson(func::FuncOp funcOp,
    PimAcceleratorMemory& memory,
    size_t coreCount,
    json::Object xbarsPerArrayGroup,
    StringRef outputDirPath) {
  json::Object configJson;

  configJson["core_cnt"] = coreCount;

  // TODO: Should this be based on the floating point type used in the model?
  // The 2 following values determine the bitwidth of the vectors' elements: bitwidth = adc_count * cell_precision
  // Number of ADC for MVM units
  configJson["adc_count"] = 16;
  // The bit precision of each ADC
  configJson["cell_precision"] = 2;

  // Crossbar configuration
  configJson["xbar_array_count"] = crossbarCountInCore.getValue();
  configJson["xbar_size"] = {crossbarSize.getValue(), crossbarSize.getValue()};
  configJson["array_group_map"] = std::move(xbarsPerArrayGroup);

  // Memory layout of inputs and outputs
  json::Array inputsAddresses;
  for (BlockArgument input : funcOp.getArguments())
    inputsAddresses.push_back(memory.getValueAddress(input));
  configJson["inputs_addresses"] = std::move(inputsAddresses);

  json::Array outputsAddresses;
  for (func::ReturnOp returnOp : funcOp.getOps())
    for (mlir::Value output : returnOp.getOperands())
      outputsAddresses.push_back(memory.getValueAddress(output));
  configJson["outputs_addresses"] = std::move(outputsAddresses);

  auto configPath = (outputDirPath + "/config.json").str();
  std::error_code errorCode;
  raw_fd_ostream jsonOS(configPath, errorCode);
  if (errorCode) {
    errs() << "Error while opening config file: " << errorCode.message() << '\n';
    return InvalidOutputFileAccess;
  }
  jsonOS << json::Value(std::move(configJson)) << '\n';
  jsonOS.close();

  return CompilerSuccess;
}

/// Emits the complete PIM artefact set for `moduleOp` into `outputDirPath`: the host memory
/// image, an empty host core file (`core_0.json`), one instruction file and one crossbar weight
/// directory per core op of the entry function, and the top-level `config.json`.
///
/// Returns CompilerFailure if the entry function cannot be found or an operation has no code
/// generator, InvalidOutputFileAccess if a directory or file cannot be created, and otherwise
/// the result of writing the configuration file.
OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::string& outputDirPath) {
  if (!outputDirPath.empty()) {
    // create_directories also creates missing parent directories and, like create_directory,
    // succeeds when the directory already exists.
    if (auto error = sys::fs::create_directories(outputDirPath)) {
      errs() << "Error creating output directory: " << outputDirPath << ": " << error.message() << '\n';
      return InvalidOutputFileAccess;
    }
  }

  auto entryFunc = getPimEntryFunc(moduleOp);
  if (failed(entryFunc))
    return CompilerFailure;
  auto funcOp = *entryFunc;

  PimAcceleratorMemory memory;
  memory.hostMem.allocateHost(moduleOp, funcOp);

  if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath))
    return err;

  // Write empty host core file
  std::error_code hostErrorCode;
  auto outputHostCorePath = outputDirPath + "/core_0.json";
  raw_fd_ostream hostFileStream(outputHostCorePath, hostErrorCode);
  if (hostErrorCode) {
    errs() << "Error while opening host core file `" << outputHostCorePath << "`: " << hostErrorCode.message()
           << '\n';
    return InvalidOutputFileAccess;
  }
  hostFileStream << "[]";
  hostFileStream.close();

  // For each core, specify the number of crossbar per array group.
  // This implementation always assigns one crossbar per group.
  json::Object xbarsPerArrayGroup;
  size_t coreCount = 0;

  for (auto coreOp : funcOp.getOps()) {
    auto coreId = coreOp.getCoreId();
    coreCount++;

    std::error_code coreErrorCode;
    auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".json";
    raw_fd_ostream coreFileStream(outputCorePath, coreErrorCode);
    if (coreErrorCode) {
      errs() << "Error while opening core file `" << outputCorePath << "`: " << coreErrorCode.message() << '\n';
      return InvalidOutputFileAccess;
    }
    coreFileStream << '[';
    // Position right after the opening bracket; used below to tell whether anything was emitted.
    const uint64_t instructionsStart = coreFileStream.tell();

    PimCodeGen coreCodeGen(memory, coreFileStream);
    memory.getOrCreateDeviceMem(coreId).allocateCore(coreOp);

    int64_t processedOperations = codeGenCoreOps(coreOp, coreCodeGen);
    if (processedOperations < 0)
      return CompilerFailure;

    // Remove the trailing comma, close the JSON array. When nothing was emitted there is no
    // comma to remove: stepping back would overwrite the opening bracket and leave invalid
    // JSON, so the array is simply closed as `[]`.
    if (coreFileStream.tell() > instructionsStart)
      coreFileStream.seek(coreFileStream.tell() - 1);
    coreFileStream << ']';
    coreFileStream.close();

    // Write crossbar weights for this core
    auto coreWeightsDirPath = outputDirPath + "/core_" + std::to_string(coreId);
    if (auto error = sys::fs::create_directory(coreWeightsDirPath)) {
      errs() << "Error creating core directory: " << coreWeightsDirPath << ": " << error.message() << '\n';
      return InvalidOutputFileAccess;
    }

    json::Array xbarsPerGroup;
    if (auto err = writeCrossbarWeights(moduleOp, coreOp, coreWeightsDirPath, xbarsPerGroup))
      return err;
    xbarsPerArrayGroup["core" + std::to_string(coreId)] = std::move(xbarsPerGroup);
  }

  return writeConfigJson(funcOp, memory, coreCount, std::move(xbarsPerArrayGroup), outputDirPath);
}