#pragma once

// Declarations for PIM memory bookkeeping (PimMemory, PimAcceleratorMemory),
// the per-core instruction emitter (PimCodeGen), and the compileToPimCode
// entry point. Only declarations and a few trivial inline bodies live here.
//
// NOTE(review): in this copy of the file several template argument lists and
// two angle-bracket include targets appear to be missing (the spots are
// flagged individually below). The tokens are left untouched; confirm against
// the canonical file before relying on them.

#include "mlir/IR/Operation.h"

#include "llvm-project/clang/include/clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_os_ostream.h"

// NOTE(review): the next two directives have no header name. This file uses
// std::fstream, std::optional and std::string, so standard headers are
// presumably intended here — TODO confirm.
#include
#include

#include "onnx-mlir/Compiler/OMCompilerTypes.h"
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
#include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp"
#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp"
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"

namespace onnx_mlir {

/// An (address, size) pair describing one memory entry.
/// Neither field has a default initializer.
struct MemEntry {
  size_t address;
  size_t size;
};

/// Counters for one row of the memory report: a count and a size for
/// "alloca" entries and for "global" entries. All fields default to zero.
/// NOTE(review): the size fields are presumably byte totals — confirm in the
/// implementation file.
struct MemoryReportRow {
  uint64_t numAlloca = 0;
  uint64_t sizeAlloca = 0;
  uint64_t numGlobal = 0;
  uint64_t sizeGlobal = 0;

  /// Field-wise equality over all four counters.
  bool operator==(const MemoryReportRow& other) const {
    return numAlloca == other.numAlloca && sizeAlloca == other.sizeAlloca && numGlobal == other.numGlobal
        && sizeGlobal == other.sizeGlobal;
  }
};

/// One recorded report entry, tagged as either a single core or a batch.
struct MemoryReportEntry {
  enum class Kind { Core, Batch };

  Kind kind = Kind::Core;
  uint64_t id = 0;
  // NOTE(review): template arguments appear to be missing on this SmallVector.
  llvm::SmallVector coreIds;
  MemoryReportRow row;
  uint64_t totalAllocaCount = 0;
  uint64_t totalAllocaBytes = 0;
};

/// Memory bookkeeping for one memory space (host or a device core).
///
/// Holds a reference to an externally owned map (globalMemEntriesMap) in
/// addition to its own map (ownedMemEntriesMap), so the referenced map must
/// outlive this object.
class PimMemory {
  // NOTE(review): the template arguments of the three containers below appear
  // to be partially or wholly missing in this copy.
  llvm::SmallVector, 32> memEntries;
  llvm::SmallDenseMap& globalMemEntriesMap;
  llvm::SmallDenseMap ownedMemEntriesMap;

  size_t minAlignment = 4;
  size_t firstAvailableAddress = 0;

  MemEntry* gatherMemEntry(mlir::Value value);
  void allocateGatheredMemory();
  void allocateMemoryForValue(mlir::Value value, MemEntry& memEntry);

public:
  /// Stores a reference to \p globalMemEntriesMap; no copy is made.
  /// NOTE(review): single-argument constructor is not `explicit`.
  PimMemory(llvm::SmallDenseMap& globalMemEntriesMap)
      : globalMemEntriesMap(globalMemEntriesMap) {}

  void allocateHost(mlir::ModuleOp moduleOp, mlir::func::FuncOp funcOp);
  void allocateCore(mlir::Operation* op);

  MemoryReportRow getReportRow() const;

  void remove(mlir::Value val);

  /// Returns the current value of firstAvailableAddress.
  size_t getFirstAvailableAddress() const { return firstAvailableAddress; }
  /// Returns the MemEntry for \p value by value (not a reference).
  MemEntry getMemEntry(mlir::Value value) const;
};

/// Aggregates the host PimMemory, a map of device memories, and the state
/// used to produce the memory report.
class PimAcceleratorMemory {
public:
  // Declaration order matters: memEntriesMap must be declared before hostMem
  // because hostMem is constructed with a reference to it.
  // NOTE(review): template arguments appear to be missing on this map.
  llvm::SmallDenseMap memEntriesMap;
  PimMemory hostMem;

private:
  // NOTE(review): template arguments appear to be missing on deviceMem,
  // hostReportRow and reportEntries.
  llvm::SmallDenseMap deviceMem;
  std::fstream fileReport;
  std::optional hostReportRow;
  llvm::SmallVector reportEntries;

public:
  /// Binds hostMem to memEntriesMap and initializes fileReport from
  /// openReportFile("memory_report").
  PimAcceleratorMemory()
      : hostMem(memEntriesMap), fileReport(openReportFile("memory_report")) {}

  PimMemory& getOrCreateDeviceMem(size_t id);

  /// \p knowledge defaults to a value-initialized StaticValueKnowledge.
  size_t getValueAddress(mlir::Value value, const StaticValueKnowledge& knowledge = {}) const;

  void reportHost();
  void recordCoreReport(size_t coreId, const MemoryReportRow& row);
  // NOTE(review): the element type of the ArrayRef parameter appears to be
  // missing in this copy.
  void recordBatchReport(uint64_t batchId,
                         llvm::ArrayRef coreIds,
                         const MemoryReportRow& perCoreRow,
                         uint64_t totalAllocaCount,
                         uint64_t totalAllocaBytes);
  void flushReport();
  void clean(mlir::Operation* op);
};

/// Instruction emitter bound to a PimAcceleratorMemory and a binary output
/// stream, with an optional (pointer) JSON stream.
///
/// Holds references to the memory object, the binary stream, and the
/// emittedCoreIds map, so all three must outlive this object.
class PimCodeGen {
  PimAcceleratorMemory& memory;
  llvm::raw_fd_ostream& coreBinaryStream;
  // Pointer rather than reference; NOTE(review): presumably may be null when
  // JSON output is disabled — confirm in the implementation.
  llvm::raw_fd_ostream* coreJsonStream;
  // NOTE(review): template arguments appear to be missing on this DenseMap.
  const llvm::DenseMap& emittedCoreIds;
  // `mutable` so that const member functions can update the counter.
  mutable uint32_t emittedInstructionCount = 0;

  /// Forwards to PimAcceleratorMemory::getValueAddress.
  size_t addressOf(mlir::Value value, const StaticValueKnowledge& knowledge) const {
    return memory.getValueAddress(value, knowledge);
  }
  size_t remapCoreId(size_t coreId) const;

  void emitInstruction(const pim_binary::InstructionRecord& instruction) const;
  void genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const;
  void setupRd(size_t rdAddress, size_t rdOffset) const;
  void setupRdRs1(size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset) const;
  void setupRdRs1Rs2(
      size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset, size_t rs2Address, size_t rs2Offset) const;

  /// \p sizeFieldName defaults to "size".
  void emitMemCopyOp(mlir::StringRef opName,
                     size_t rdAddr,
                     size_t rdOffset,
                     size_t rs1Addr,
                     size_t rs1Offset,
                     size_t size,
                     mlir::StringRef sizeFieldName = "size") const;
  void emitCommunicationOp(mlir::StringRef opName, size_t bufferAddr, size_t coreId, size_t size) const;
  void emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const;

public:
  /// Stores the given references/pointer; nothing is copied or opened here.
  PimCodeGen(PimAcceleratorMemory& memory,
             llvm::raw_fd_ostream& coreBinary,
             llvm::raw_fd_ostream* coreJson,
             const llvm::DenseMap& emittedCoreIds)
      : memory(memory), coreBinaryStream(coreBinary), coreJsonStream(coreJson), emittedCoreIds(emittedCoreIds) {}

  /// Returns the current value of emittedInstructionCount.
  uint32_t getEmittedInstructionCount() const { return emittedInstructionCount; }

  // One code-generation entry point per PIM operation kind. Each takes the
  // operation and a StaticValueKnowledge.
  void codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const;
  void codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp, const StaticValueKnowledge& knowledge) const;
  void codeGenLmvOp(pim::PimMemCopyOp lmvOp, const StaticValueKnowledge& knowledge) const;

  void codeGenReceiveOp(pim::PimReceiveOp receiveOp, const StaticValueKnowledge& knowledge) const;
  void codeGenReceiveTensorOp(pim::PimReceiveTensorOp receiveTensorOp, const StaticValueKnowledge& knowledge) const;
  void codeGenSendOp(pim::PimSendOp sendOp, const StaticValueKnowledge& knowledge) const;
  void codeGenSendTensorOp(pim::PimSendTensorOp sendTensorOp, const StaticValueKnowledge& knowledge) const;
  void codeGenConcatOp(pim::PimConcatOp concatOp, const StaticValueKnowledge& knowledge) const;

  // NOTE(review): the template parameter list appears to be missing here
  // (MVMTy is used but not declared in this copy). This is also the only
  // codeGen* member not declared const — confirm whether that is intentional.
  template
  void codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix, const StaticValueKnowledge& knowledge);

  void codeGenVVAddOp(pim::PimVVAddOp vvaddOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVVSubOp(pim::PimVVSubOp vvsubOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVVMulOp(pim::PimVVMulOp vvmulOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVAvgOp(pim::PimVAvgOp vavgOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVReluOp(pim::PimVReluOp vreluOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowledge& knowledge) const;
  void codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticValueKnowledge& knowledge) const;
  // NOTE(review): named "codeGet..." while every sibling is "codeGen..." —
  // possibly a typo, but renaming would change the public interface.
  void codeGetGlobalOp(mlir::memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const;
  void codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const;
};

/// Entry point declared here and defined elsewhere. Takes the module and the
/// output directory name by non-const reference and returns an
/// OnnxMlirCompilerErrorCodes value.
OnnxMlirCompilerErrorCodes compileToPimCode(mlir::ModuleOp& moduleOpRef, std::string& outputDirName);

} // namespace onnx_mlir