ff36729140
fix codegen symlinks overwrite remove deprecated pim memcp_hd_batch op
222 lines
8.8 KiB
C++
222 lines
8.8 KiB
C++
#pragma once
|
|
|
|
#include "mlir/IR/Operation.h"
|
|
|
|
#include "llvm-project/clang/include/clang/Basic/LLVM.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/Hashing.h"
|
|
#include "llvm/Support/JSON.h"
|
|
#include "llvm/Support/raw_os_ostream.h"
|
|
|
|
#include <fstream>
|
|
#include <limits>
|
|
#include <optional>
|
|
|
|
#include "onnx-mlir/Compiler/OMCompilerTypes.h"
|
|
#include "src/Accelerators/PIM/Common/IR/AddressAnalysis.hpp"
|
|
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
|
|
#include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp"
|
|
#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp"
|
|
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
|
|
|
|
namespace onnx_mlir {
|
|
|
|
/// Address/size pair describing one memory entry.
///
/// Both fields default to zero so that a default-constructed entry has a
/// well-defined value instead of indeterminate contents. The struct remains an
/// aggregate, so `MemEntry{address, size}` keeps working.
struct MemEntry {
  /// Start address of the entry.
  size_t address = 0;
  /// Extent of the entry (presumably in bytes — confirm against the allocator).
  size_t size = 0;
};
|
|
|
|
struct MemoryValueKey {
|
|
mlir::Value value;
|
|
std::optional<unsigned> lane;
|
|
|
|
bool operator==(const MemoryValueKey& other) const { return value == other.value && lane == other.lane; }
|
|
};
|
|
|
|
/// One row of the memory report: counts and sizes for the "alloca" and
/// "global" categories. All counters start at zero.
struct MemoryReportRow {
  /// Number of alloca entries.
  uint64_t numAlloca = 0;
  /// Accumulated size of alloca entries.
  uint64_t sizeAlloca = 0;
  /// Number of global entries.
  uint64_t numGlobal = 0;
  /// Accumulated size of global entries.
  uint64_t sizeGlobal = 0;

  /// Field-wise equality over all four counters.
  bool operator==(const MemoryReportRow& other) const {
    return numAlloca == other.numAlloca && sizeAlloca == other.sizeAlloca && numGlobal == other.numGlobal
        && sizeGlobal == other.sizeGlobal;
  }
};
|
|
|
|
struct MemoryReportEntry {
|
|
enum class Kind {
|
|
Core,
|
|
Batch
|
|
};
|
|
|
|
Kind kind = Kind::Core;
|
|
uint64_t id = 0;
|
|
llvm::SmallVector<int32_t, 8> coreIds;
|
|
MemoryReportRow row;
|
|
uint64_t totalAllocaCount = 0;
|
|
uint64_t totalAllocaBytes = 0;
|
|
};
|
|
|
|
class PimMemory {
|
|
llvm::SmallVector<std::pair<MemEntry, MemoryValueKey>, 32> memEntries;
|
|
llvm::SmallDenseMap<MemoryValueKey, MemEntry, 32>& globalMemEntriesMap;
|
|
llvm::SmallDenseMap<MemoryValueKey, MemEntry, 32> ownedMemEntriesMap;
|
|
|
|
size_t minAlignment = 4;
|
|
size_t firstAvailableAddress = 0;
|
|
|
|
MemEntry* gatherMemEntry(mlir::Value value, std::optional<unsigned> lane = std::nullopt);
|
|
void allocateGatheredMemory();
|
|
void allocateMemoryForValue(const MemoryValueKey& key, MemEntry& memEntry);
|
|
|
|
public:
|
|
PimMemory(llvm::SmallDenseMap<MemoryValueKey, MemEntry, 32>& globalMemEntriesMap)
|
|
: globalMemEntriesMap(globalMemEntriesMap) {}
|
|
|
|
void allocateHost(mlir::ModuleOp moduleOp, mlir::func::FuncOp funcOp);
|
|
void allocateCore(mlir::Operation* op, std::optional<unsigned> lane = std::nullopt);
|
|
MemoryReportRow getReportRow() const;
|
|
void remove(mlir::Value val);
|
|
|
|
size_t getFirstAvailableAddress() const { return firstAvailableAddress; }
|
|
MemEntry getMemEntry(const MemoryValueKey& key) const;
|
|
};
|
|
|
|
class PimAcceleratorMemory {
|
|
public:
|
|
llvm::SmallDenseMap<MemoryValueKey, MemEntry, 32> memEntriesMap;
|
|
PimMemory hostMem;
|
|
|
|
private:
|
|
llvm::SmallDenseMap<size_t, PimMemory> deviceMem;
|
|
std::fstream fileReport;
|
|
std::optional<MemoryReportRow> hostReportRow;
|
|
llvm::SmallVector<MemoryReportEntry, 32> reportEntries;
|
|
mutable llvm::DenseMap<mlir::Value, CompiledIndexExpr> compiledIndexExprs;
|
|
mutable llvm::DenseMap<mlir::Value, CompiledAddressExpr> compiledAddressExprs;
|
|
|
|
public:
|
|
PimAcceleratorMemory()
|
|
: hostMem(memEntriesMap), fileReport(openReportFile("memory_report")) {}
|
|
PimAcceleratorMemory(const llvm::SmallDenseMap<MemoryValueKey, MemEntry, 32>& initialMemEntries, bool enableReport)
|
|
: memEntriesMap(initialMemEntries),
|
|
hostMem(memEntriesMap),
|
|
fileReport(enableReport ? openReportFile("memory_report") : std::fstream()) {}
|
|
|
|
PimMemory& getOrCreateDeviceMem(size_t id);
|
|
|
|
size_t getValueAddress(mlir::Value value,
|
|
const StaticValueKnowledge& knowledge = {},
|
|
std::optional<unsigned> lane = std::nullopt) const;
|
|
llvm::FailureOr<int64_t> getIndexValue(mlir::Value value, const StaticValueKnowledge& knowledge = {}) const;
|
|
void reportHost();
|
|
void recordCoreReport(size_t coreId, const MemoryReportRow& row);
|
|
void recordBatchReport(uint64_t batchId,
|
|
llvm::ArrayRef<int32_t> coreIds,
|
|
const MemoryReportRow& perCoreRow,
|
|
uint64_t totalAllocaCount,
|
|
uint64_t totalAllocaBytes);
|
|
void flushReport();
|
|
void clean(mlir::Operation* op);
|
|
};
|
|
|
|
struct CoreEmissionJob {
|
|
mlir::Operation* coreLikeOp = nullptr;
|
|
size_t originalCoreId = 0;
|
|
size_t emittedCoreId = 0;
|
|
llvm::SmallVector<unsigned, 4> lanes;
|
|
std::optional<uint64_t> batchReportId;
|
|
};
|
|
|
|
class PimCodeGen {
|
|
PimAcceleratorMemory& memory;
|
|
llvm::raw_fd_ostream& coreBinaryStream;
|
|
llvm::raw_fd_ostream* coreJsonStream;
|
|
const llvm::DenseMap<size_t, size_t>& emittedCoreIds;
|
|
std::optional<unsigned> batchLane;
|
|
mutable uint32_t emittedInstructionCount = 0;
|
|
|
|
size_t addressOf(mlir::Value value, const StaticValueKnowledge& knowledge) const {
|
|
return memory.getValueAddress(value, knowledge, batchLane);
|
|
}
|
|
size_t remapCoreId(size_t coreId) const;
|
|
|
|
void emitInstruction(const pim_binary::InstructionRecord& instruction) const;
|
|
|
|
void genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const;
|
|
void setupRd(size_t rdAddress, size_t rdOffset) const;
|
|
void setupRdRs1(size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset) const;
|
|
void setupRdRs1Rs2(
|
|
size_t rdAddress, size_t rdOffset, size_t rs1Address, size_t rs1Offset, size_t rs2Address, size_t rs2Offset) const;
|
|
|
|
void emitMemCopyOp(mlir::StringRef opName,
|
|
size_t rdAddr,
|
|
size_t rdOffset,
|
|
size_t rs1Addr,
|
|
size_t rs1Offset,
|
|
size_t size,
|
|
mlir::StringRef sizeFieldName = "size") const;
|
|
void emitCommunicationOp(mlir::StringRef opName, size_t bufferAddr, size_t coreId, size_t size) const;
|
|
void emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const;
|
|
|
|
public:
|
|
PimCodeGen(PimAcceleratorMemory& memory,
|
|
llvm::raw_fd_ostream& coreBinary,
|
|
llvm::raw_fd_ostream* coreJson,
|
|
const llvm::DenseMap<size_t, size_t>& emittedCoreIds)
|
|
: memory(memory), coreBinaryStream(coreBinary), coreJsonStream(coreJson), emittedCoreIds(emittedCoreIds) {}
|
|
|
|
uint32_t getEmittedInstructionCount() const { return emittedInstructionCount; }
|
|
void setBatchLane(std::optional<unsigned> lane) { batchLane = lane; }
|
|
llvm::FailureOr<int64_t> indexOf(mlir::Value value, const StaticValueKnowledge& knowledge) const {
|
|
return memory.getIndexValue(value, knowledge);
|
|
}
|
|
|
|
void codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenLmvOp(pim::PimMemCopyOp lmvOp, const StaticValueKnowledge& knowledge) const;
|
|
|
|
void codeGenReceiveOp(pim::PimReceiveOp receiveOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenSendOp(pim::PimSendOp sendOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenConcatOp(pim::PimConcatOp concatOp, const StaticValueKnowledge& knowledge) const;
|
|
|
|
template <typename MVMTy>
|
|
void codeGenMVMLikeOp(size_t mvmId, MVMTy mvmLikeOp, bool transposeMatrix, const StaticValueKnowledge& knowledge);
|
|
|
|
void codeGenVVAddOp(pim::PimVVAddOp vvaddOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVVSubOp(pim::PimVVSubOp vvsubOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVVMulOp(pim::PimVVMulOp vvmulOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVAvgOp(pim::PimVAvgOp vavgOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVReluOp(pim::PimVReluOp vreluOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGetGlobalOp(mlir::memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const;
|
|
void codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const;
|
|
};
|
|
|
|
/// Entry point that compiles a module to PIM code.
///
/// @param moduleOpRef   Module to compile, taken by non-const reference.
/// @param outputDirName Output directory name, taken by non-const reference
///                      (whether it is modified is not visible from this
///                      declaration — check the definition).
/// @return An `OnnxMlirCompilerErrorCodes` value describing the outcome.
OnnxMlirCompilerErrorCodes compileToPimCode(mlir::ModuleOp& moduleOpRef, std::string& outputDirName);
|
|
|
|
} // namespace onnx_mlir
|
|
|
|
namespace llvm {
|
|
|
|
template <>
|
|
struct DenseMapInfo<onnx_mlir::MemoryValueKey> {
|
|
static onnx_mlir::MemoryValueKey getEmptyKey() { return {DenseMapInfo<mlir::Value>::getEmptyKey(), 0}; }
|
|
|
|
static onnx_mlir::MemoryValueKey getTombstoneKey() { return {DenseMapInfo<mlir::Value>::getTombstoneKey(), 0}; }
|
|
|
|
static unsigned getHashValue(const onnx_mlir::MemoryValueKey& key) {
|
|
return hash_combine(key.value, key.lane.value_or(std::numeric_limits<unsigned>::max()));
|
|
}
|
|
|
|
static bool isEqual(const onnx_mlir::MemoryValueKey& lhs, const onnx_mlir::MemoryValueKey& rhs) { return lhs == rhs; }
|
|
};
|
|
|
|
} // namespace llvm
|