merge remote changes
This commit is contained in:
@@ -5,9 +5,11 @@
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
#include "mlir/IR/BuiltinTypes.h"
|
||||
#include "mlir/IR/IRMapping.h"
|
||||
#include "mlir/IR/Value.h"
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Support/FileSystem.h"
|
||||
#include "llvm/Support/JSON.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@@ -55,9 +57,23 @@ void PimMemory::allocateMemoryForValue(mlir::Value value, MemEntry& memEntry) {
|
||||
void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
|
||||
SmallDenseMap<memref::GlobalOp, mlir::Value, 8> globalConstants;
|
||||
SmallVector<std::pair<mlir::Value, mlir::Value>, 16> globalAliases;
|
||||
SmallVector<mlir::Value> args;
|
||||
|
||||
|
||||
for (mlir::Value arg : funcOp.getArguments()){
|
||||
gatherMemEntry(arg);
|
||||
args.push_back(arg);
|
||||
}
|
||||
|
||||
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
||||
if (!hasWeightAlways(getGlobalOp)) {
|
||||
auto globalMemrefOp = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
|
||||
if (globalMemrefOp.getName().starts_with("arg")){
|
||||
StringRef indexStr = globalMemrefOp.getName().substr(4);
|
||||
int index = 0;
|
||||
llvm::to_integer(indexStr,index, 10);
|
||||
globalAliases.push_back({getGlobalOp.getResult(), args[index]});
|
||||
}
|
||||
auto [iter, inserted] = globalConstants.try_emplace(globalMemrefOp, getGlobalOp.getResult());
|
||||
if (inserted)
|
||||
gatherMemEntry(getGlobalOp.getResult());
|
||||
@@ -66,8 +82,6 @@ void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
|
||||
}
|
||||
});
|
||||
|
||||
for (mlir::Value arg : funcOp.getArguments())
|
||||
gatherMemEntry(arg);
|
||||
|
||||
funcOp.walk([&](memref::AllocOp allocOp) {
|
||||
if (!allocOp->getParentOfType<pim::PimCoreOp>())
|
||||
@@ -133,6 +147,12 @@ json::Object PimCodeGen::createEmptyOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
/// Translate an original core id into the id used for emitted output.
///
/// The mapping is read from `emittedCoreIds`. Callers must only pass ids that
/// were registered there; an unknown id trips the assertion below.
size_t PimCodeGen::remapCoreId(size_t coreId) const {
  auto remapped = emittedCoreIds.find(coreId);
  assert(remapped != emittedCoreIds.end() && "Missing emitted core id remapping");
  const size_t emittedId = remapped->second;
  return emittedId;
}
|
||||
|
||||
static json::Object createRs1OnlyOffset() {
|
||||
json::Object offset;
|
||||
offset["offset_select"] = 1;
|
||||
@@ -192,7 +212,7 @@ void PimCodeGen::emitCommunicationOp(StringRef opName, size_t bufferAddr, size_t
|
||||
json::Object json;
|
||||
json["op"] = opName;
|
||||
json["rd"] = 0;
|
||||
json["core"] = coreId;
|
||||
json["core"] = remapCoreId(coreId);
|
||||
json["size"] = size;
|
||||
json["offset"] = createEmptyOffset();
|
||||
emitInstruction(std::move(json));
|
||||
@@ -414,6 +434,9 @@ void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticVa
|
||||
emitInstruction(std::move(json));
|
||||
}
|
||||
|
||||
/// Codegen hook for `memref.get_global` ops found in a core body.
///
/// Currently a no-op: the body is empty, so no instruction is emitted and
/// neither `getGlobalOp` nor `knowledge` is read. codeGenCoreOps dispatches
/// get_global ops here, which keeps them out of its "Unsupported codegen for
/// this operation" error branch.
/// NOTE(review): presumably the address of the op's result is resolved through
/// the memory entries prepared before codegen runs — confirm.
void PimCodeGen::codeGetGlobalOp(memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const {
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const {
|
||||
auto srcAddr = addressOf(transposeOp.getInput(), knowledge);
|
||||
auto dstAddr = addressOf(transposeOp.getOutputBuffer(), knowledge);
|
||||
@@ -583,6 +606,29 @@ static pim::PimCoreOp materializeScalarCoreFromBatchLane(pim::PimCoreBatchOp cor
|
||||
return scalarCore;
|
||||
}
|
||||
|
||||
/// Give `memref.get_global` ops nested in `coreOp` a memory entry by reusing the
/// entry of another `memref.get_global` in `funcOp` that resolves to the same
/// global.
///
/// For every get_global inside `coreOp` that is not flagged by
/// `hasWeightAlways` and has no entry in `memory.memEntriesMap` yet, `funcOp`
/// is walked for a *different* get_global that already has an entry and for
/// which `lookupGlobalForGetGlobal` returns the same global. The first such
/// candidate in walk order donates a copy of its entry. Ops whose global cannot
/// be resolved, or for which no donor is found, are left untouched.
///
/// \param moduleOp Module passed to `lookupGlobalForGetGlobal` for resolution.
/// \param funcOp   Function searched for donor get_global ops.
/// \param coreOp   Core whose nested get_global ops receive aliased entries.
/// \param memory   Owner of `memEntriesMap`, which is read and extended here.
static void aliasMaterializedHostGlobals(
    ModuleOp moduleOp, func::FuncOp funcOp, pim::PimCoreOp coreOp, PimAcceleratorMemory& memory) {
  coreOp.walk([&](memref::GetGlobalOp getGlobalOp) {
    // Skip ops flagged by hasWeightAlways and ops that already have an entry.
    if (hasWeightAlways(getGlobalOp) || memory.memEntriesMap.contains(getGlobalOp.getResult()))
      return;

    auto targetGlobal = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
    if (!targetGlobal)
      return;

    // Find the first get_global (other than this one) that already owns an
    // entry and refers to the same global.
    mlir::Value aliasedValue;
    funcOp.walk([&](memref::GetGlobalOp candidate) {
      if (aliasedValue || candidate == getGlobalOp || !memory.memEntriesMap.contains(candidate.getResult()))
        return;
      if (lookupGlobalForGetGlobal(moduleOp, candidate) == targetGlobal)
        aliasedValue = candidate.getResult();
    });

    if (!aliasedValue)
      return;

    // Copy the donor entry BEFORE inserting the new key. Writing
    // `map[dst] = map[src]` binds a reference to the source slot and then
    // inserts `dst`; if memEntriesMap is an llvm::DenseMap (its declaration is
    // outside this view — confirm), that insertion may rehash and leave the
    // source reference dangling. Taking a value copy first is safe for any
    // map type.
    auto aliasedEntry = memory.memEntriesMap[aliasedValue];
    memory.memEntriesMap[getGlobalOp.getResult()] = aliasedEntry;
  });
}
|
||||
|
||||
/// Write global constant data into a binary memory image at their allocated addresses.
|
||||
static OnnxMlirCompilerErrorCodes
|
||||
writeMemoryBinary(ModuleOp moduleOp, func::FuncOp funcOp, PimAcceleratorMemory& memory, StringRef outputDirPath) {
|
||||
@@ -677,6 +723,8 @@ static int64_t codeGenCoreOps(Block& block, PimCodeGen& coreCodeGen) {
|
||||
coreCodeGen.codeGenVSigmOp(vsigmOp, knowledge);
|
||||
else if (auto vsoftmaxOp = dyn_cast<pim::PimVSoftmaxOp>(op))
|
||||
coreCodeGen.codeGenVSoftmaxOp(vsoftmaxOp, knowledge);
|
||||
else if (auto getGlobalOp = dyn_cast<memref::GetGlobalOp>(op))
|
||||
coreCodeGen.codeGetGlobalOp(getGlobalOp, knowledge);
|
||||
else {
|
||||
op.emitError("Unsupported codegen for this operation");
|
||||
op.dump();
|
||||
@@ -880,13 +928,14 @@ createAndPopulateWeightFolder(func::FuncOp funcOp, StringRef outputDirPath) {
|
||||
/// Write the top-level PIM configuration JSON (core count, crossbar config, I/O addresses).
|
||||
static OnnxMlirCompilerErrorCodes writeConfigJson(func::FuncOp funcOp,
|
||||
PimAcceleratorMemory& memory,
|
||||
size_t coreCount,
|
||||
size_t maxCoreId,
|
||||
json::Object xbarsPerArrayGroup,
|
||||
StringRef outputDirPath) {
|
||||
json::Object configJson;
|
||||
|
||||
// +1 because pimsim-nn also considers the host as a core
|
||||
configJson["core_cnt"] = coreCount + 1;
|
||||
// pimsim-nn indexes cores directly by their numeric core ID, with the host
|
||||
// occupying core 0.
|
||||
configJson["core_cnt"] = maxCoreId + 1;
|
||||
|
||||
// TODO: Should this be based on the floating point type used in the model?
|
||||
// The 2 following values determine the bitwidth of the vectors' elements: bitwidth = adc_count * cell_precision
|
||||
@@ -960,12 +1009,31 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
// For each core, specify the number of crossbar per array group.
|
||||
// This implementation always assigns one crossbar per group.
|
||||
json::Object xbarsPerArrayGroup;
|
||||
size_t coreCount = 0;
|
||||
size_t maxCoreId = 0;
|
||||
|
||||
// Create Weight Folder
|
||||
auto mapCoreWeightToFileName = createAndPopulateWeightFolder(funcOp, outputDirPath);
|
||||
|
||||
SmallVector<Operation*> coreLikeOps = collectTopLevelCoreLikeOps(funcOp);
|
||||
llvm::DenseMap<size_t, size_t> emittedCoreIds;
|
||||
size_t nextEmittedCoreId = 1;
|
||||
|
||||
for (Operation* op : coreLikeOps) {
|
||||
if (auto coreOp = dyn_cast<pim::PimCoreOp>(op)) {
|
||||
size_t originalCoreId = static_cast<size_t>(coreOp.getCoreId());
|
||||
if (!emittedCoreIds.contains(originalCoreId))
|
||||
emittedCoreIds[originalCoreId] = nextEmittedCoreId++;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto coreBatchOp = cast<pim::PimCoreBatchOp>(op);
|
||||
auto batchCoreIds = getBatchCoreIds(coreBatchOp);
|
||||
for (unsigned lane = 0; lane < static_cast<unsigned>(coreBatchOp.getLaneCount()); ++lane) {
|
||||
size_t originalCoreId = static_cast<size_t>(batchCoreIds[lane]);
|
||||
if (!emittedCoreIds.contains(originalCoreId))
|
||||
emittedCoreIds[originalCoreId] = nextEmittedCoreId++;
|
||||
}
|
||||
}
|
||||
|
||||
for (Operation* op : coreLikeOps) {
|
||||
SmallVector<pim::PimCoreOp> scalarCores;
|
||||
@@ -979,8 +1047,9 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
}
|
||||
|
||||
for (pim::PimCoreOp coreOp : scalarCores) {
|
||||
auto coreId = coreOp.getCoreId();
|
||||
coreCount++;
|
||||
size_t originalCoreId = static_cast<size_t>(coreOp.getCoreId());
|
||||
size_t coreId = emittedCoreIds.lookup(originalCoreId);
|
||||
maxCoreId = std::max(maxCoreId, coreId);
|
||||
|
||||
std::error_code errorCode;
|
||||
auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".json";
|
||||
@@ -991,7 +1060,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
}
|
||||
coreFileStream << '[';
|
||||
|
||||
PimCodeGen coreCodeGen(memory, coreFileStream);
|
||||
PimCodeGen coreCodeGen(memory, coreFileStream, emittedCoreIds);
|
||||
aliasMaterializedHostGlobals(moduleOp, funcOp, coreOp, memory);
|
||||
memory.getOrCreateDeviceMem(coreId).allocateCore(coreOp);
|
||||
|
||||
int64_t processedOperations = codeGenCoreOps(coreOp.getBody().front(), coreCodeGen);
|
||||
@@ -1009,7 +1079,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
return InvalidOutputFileAccess;
|
||||
}
|
||||
|
||||
auto& mapWeightToFile = mapCoreWeightToFileName[static_cast<size_t>(coreId)];
|
||||
auto& mapWeightToFile = mapCoreWeightToFileName[originalCoreId];
|
||||
json::Array xbarsPerGroup;
|
||||
for (unsigned index : getUsedWeightIndices(coreOp)) {
|
||||
if (index >= coreOp.getWeights().size()) {
|
||||
@@ -1037,5 +1107,5 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
coreOp.erase();
|
||||
}
|
||||
|
||||
return writeConfigJson(funcOp, memory, coreCount, std::move(xbarsPerArrayGroup), outputDirPath);
|
||||
return writeConfigJson(funcOp, memory, maxCoreId, std::move(xbarsPerArrayGroup), outputDirPath);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm-project/clang/include/clang/Basic/LLVM.h"
|
||||
#include "llvm/Support/JSON.h"
|
||||
|
||||
@@ -58,10 +59,12 @@ public:
|
||||
class PimCodeGen {
|
||||
PimAcceleratorMemory& memory;
|
||||
llvm::raw_fd_ostream& coreFileStream;
|
||||
const llvm::DenseMap<size_t, size_t>& emittedCoreIds;
|
||||
|
||||
size_t addressOf(mlir::Value value, const StaticValueKnowledge& knowledge) const {
|
||||
return memory.getValueAddress(value, knowledge);
|
||||
}
|
||||
size_t remapCoreId(size_t coreId) const;
|
||||
|
||||
static llvm::json::Object createEmptyOffset();
|
||||
void emitInstruction(llvm::json::Object instruction) const;
|
||||
@@ -83,8 +86,10 @@ class PimCodeGen {
|
||||
void emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const;
|
||||
|
||||
public:
|
||||
PimCodeGen(PimAcceleratorMemory& memory, llvm::raw_fd_ostream& coreJson)
|
||||
: memory(memory), coreFileStream(coreJson) {}
|
||||
PimCodeGen(PimAcceleratorMemory& memory,
|
||||
llvm::raw_fd_ostream& coreJson,
|
||||
const llvm::DenseMap<size_t, size_t>& emittedCoreIds)
|
||||
: memory(memory), coreFileStream(coreJson), emittedCoreIds(emittedCoreIds) {}
|
||||
|
||||
void codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const;
|
||||
void codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp, const StaticValueKnowledge& knowledge) const;
|
||||
@@ -106,6 +111,7 @@ public:
|
||||
void codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowledge& knowledge) const;
|
||||
void codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowledge& knowledge) const;
|
||||
void codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticValueKnowledge& knowledge) const;
|
||||
void codeGetGlobalOp(mlir::memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const;
|
||||
void codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user