Memory report

This commit is contained in:
ilgeco
2026-05-06 10:47:04 +02:00
parent bdacb9871d
commit 3cb6a1abc5
2 changed files with 170 additions and 29 deletions

View File

@@ -1,22 +1,29 @@
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/Attributes.h" #include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h" #include "mlir/IR/IRMapping.h"
#include "mlir/IR/Value.h" #include "mlir/IR/Value.h"
#include "mlir/IR/Verifier.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h" #include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/JSON.h" #include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
#include <absl/types/compare.h>
#include <algorithm> #include <algorithm>
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
#include <cstdint>
#include <fstream>
#include <string> #include <string>
#include <utility> #include <utility>
@@ -59,8 +66,7 @@ void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
SmallVector<std::pair<mlir::Value, mlir::Value>, 16> globalAliases; SmallVector<std::pair<mlir::Value, mlir::Value>, 16> globalAliases;
SmallVector<mlir::Value> args; SmallVector<mlir::Value> args;
for (mlir::Value arg : funcOp.getArguments()) {
for (mlir::Value arg : funcOp.getArguments()){
gatherMemEntry(arg); gatherMemEntry(arg);
args.push_back(arg); args.push_back(arg);
} }
@@ -68,10 +74,10 @@ void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) { funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
if (!hasWeightAlways(getGlobalOp)) { if (!hasWeightAlways(getGlobalOp)) {
auto globalMemrefOp = lookupGlobalForGetGlobal(moduleOp, getGlobalOp); auto globalMemrefOp = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
if (globalMemrefOp.getName().starts_with("arg")){ if (globalMemrefOp.getName().starts_with("arg")) {
StringRef indexStr = globalMemrefOp.getName().substr(4); StringRef indexStr = globalMemrefOp.getName().substr(4);
int index = 0; int index = 0;
llvm::to_integer(indexStr,index, 10); llvm::to_integer(indexStr, index, 10);
globalAliases.push_back({getGlobalOp.getResult(), args[index]}); globalAliases.push_back({getGlobalOp.getResult(), args[index]});
} }
auto [iter, inserted] = globalConstants.try_emplace(globalMemrefOp, getGlobalOp.getResult()); auto [iter, inserted] = globalConstants.try_emplace(globalMemrefOp, getGlobalOp.getResult());
@@ -82,7 +88,6 @@ void PimMemory::allocateHost(ModuleOp moduleOp, func::FuncOp funcOp) {
} }
}); });
funcOp.walk([&](memref::AllocOp allocOp) { funcOp.walk([&](memref::AllocOp allocOp) {
if (!allocOp->getParentOfType<pim::PimCoreOp>()) if (!allocOp->getParentOfType<pim::PimCoreOp>())
gatherMemEntry(allocOp.getResult()); gatherMemEntry(allocOp.getResult());
@@ -100,6 +105,97 @@ void PimMemory::allocateCore(Operation* op) {
allocateGatheredMemory(); allocateGatheredMemory();
} }
std::string formatMemory(uint64_t bytes) {
const char* units[] = {"B", "KB", "MB", "GB", "TB", "PB", "EB"};
int i = 0;
double size = static_cast<double>(bytes);
while (size >= 1024 && i < 6) {
size /= 1024;
i++;
}
// Formats to 2 decimal places
std::string out;
llvm::raw_string_ostream rss(out);
rss << llvm::format("%.2f ", size) << units[i];
return rss.str();
}
/// Writes a grouped memory usage summary of this memory's entries to `file`.
///
/// Entries are first reduced to one per distinct address, then grouped by
/// {defining op name, entry size}. For every group the count, the total
/// size and the lowest/highest address are printed; a single grand total
/// follows once all groups have been emitted.
///
/// @param file Stream that receives the textual report.
void PimMemory::report(llvm::raw_ostream& file) {
  // Snapshot the map so it can be sorted, then keep a single entry per
  // address so that values sharing an address are counted only once.
  std::vector orderedList(globalMemEntriesMap.begin(), globalMemEntriesMap.end());
  std::sort(orderedList.begin(), orderedList.end(), [](const auto& lft, const auto& rgt) {
    return lft.second.address < rgt.second.address;
  });
  auto newEnd = std::unique(orderedList.begin(), orderedList.end(), [](const auto& lft, const auto& rgt) {
    return lft.second.address == rgt.second.address;
  });
  orderedList.erase(newEnd, orderedList.end());

  // Key: {op name, entry size}; value: addresses of the entries in that group.
  // The ordered map already fixes the print order, so no further sorting of
  // orderedList is needed before grouping.
  std::map<std::pair<std::string, uint64_t>, std::vector<uint64_t>> groupedStats;
  for (const auto& [value, memEntry] : orderedList) {
    // Values without a defining op (e.g. block arguments) share one bucket.
    std::string opName = "Unknown/BlockArg";
    if (auto op = value.getDefiningOp())
      opName = op->getName().getStringRef().str();
    groupedStats[{opName, memEntry.size}].push_back(memEntry.address);
  }

  file << "--- Memory Usage Report ---\n";
  uint64_t totalMemory = 0;
  for (const auto& [key, addresses] : groupedStats) {
    const std::string& opName = key.first;
    const uint64_t size = key.second;
    const uint64_t groupBytes = size * addresses.size();
    totalMemory += groupBytes;

    file.indent(4) << "Type: " << opName << " [" << formatMemory(size) << "]\n";
    file.indent(6) << "Count: " << addresses.size() << "\n";
    file.indent(6) << "Total Memory: " << formatMemory(groupBytes) << "\n";

    // Only the address range is printed to keep the report concise. A group
    // is created by its first push_back, so it always holds an address.
    auto [min, max] = std::minmax_element(addresses.begin(), addresses.end());
    file.indent(6) << "Range: " << llvm::format_hex(*min, 10) << " -> " << llvm::format_hex(*max, 10) << "\n";
    file << "\n";
  }

  // Printed once, after every group has been accumulated.
  // NOTE(review): the label says "Core" although reportHost() also routes the
  // host report through this function — confirm the intended wording.
  file << "Total Core Memory: " << formatMemory(totalMemory) << "\n";
}
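// Disabled alternative implementation of report(): it prints one entry per
// value (the operation or value itself, its address and its size) instead of
// grouped totals.
// void PimMemory::report(llvm::raw_ostream& file) {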
// std::vector orderedList(globalMemEntriesMap.begin(), globalMemEntriesMap.end());
// std::sort(
// orderedList.begin(), orderedList.end(), [](auto lft, auto rgt) { return lft.second.address < rgt.second.address;
// });
// auto newEnd = std::unique(orderedList.begin(), orderedList.end(), [](auto lft, auto rgt) {
// return (lft.first.getDefiningOp() == rgt.first.getDefiningOp()) && (lft.second.address == rgt.second.address);
// });
// orderedList.erase(newEnd, orderedList.end());
// mlir::OpPrintingFlags flags;
// flags.assumeVerified(true);
// for (auto& [value, memEntry] : orderedList) {
// if (auto op = value.getDefiningOp()) {
// file.indent(4) << op << ": ";
// op->print(file, flags);
// file << "\n";
// file.indent(6) << "Address: " << llvm::format_hex(memEntry.address, 10) << "\n";
// file.indent(6) << "Memory: " << formatMemory(memEntry.size) << "\n";
// }
// else {
// file.indent(4) << value << "\n";
// file.indent(6) << "Address: " << llvm::format_hex(memEntry.address, 10) << "\n";
// file.indent(6) << "Memory: " << formatMemory(memEntry.size) << "\n";
// }
// }
// }
/// Drops the memory entry recorded for `val`, if there is one.
/// Values that have no entry are ignored.
void PimMemory::remove(mlir::Value val) {
  auto entryIt = globalMemEntriesMap.find(val);
  if (entryIt == globalMemEntriesMap.end())
    return;
  globalMemEntriesMap.erase(entryIt);
}
MemEntry PimMemory::getMemEntry(mlir::Value value) const { MemEntry PimMemory::getMemEntry(mlir::Value value) const {
auto iter = globalMemEntriesMap.find(value); auto iter = globalMemEntriesMap.find(value);
assert("Missing memEntry for value" && iter != globalMemEntriesMap.end()); assert("Missing memEntry for value" && iter != globalMemEntriesMap.end());
@@ -140,6 +236,28 @@ size_t PimAcceleratorMemory::getValueAddress(mlir::Value value, const StaticValu
return iter->second.address + resolvedAddress->byteOffset; return iter->second.address + resolvedAddress->byteOffset;
} }
void PimAcceleratorMemory::reportHost() {
llvm::raw_os_ostream os(fileReport);
os << "Host Memory\n";
hostMem.report(os);
os.flush();
}
void PimAcceleratorMemory::reportCore(size_t coreId) {
llvm::raw_os_ostream os(fileReport);
os << "Core " << coreId << " Memory\n";
deviceMem.at(coreId).report(os);
os.flush();
}
/// Forgets every result of `op`: each result value is removed from the host
/// memory and from every device memory currently held in `deviceMem`.
void PimAcceleratorMemory::clean(mlir::Operation* op) {
  for (mlir::Value result : op->getResults()) {
    hostMem.remove(result);
    for (auto deviceIt = deviceMem.begin(), deviceEnd = deviceMem.end(); deviceIt != deviceEnd; ++deviceIt)
      deviceIt->second.remove(result);
  }
}
json::Object PimCodeGen::createEmptyOffset() { json::Object PimCodeGen::createEmptyOffset() {
json::Object offset; json::Object offset;
offset["offset_select"] = 0; offset["offset_select"] = 0;
@@ -434,8 +552,7 @@ void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticVa
emitInstruction(std::move(json)); emitInstruction(std::move(json));
} }
void PimCodeGen::codeGetGlobalOp(memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const { void PimCodeGen::codeGetGlobalOp(memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const {}
}
void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const { void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const {
auto srcAddr = addressOf(transposeOp.getInput(), knowledge); auto srcAddr = addressOf(transposeOp.getInput(), knowledge);
@@ -524,10 +641,9 @@ static SmallVector<int32_t> getBatchCoreIds(pim::PimCoreBatchOp coreBatchOp) {
static SmallVector<Operation*> collectTopLevelCoreLikeOps(func::FuncOp funcOp) { static SmallVector<Operation*> collectTopLevelCoreLikeOps(func::FuncOp funcOp) {
SmallVector<Operation*> coreLikeOps; SmallVector<Operation*> coreLikeOps;
for (Operation& op : funcOp.getBody().front()) { for (Operation& op : funcOp.getBody().front())
if (dyn_cast<pim::PimCoreOp>(&op) || dyn_cast<pim::PimCoreBatchOp>(&op)) if (dyn_cast<pim::PimCoreOp>(&op) || dyn_cast<pim::PimCoreBatchOp>(&op))
coreLikeOps.push_back(&op); coreLikeOps.push_back(&op);
}
return coreLikeOps; return coreLikeOps;
} }
@@ -543,10 +659,8 @@ static pim::PimCoreOp materializeScalarCoreFromBatchLane(pim::PimCoreBatchOp cor
laneWeights.push_back(coreBatchOp.getWeights()[lane * weightsPerLane + weightIndex]); laneWeights.push_back(coreBatchOp.getWeights()[lane * weightsPerLane + weightIndex]);
auto coreIds = getBatchCoreIds(coreBatchOp); auto coreIds = getBatchCoreIds(coreBatchOp);
auto scalarCore = pim::PimCoreOp::create(builder, auto scalarCore = pim::PimCoreOp::create(
coreBatchOp.getLoc(), builder, coreBatchOp.getLoc(), ValueRange(laneWeights), builder.getI32IntegerAttr(coreIds[lane]));
ValueRange(laneWeights),
builder.getI32IntegerAttr(coreIds[lane]));
Block* block = builder.createBlock(&scalarCore.getBody(), scalarCore.getBody().end()); Block* block = builder.createBlock(&scalarCore.getBody(), scalarCore.getBody().end());
IRMapping mapper; IRMapping mapper;
if (coreBatchOp.getBody().front().getNumArguments() == 1) if (coreBatchOp.getBody().front().getNumArguments() == 1)
@@ -569,7 +683,8 @@ static pim::PimCoreOp materializeScalarCoreFromBatchLane(pim::PimCoreBatchOp cor
} }
if (auto receiveBatchOp = dyn_cast<pim::PimReceiveBatchOp>(op)) { if (auto receiveBatchOp = dyn_cast<pim::PimReceiveBatchOp>(op)) {
auto scalarReceive = pim::PimReceiveOp::create(builder, auto scalarReceive =
pim::PimReceiveOp::create(builder,
receiveBatchOp.getLoc(), receiveBatchOp.getLoc(),
receiveBatchOp.getOutput().getType(), receiveBatchOp.getOutput().getType(),
mapper.lookup(receiveBatchOp.getOutputBuffer()), mapper.lookup(receiveBatchOp.getOutputBuffer()),
@@ -606,8 +721,10 @@ static pim::PimCoreOp materializeScalarCoreFromBatchLane(pim::PimCoreBatchOp cor
return scalarCore; return scalarCore;
} }
static void aliasMaterializedHostGlobals( static void aliasMaterializedHostGlobals(ModuleOp moduleOp,
ModuleOp moduleOp, func::FuncOp funcOp, pim::PimCoreOp coreOp, PimAcceleratorMemory& memory) { func::FuncOp funcOp,
pim::PimCoreOp coreOp,
PimAcceleratorMemory& memory) {
coreOp.walk([&](memref::GetGlobalOp getGlobalOp) { coreOp.walk([&](memref::GetGlobalOp getGlobalOp) {
if (hasWeightAlways(getGlobalOp) || memory.memEntriesMap.contains(getGlobalOp.getResult())) if (hasWeightAlways(getGlobalOp) || memory.memEntriesMap.contains(getGlobalOp.getResult()))
return; return;
@@ -990,6 +1107,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
PimAcceleratorMemory memory; PimAcceleratorMemory memory;
memory.hostMem.allocateHost(moduleOp, funcOp); memory.hostMem.allocateHost(moduleOp, funcOp);
memory.reportHost();
if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath)) if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath))
return err; return err;
@@ -1063,6 +1181,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
PimCodeGen coreCodeGen(memory, coreFileStream, emittedCoreIds); PimCodeGen coreCodeGen(memory, coreFileStream, emittedCoreIds);
aliasMaterializedHostGlobals(moduleOp, funcOp, coreOp, memory); aliasMaterializedHostGlobals(moduleOp, funcOp, coreOp, memory);
memory.getOrCreateDeviceMem(coreId).allocateCore(coreOp); memory.getOrCreateDeviceMem(coreId).allocateCore(coreOp);
memory.reportCore(coreId);
int64_t processedOperations = codeGenCoreOps(coreOp.getBody().front(), coreCodeGen); int64_t processedOperations = codeGenCoreOps(coreOp.getBody().front(), coreCodeGen);
if (processedOperations < 0) if (processedOperations < 0)
@@ -1093,8 +1212,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
if (auto error = sys::fs::create_link(outputDirPath + "/weights/" + fileName, if (auto error = sys::fs::create_link(outputDirPath + "/weights/" + fileName,
coreWeightsDirPath + "/crossbar_" + std::to_string(index) + ".bin")) { coreWeightsDirPath + "/crossbar_" + std::to_string(index) + ".bin")) {
errs() << "Error creating link file: " << (outputDirPath + "/weights/" + fileName) << " to " errs() << "Error creating link file: " << (outputDirPath + "/weights/" + fileName) << " to "
<< (coreWeightsDirPath + "/crossbar_" + std::to_string(index) + ".bin") << "\nError:" << (coreWeightsDirPath + "/crossbar_" + std::to_string(index) + ".bin")
<< error.message() << '\n'; << "\nError:" << error.message() << '\n';
return InvalidOutputFileAccess; return InvalidOutputFileAccess;
} }
} }
@@ -1103,9 +1222,11 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
} }
for (pim::PimCoreOp coreOp : scalarCores) for (pim::PimCoreOp coreOp : scalarCores)
if (coreOp.getOperation() != op) if (coreOp.getOperation() != op) {
coreOp.walk([&memory](Operation* op) { memory.clean(op); });
coreOp.erase(); coreOp.erase();
} }
}
return writeConfigJson(funcOp, memory, maxCoreId, std::move(xbarsPerArrayGroup), outputDirPath); return writeConfigJson(funcOp, memory, maxCoreId, std::move(xbarsPerArrayGroup), outputDirPath);
} }

View File

@@ -1,8 +1,12 @@
#pragma once #pragma once
#include "llvm/ADT/DenseMap.h" #include "mlir/IR/Operation.h"
#include "llvm-project/clang/include/clang/Basic/LLVM.h" #include "llvm-project/clang/include/clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/JSON.h" #include "llvm/Support/JSON.h"
#include "llvm/Support/raw_os_ostream.h"
#include <fstream>
#include "onnx-mlir/Compiler/OMCompilerTypes.h" #include "onnx-mlir/Compiler/OMCompilerTypes.h"
#include "src/Accelerators/PIM/Common/PimCommon.hpp" #include "src/Accelerators/PIM/Common/PimCommon.hpp"
@@ -34,6 +38,8 @@ public:
void allocateHost(mlir::ModuleOp moduleOp, mlir::func::FuncOp funcOp); void allocateHost(mlir::ModuleOp moduleOp, mlir::func::FuncOp funcOp);
void allocateCore(mlir::Operation* op); void allocateCore(mlir::Operation* op);
void report(llvm::raw_ostream& os);
void remove(mlir::Value val);
size_t getFirstAvailableAddress() const { return firstAvailableAddress; } size_t getFirstAvailableAddress() const { return firstAvailableAddress; }
MemEntry getMemEntry(mlir::Value value) const; MemEntry getMemEntry(mlir::Value value) const;
@@ -46,14 +52,28 @@ public:
private: private:
llvm::SmallDenseMap<size_t, PimMemory> deviceMem; llvm::SmallDenseMap<size_t, PimMemory> deviceMem;
std::fstream fileReport;
public: public:
PimAcceleratorMemory() PimAcceleratorMemory()
: hostMem(memEntriesMap) {} : hostMem(memEntriesMap) {
std::string outputDir = getOutputDir();
if (outputDir.empty())
return;
std::string dialectsDir = outputDir + "/reports/";
createDirectory(dialectsDir);
std::fstream file(dialectsDir + "/memory_report.txt", std::ios::out);
fileReport = std::move(file);
}
PimMemory& getOrCreateDeviceMem(size_t id); PimMemory& getOrCreateDeviceMem(size_t id);
size_t getValueAddress(mlir::Value value, const StaticValueKnowledge& knowledge = {}) const; size_t getValueAddress(mlir::Value value, const StaticValueKnowledge& knowledge = {}) const;
void reportHost();
void reportCore(size_t coreId);
void clean(mlir::Operation* op);
}; };
class PimCodeGen { class PimCodeGen {