add memory coalescing pass
Validate Operations / validate-operations (push) Has been cancelled

better reports
refactor for more code-reuse and pattern usage
fixes
This commit is contained in:
NiccoloN
2026-05-12 18:17:00 +02:00
parent 4f3570520c
commit 41de3cb150
26 changed files with 930 additions and 385 deletions
@@ -40,6 +40,7 @@
#include "RegularOpCompaction.hpp"
#include "src/Accelerators/PIM/Common/IR/CompactAsmUtils.hpp"
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
#include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp"
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
using namespace mlir;
@@ -764,18 +765,13 @@ void emitMotifProfile(func::FuncOp funcOp) {
}
void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpuCount = 0) {
std::string outputDir = getOutputDir();
if (outputDir.empty())
std::fstream file = openReportFile(name);
if (!file.is_open())
return;
std::string reportsDir = outputDir + "/reports";
createDirectory(reportsDir);
std::fstream file(reportsDir + "/" + name + ".txt", std::ios::out);
llvm::raw_os_ostream os(file);
struct ReportRow {
uint64_t opId = 0;
uint64_t id = 0;
uint64_t logicalComputeCount = 0;
uint64_t weightCount = 0;
uint64_t instructionCount = 0;
@@ -786,6 +782,9 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
uint64_t totalComputeOps = 0;
uint64_t totalLogicalComputes = 0;
uint64_t totalBatchComputeOps = 0;
uint64_t totalInstructionCount = 0;
uint64_t totalWeightCount = 0;
uint64_t nextBatchId = 0;
std::vector<ReportRow> collectedData;
for (Operation& op : funcOp.getBody().front()) {
@@ -793,8 +792,13 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
uint64_t numInst = 0;
for (auto& _ : spatCompute.getRegion().front())
++numInst;
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false, {}});
SmallVector<int32_t> coreIds;
if (auto coreId = getComputeCoreId(spatCompute))
coreIds.push_back(*coreId);
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false, coreIds});
totalLogicalComputes += 1;
totalInstructionCount += numInst;
totalWeightCount += spatCompute.getWeights().size();
continue;
}
if (auto batch = dyn_cast<SpatComputeBatch>(&op)) {
@@ -805,44 +809,27 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
SmallVector<int32_t> coreIds;
if (auto coreIdsAttr = batch->getAttrOfType<DenseI32ArrayAttr>(onnx_mlir::kCoreIdsAttrName))
llvm::append_range(coreIds, coreIdsAttr.asArrayRef());
collectedData.push_back({totalComputeOps++, logicalCount, batch.getWeights().size(), numInst, true, coreIds});
collectedData.push_back({nextBatchId++, logicalCount, batch.getWeights().size(), numInst, true, coreIds});
totalComputeOps += 1;
totalLogicalComputes += logicalCount;
totalBatchComputeOps += 1;
totalInstructionCount += numInst * logicalCount;
totalWeightCount += batch.getWeights().size();
}
}
os << "Used cores: " << usedCpuCount << "\n";
os << "Number of top-level compute ops: " << totalComputeOps << "\n";
os << "Number of logical computes: " << totalLogicalComputes << "\n";
os << "Number of top-level batch compute ops: " << totalBatchComputeOps << "\n";
os << "\n";
llvm::SmallVector<ReportField, 6> totalFields = {{"Used cores", std::to_string(usedCpuCount)},
{"Number of top-level compute ops", std::to_string(totalComputeOps)},
{"Number of logical computes", std::to_string(totalLogicalComputes)},
{"Number of top-level batch compute ops",
std::to_string(totalBatchComputeOps)},
{"Number of instructions", std::to_string(totalInstructionCount)},
{"Number of used crossbars", std::to_string(totalWeightCount)}};
printReportTotalsBlock(os, totalFields);
if (!collectedData.empty())
os << "\n";
std::stable_sort(collectedData.begin(), collectedData.end(), [](const ReportRow& lft, const ReportRow& rgt) {
if (lft.isRebatched != rgt.isRebatched)
return lft.isRebatched > rgt.isRebatched;
if (lft.instructionCount < rgt.instructionCount)
return false;
else if (rgt.instructionCount < lft.instructionCount)
return true;
if (lft.weightCount < rgt.weightCount)
return false;
else if (rgt.weightCount < lft.weightCount)
return true;
if (lft.logicalComputeCount < rgt.logicalComputeCount)
return false;
else if (rgt.logicalComputeCount < lft.logicalComputeCount)
return true;
if (lft.opId < rgt.opId)
return true;
else if (rgt.opId < lft.opId)
return false;
return true;
});
sortReportEntriesByFirstCore(collectedData);
for (uint64_t cI = 0; cI < totalComputeOps; ++cI) {
uint64_t lastIndex = cI;
@@ -863,7 +850,7 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
for (uint64_t index = cI; index <= lastIndex; ++index) {
if (index != cI)
os << ",\n ";
os << collectedData[index].opId << " (cores ";
os << collectedData[index].id << " (cores ";
if (collectedData[index].coreIds.empty())
os << "unknown";
else
@@ -876,14 +863,32 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
SmallVector<uint64_t> opIds;
opIds.reserve(lastIndex - cI + 1);
for (uint64_t index = cI; index <= lastIndex; ++index)
opIds.push_back(collectedData[index].opId);
opIds.push_back(collectedData[index].id);
printCompressedIntegerEntries(os, ArrayRef<uint64_t>(opIds));
}
os << ":\n";
os << "\tNumber of logical computes: " << current.logicalComputeCount << "\n";
os << "\tNumber of instructions: " << current.instructionCount << "\n";
os << "\tNumber of used crossbars: " << current.weightCount << "\n";
uint64_t perCoreLogicalComputeCount = current.isRebatched ? 1 : current.logicalComputeCount;
uint64_t perCoreInstructionCount = current.instructionCount;
uint64_t perCoreWeightCount =
current.logicalComputeCount == 0 ? 0 : current.weightCount / current.logicalComputeCount;
uint64_t totalEntryInstructionCount = current.instructionCount * current.logicalComputeCount;
llvm::SmallVector<ReportField, 3> perCoreFields = {
{"Number of logical computes", std::to_string(perCoreLogicalComputeCount)},
{"Number of instructions", std::to_string(perCoreInstructionCount)},
{"Number of used crossbars", std::to_string(perCoreWeightCount)}};
if (current.isRebatched) {
llvm::SmallVector<ReportField, 3> totalEntryFields = {
{"Number of logical computes", std::to_string(current.logicalComputeCount)},
{"Number of instructions", std::to_string(totalEntryInstructionCount)},
{"Number of used crossbars", std::to_string(current.weightCount)}};
printReportPerCoreAndTotalFields(os, perCoreFields, totalEntryFields);
}
else {
printReportFlatFields(os, perCoreFields);
}
printReportEntrySeparator(os, lastIndex + 1 < totalComputeOps);
cI = lastIndex;
}