fix pool lowering
Validate Operations / validate-operations (push) Has been cancelled

better reports (dcp merge and memory)
This commit is contained in:
NiccoloN
2026-05-12 12:32:23 +02:00
parent 8ad504fcdf
commit 80a7298552
8 changed files with 393 additions and 203 deletions
+2 -3
View File
@@ -3,7 +3,6 @@
#include "mlir/IR/Diagnostics.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/Support/LogicalResult.h"
@@ -460,11 +459,11 @@ LogicalResult SpatComputeBatch::verify() {
return emitError("all outputs must have the same type");
}
if (auto coreIdAttr = (*this)->getAttr(onnx_mlir::kCoreIdsAttrName)) {
if (auto coreIdAttr = (*this)->getAttr(kCoreIdsAttrName)) {
auto coreIdsAttr = dyn_cast<DenseI32ArrayAttr>(coreIdAttr);
if (!coreIdsAttr)
return emitError("compute_batch coreIds attribute must be a dense i32 array");
if (coreIdsAttr.size() != laneCountSz)
if (coreIdsAttr.size() != static_cast<int64_t>(laneCountSz))
return emitError("compute_batch coreIds array length must match laneCount");
if (llvm::any_of(coreIdsAttr.asArrayRef(), [](int32_t coreId) { return coreId <= 0; }))
return emitError("compute_batch coreIds values must be positive");
@@ -1,6 +1,5 @@
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <vector>
#include "GraphSupport.hpp"
@@ -31,7 +30,7 @@ llvm::DenseSet<TaskDCP*> collectReachableTasks(TaskDCP* root, bool followParents
}
// Builds the GraphDCP::CandidateRelations value for `candidate` from two calls to
// collectReachableTasks on it: one with followParents = true and one with
// followParents = false.
GraphDCP::CandidateRelations computeCandidateRelations(TaskDCP* candidate) {
// NOTE(review): there are two consecutive return statements here. This looks like the
// removed/added pair of a diff hunk rendered without its -/+ markers. As written, only
// the first return executes; the second, which differs only by a trailing empty `{}`
// initializer, is unreachable. Confirm which line the real file contains.
return {collectReachableTasks(candidate, true), collectReachableTasks(candidate, false)};
return {collectReachableTasks(candidate, true), collectReachableTasks(candidate, false), {}};
}
LocalScheduleSnapshot captureLocalScheduleState(TaskDCP* task,
@@ -38,6 +38,7 @@
#include "DCPGraph/DCPAnalysis.hpp"
#include "RegularOpCompaction.hpp"
#include "src/Accelerators/PIM/Common/IR/CompactAsmUtils.hpp"
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
@@ -45,6 +46,7 @@ using namespace mlir;
namespace onnx_mlir {
namespace {
using namespace onnx_mlir::compact_asm;
using SpatCompute = spatial::SpatCompute;
using SpatComputeBatch = spatial::SpatComputeBatch;
@@ -766,10 +768,10 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
if (outputDir.empty())
return;
std::string dialectsDir = outputDir + "/dcp_graph";
createDirectory(dialectsDir);
std::string reportsDir = outputDir + "/reports";
createDirectory(reportsDir);
std::fstream file(dialectsDir + "/" + name + ".txt", std::ios::out);
std::fstream file(reportsDir + "/" + name + ".txt", std::ios::out);
llvm::raw_os_ostream os(file);
struct ReportRow {
@@ -778,41 +780,42 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
uint64_t weightCount = 0;
uint64_t instructionCount = 0;
bool isRebatched = false;
SmallVector<int32_t> coreIds;
};
uint64_t totalComputeOps = 0;
uint64_t totalLogicalComputes = 0;
uint64_t totalBatchComputeOps = 0;
uint64_t totalMultiLaneBatchComputeOps = 0;
std::vector<ReportRow> collectedData;
for (Operation& op : funcOp.getBody().front()) {
if (auto spatCompute = dyn_cast<SpatCompute>(&op)) {
uint64_t numInst = 0;
for (auto& _ : spatCompute.getRegion().front())
numInst++;
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false});
++numInst;
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false, {}});
totalLogicalComputes += 1;
continue;
}
if (auto batch = dyn_cast<SpatComputeBatch>(&op)) {
uint64_t numInst = 0;
for (auto& _ : batch.getRegion().front())
numInst++;
++numInst;
uint64_t logicalCount = static_cast<uint64_t>(batch.getLaneCount());
collectedData.push_back({totalComputeOps++, logicalCount, batch.getWeights().size(), numInst, true});
SmallVector<int32_t> coreIds;
if (auto coreIdsAttr = batch->getAttrOfType<DenseI32ArrayAttr>(onnx_mlir::kCoreIdsAttrName))
llvm::append_range(coreIds, coreIdsAttr.asArrayRef());
collectedData.push_back({totalComputeOps++, logicalCount, batch.getWeights().size(), numInst, true, coreIds});
totalLogicalComputes += logicalCount;
totalBatchComputeOps += 1;
if (batch.getLaneCount() > 1)
totalMultiLaneBatchComputeOps += 1;
}
}
os << "Used CPUs: " << usedCpuCount << "\n";
os << "Used cores: " << usedCpuCount << "\n";
os << "Number of top-level compute ops: " << totalComputeOps << "\n";
os << "Number of logical computes: " << totalLogicalComputes << "\n";
os << "Number of top-level batch compute ops: " << totalBatchComputeOps << "\n";
os << "Number of top-level multi-lane batch compute ops: " << totalMultiLaneBatchComputeOps << "\n\n";
os << "\n";
std::stable_sort(collectedData.begin(), collectedData.end(), [](const ReportRow& lft, const ReportRow& rgt) {
if (lft.isRebatched != rgt.isRebatched)
@@ -855,31 +858,32 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
break;
}
os << (current.isRebatched ? "Batch " : "Compute ") << current.opId;
auto expectedPrintedValue = current.opId + 1;
bool rangePrinted = false;
cI++;
for (; cI <= lastIndex; ++cI) {
auto candidateToPrint = collectedData[cI].opId;
if (candidateToPrint == expectedPrintedValue) {
expectedPrintedValue = candidateToPrint + 1;
rangePrinted = true;
}
else {
if (rangePrinted)
os << " - " << expectedPrintedValue - 1;
os << " , " << candidateToPrint;
rangePrinted = false;
expectedPrintedValue = candidateToPrint + 1;
if (current.isRebatched) {
os << "Batch ";
for (uint64_t index = cI; index <= lastIndex; ++index) {
if (index != cI)
os << ",\n ";
os << collectedData[index].opId << " (cores ";
if (collectedData[index].coreIds.empty())
os << "unknown";
else
printCompressedIntegerEntries(os, ArrayRef<int32_t>(collectedData[index].coreIds));
os << ")";
}
}
if (rangePrinted && current.opId != expectedPrintedValue - 1)
os << " - " << expectedPrintedValue - 1;
else {
os << "Compute ";
SmallVector<uint64_t> opIds;
opIds.reserve(lastIndex - cI + 1);
for (uint64_t index = cI; index <= lastIndex; ++index)
opIds.push_back(collectedData[index].opId);
printCompressedIntegerEntries(os, ArrayRef<uint64_t>(opIds));
}
os << " :\n";
os << "\tNumber of logical computes " << current.logicalComputeCount << "\n";
os << "\tNumber of instructions " << current.instructionCount << "\n";
os << "\tNumber of used crossbars " << current.weightCount << "\n";
os << ":\n";
os << "\tNumber of logical computes: " << current.logicalComputeCount << "\n";
os << "\tNumber of instructions: " << current.instructionCount << "\n";
os << "\tNumber of used crossbars: " << current.weightCount << "\n";
cI = lastIndex;
}
@@ -1438,7 +1442,7 @@ public:
return;
}
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_dcp_merged");
generateReport(func, "spatial1_dcp_merged_report", analysisResult.cpuToLastComputeMap.size());
generateReport(func, "dcp_merge_report", analysisResult.cpuToLastComputeMap.size());
}
private: