better reports (dcp merge and memory)
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
#include "mlir/IR/Diagnostics.h"
|
||||
#include "mlir/IR/OpDefinition.h"
|
||||
#include "mlir/IR/TypeUtilities.h"
|
||||
#include "mlir/Support/LLVM.h"
|
||||
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/Support/LogicalResult.h"
|
||||
@@ -460,11 +459,11 @@ LogicalResult SpatComputeBatch::verify() {
|
||||
return emitError("all outputs must have the same type");
|
||||
}
|
||||
|
||||
if (auto coreIdAttr = (*this)->getAttr(onnx_mlir::kCoreIdsAttrName)) {
|
||||
if (auto coreIdAttr = (*this)->getAttr(kCoreIdsAttrName)) {
|
||||
auto coreIdsAttr = dyn_cast<DenseI32ArrayAttr>(coreIdAttr);
|
||||
if (!coreIdsAttr)
|
||||
return emitError("compute_batch coreIds attribute must be a dense i32 array");
|
||||
if (coreIdsAttr.size() != laneCountSz)
|
||||
if (coreIdsAttr.size() != static_cast<int64_t>(laneCountSz))
|
||||
return emitError("compute_batch coreIds array length must match laneCount");
|
||||
if (llvm::any_of(coreIdsAttr.asArrayRef(), [](int32_t coreId) { return coreId <= 0; }))
|
||||
return emitError("compute_batch coreIds values must be positive");
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "GraphSupport.hpp"
|
||||
@@ -31,7 +30,7 @@ llvm::DenseSet<TaskDCP*> collectReachableTasks(TaskDCP* root, bool followParents
|
||||
}
|
||||
|
||||
GraphDCP::CandidateRelations computeCandidateRelations(TaskDCP* candidate) {
|
||||
return {collectReachableTasks(candidate, true), collectReachableTasks(candidate, false)};
|
||||
return {collectReachableTasks(candidate, true), collectReachableTasks(candidate, false), {}};
|
||||
}
|
||||
|
||||
LocalScheduleSnapshot captureLocalScheduleState(TaskDCP* task,
|
||||
|
||||
@@ -38,6 +38,7 @@
|
||||
|
||||
#include "DCPGraph/DCPAnalysis.hpp"
|
||||
#include "RegularOpCompaction.hpp"
|
||||
#include "src/Accelerators/PIM/Common/IR/CompactAsmUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
|
||||
@@ -45,6 +46,7 @@ using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
namespace {
|
||||
using namespace onnx_mlir::compact_asm;
|
||||
using SpatCompute = spatial::SpatCompute;
|
||||
using SpatComputeBatch = spatial::SpatComputeBatch;
|
||||
|
||||
@@ -766,10 +768,10 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
if (outputDir.empty())
|
||||
return;
|
||||
|
||||
std::string dialectsDir = outputDir + "/dcp_graph";
|
||||
createDirectory(dialectsDir);
|
||||
std::string reportsDir = outputDir + "/reports";
|
||||
createDirectory(reportsDir);
|
||||
|
||||
std::fstream file(dialectsDir + "/" + name + ".txt", std::ios::out);
|
||||
std::fstream file(reportsDir + "/" + name + ".txt", std::ios::out);
|
||||
llvm::raw_os_ostream os(file);
|
||||
|
||||
struct ReportRow {
|
||||
@@ -778,41 +780,42 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
uint64_t weightCount = 0;
|
||||
uint64_t instructionCount = 0;
|
||||
bool isRebatched = false;
|
||||
SmallVector<int32_t> coreIds;
|
||||
};
|
||||
|
||||
uint64_t totalComputeOps = 0;
|
||||
uint64_t totalLogicalComputes = 0;
|
||||
uint64_t totalBatchComputeOps = 0;
|
||||
uint64_t totalMultiLaneBatchComputeOps = 0;
|
||||
std::vector<ReportRow> collectedData;
|
||||
|
||||
for (Operation& op : funcOp.getBody().front()) {
|
||||
if (auto spatCompute = dyn_cast<SpatCompute>(&op)) {
|
||||
uint64_t numInst = 0;
|
||||
for (auto& _ : spatCompute.getRegion().front())
|
||||
numInst++;
|
||||
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false});
|
||||
++numInst;
|
||||
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false, {}});
|
||||
totalLogicalComputes += 1;
|
||||
continue;
|
||||
}
|
||||
if (auto batch = dyn_cast<SpatComputeBatch>(&op)) {
|
||||
uint64_t numInst = 0;
|
||||
for (auto& _ : batch.getRegion().front())
|
||||
numInst++;
|
||||
++numInst;
|
||||
uint64_t logicalCount = static_cast<uint64_t>(batch.getLaneCount());
|
||||
collectedData.push_back({totalComputeOps++, logicalCount, batch.getWeights().size(), numInst, true});
|
||||
SmallVector<int32_t> coreIds;
|
||||
if (auto coreIdsAttr = batch->getAttrOfType<DenseI32ArrayAttr>(onnx_mlir::kCoreIdsAttrName))
|
||||
llvm::append_range(coreIds, coreIdsAttr.asArrayRef());
|
||||
collectedData.push_back({totalComputeOps++, logicalCount, batch.getWeights().size(), numInst, true, coreIds});
|
||||
totalLogicalComputes += logicalCount;
|
||||
totalBatchComputeOps += 1;
|
||||
if (batch.getLaneCount() > 1)
|
||||
totalMultiLaneBatchComputeOps += 1;
|
||||
}
|
||||
}
|
||||
|
||||
os << "Used CPUs: " << usedCpuCount << "\n";
|
||||
os << "Used cores: " << usedCpuCount << "\n";
|
||||
os << "Number of top-level compute ops: " << totalComputeOps << "\n";
|
||||
os << "Number of logical computes: " << totalLogicalComputes << "\n";
|
||||
os << "Number of top-level batch compute ops: " << totalBatchComputeOps << "\n";
|
||||
os << "Number of top-level multi-lane batch compute ops: " << totalMultiLaneBatchComputeOps << "\n\n";
|
||||
os << "\n";
|
||||
|
||||
std::stable_sort(collectedData.begin(), collectedData.end(), [](const ReportRow& lft, const ReportRow& rgt) {
|
||||
if (lft.isRebatched != rgt.isRebatched)
|
||||
@@ -855,31 +858,32 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
break;
|
||||
}
|
||||
|
||||
os << (current.isRebatched ? "Batch " : "Compute ") << current.opId;
|
||||
auto expectedPrintedValue = current.opId + 1;
|
||||
bool rangePrinted = false;
|
||||
cI++;
|
||||
for (; cI <= lastIndex; ++cI) {
|
||||
auto candidateToPrint = collectedData[cI].opId;
|
||||
if (candidateToPrint == expectedPrintedValue) {
|
||||
expectedPrintedValue = candidateToPrint + 1;
|
||||
rangePrinted = true;
|
||||
}
|
||||
else {
|
||||
if (rangePrinted)
|
||||
os << " - " << expectedPrintedValue - 1;
|
||||
os << " , " << candidateToPrint;
|
||||
rangePrinted = false;
|
||||
expectedPrintedValue = candidateToPrint + 1;
|
||||
if (current.isRebatched) {
|
||||
os << "Batch ";
|
||||
for (uint64_t index = cI; index <= lastIndex; ++index) {
|
||||
if (index != cI)
|
||||
os << ",\n ";
|
||||
os << collectedData[index].opId << " (cores ";
|
||||
if (collectedData[index].coreIds.empty())
|
||||
os << "unknown";
|
||||
else
|
||||
printCompressedIntegerEntries(os, ArrayRef<int32_t>(collectedData[index].coreIds));
|
||||
os << ")";
|
||||
}
|
||||
}
|
||||
if (rangePrinted && current.opId != expectedPrintedValue - 1)
|
||||
os << " - " << expectedPrintedValue - 1;
|
||||
else {
|
||||
os << "Compute ";
|
||||
SmallVector<uint64_t> opIds;
|
||||
opIds.reserve(lastIndex - cI + 1);
|
||||
for (uint64_t index = cI; index <= lastIndex; ++index)
|
||||
opIds.push_back(collectedData[index].opId);
|
||||
printCompressedIntegerEntries(os, ArrayRef<uint64_t>(opIds));
|
||||
}
|
||||
|
||||
os << " :\n";
|
||||
os << "\tNumber of logical computes " << current.logicalComputeCount << "\n";
|
||||
os << "\tNumber of instructions " << current.instructionCount << "\n";
|
||||
os << "\tNumber of used crossbars " << current.weightCount << "\n";
|
||||
os << ":\n";
|
||||
os << "\tNumber of logical computes: " << current.logicalComputeCount << "\n";
|
||||
os << "\tNumber of instructions: " << current.instructionCount << "\n";
|
||||
os << "\tNumber of used crossbars: " << current.weightCount << "\n";
|
||||
cI = lastIndex;
|
||||
}
|
||||
|
||||
@@ -1438,7 +1442,7 @@ public:
|
||||
return;
|
||||
}
|
||||
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_dcp_merged");
|
||||
generateReport(func, "spatial1_dcp_merged_report", analysisResult.cpuToLastComputeMap.size());
|
||||
generateReport(func, "dcp_merge_report", analysisResult.cpuToLastComputeMap.size());
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
Reference in New Issue
Block a user