better reports refactor for more code-reuse and pattern usage fixes
This commit is contained in:
@@ -40,6 +40,7 @@
|
||||
#include "RegularOpCompaction.hpp"
|
||||
#include "src/Accelerators/PIM/Common/IR/CompactAsmUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
@@ -764,18 +765,13 @@ void emitMotifProfile(func::FuncOp funcOp) {
|
||||
}
|
||||
|
||||
void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpuCount = 0) {
|
||||
std::string outputDir = getOutputDir();
|
||||
if (outputDir.empty())
|
||||
std::fstream file = openReportFile(name);
|
||||
if (!file.is_open())
|
||||
return;
|
||||
|
||||
std::string reportsDir = outputDir + "/reports";
|
||||
createDirectory(reportsDir);
|
||||
|
||||
std::fstream file(reportsDir + "/" + name + ".txt", std::ios::out);
|
||||
llvm::raw_os_ostream os(file);
|
||||
|
||||
struct ReportRow {
|
||||
uint64_t opId = 0;
|
||||
uint64_t id = 0;
|
||||
uint64_t logicalComputeCount = 0;
|
||||
uint64_t weightCount = 0;
|
||||
uint64_t instructionCount = 0;
|
||||
@@ -786,6 +782,9 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
uint64_t totalComputeOps = 0;
|
||||
uint64_t totalLogicalComputes = 0;
|
||||
uint64_t totalBatchComputeOps = 0;
|
||||
uint64_t totalInstructionCount = 0;
|
||||
uint64_t totalWeightCount = 0;
|
||||
uint64_t nextBatchId = 0;
|
||||
std::vector<ReportRow> collectedData;
|
||||
|
||||
for (Operation& op : funcOp.getBody().front()) {
|
||||
@@ -793,8 +792,13 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
uint64_t numInst = 0;
|
||||
for (auto& _ : spatCompute.getRegion().front())
|
||||
++numInst;
|
||||
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false, {}});
|
||||
SmallVector<int32_t> coreIds;
|
||||
if (auto coreId = getComputeCoreId(spatCompute))
|
||||
coreIds.push_back(*coreId);
|
||||
collectedData.push_back({totalComputeOps++, 1, spatCompute.getWeights().size(), numInst, false, coreIds});
|
||||
totalLogicalComputes += 1;
|
||||
totalInstructionCount += numInst;
|
||||
totalWeightCount += spatCompute.getWeights().size();
|
||||
continue;
|
||||
}
|
||||
if (auto batch = dyn_cast<SpatComputeBatch>(&op)) {
|
||||
@@ -805,44 +809,27 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
SmallVector<int32_t> coreIds;
|
||||
if (auto coreIdsAttr = batch->getAttrOfType<DenseI32ArrayAttr>(onnx_mlir::kCoreIdsAttrName))
|
||||
llvm::append_range(coreIds, coreIdsAttr.asArrayRef());
|
||||
collectedData.push_back({totalComputeOps++, logicalCount, batch.getWeights().size(), numInst, true, coreIds});
|
||||
collectedData.push_back({nextBatchId++, logicalCount, batch.getWeights().size(), numInst, true, coreIds});
|
||||
totalComputeOps += 1;
|
||||
totalLogicalComputes += logicalCount;
|
||||
totalBatchComputeOps += 1;
|
||||
totalInstructionCount += numInst * logicalCount;
|
||||
totalWeightCount += batch.getWeights().size();
|
||||
}
|
||||
}
|
||||
|
||||
os << "Used cores: " << usedCpuCount << "\n";
|
||||
os << "Number of top-level compute ops: " << totalComputeOps << "\n";
|
||||
os << "Number of logical computes: " << totalLogicalComputes << "\n";
|
||||
os << "Number of top-level batch compute ops: " << totalBatchComputeOps << "\n";
|
||||
os << "\n";
|
||||
llvm::SmallVector<ReportField, 6> totalFields = {{"Used cores", std::to_string(usedCpuCount)},
|
||||
{"Number of top-level compute ops", std::to_string(totalComputeOps)},
|
||||
{"Number of logical computes", std::to_string(totalLogicalComputes)},
|
||||
{"Number of top-level batch compute ops",
|
||||
std::to_string(totalBatchComputeOps)},
|
||||
{"Number of instructions", std::to_string(totalInstructionCount)},
|
||||
{"Number of used crossbars", std::to_string(totalWeightCount)}};
|
||||
printReportTotalsBlock(os, totalFields);
|
||||
if (!collectedData.empty())
|
||||
os << "\n";
|
||||
|
||||
std::stable_sort(collectedData.begin(), collectedData.end(), [](const ReportRow& lft, const ReportRow& rgt) {
|
||||
if (lft.isRebatched != rgt.isRebatched)
|
||||
return lft.isRebatched > rgt.isRebatched;
|
||||
|
||||
if (lft.instructionCount < rgt.instructionCount)
|
||||
return false;
|
||||
else if (rgt.instructionCount < lft.instructionCount)
|
||||
return true;
|
||||
|
||||
if (lft.weightCount < rgt.weightCount)
|
||||
return false;
|
||||
else if (rgt.weightCount < lft.weightCount)
|
||||
return true;
|
||||
|
||||
if (lft.logicalComputeCount < rgt.logicalComputeCount)
|
||||
return false;
|
||||
else if (rgt.logicalComputeCount < lft.logicalComputeCount)
|
||||
return true;
|
||||
|
||||
if (lft.opId < rgt.opId)
|
||||
return true;
|
||||
else if (rgt.opId < lft.opId)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
});
|
||||
sortReportEntriesByFirstCore(collectedData);
|
||||
|
||||
for (uint64_t cI = 0; cI < totalComputeOps; ++cI) {
|
||||
uint64_t lastIndex = cI;
|
||||
@@ -863,7 +850,7 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
for (uint64_t index = cI; index <= lastIndex; ++index) {
|
||||
if (index != cI)
|
||||
os << ",\n ";
|
||||
os << collectedData[index].opId << " (cores ";
|
||||
os << collectedData[index].id << " (cores ";
|
||||
if (collectedData[index].coreIds.empty())
|
||||
os << "unknown";
|
||||
else
|
||||
@@ -876,14 +863,32 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu
|
||||
SmallVector<uint64_t> opIds;
|
||||
opIds.reserve(lastIndex - cI + 1);
|
||||
for (uint64_t index = cI; index <= lastIndex; ++index)
|
||||
opIds.push_back(collectedData[index].opId);
|
||||
opIds.push_back(collectedData[index].id);
|
||||
printCompressedIntegerEntries(os, ArrayRef<uint64_t>(opIds));
|
||||
}
|
||||
|
||||
os << ":\n";
|
||||
os << "\tNumber of logical computes: " << current.logicalComputeCount << "\n";
|
||||
os << "\tNumber of instructions: " << current.instructionCount << "\n";
|
||||
os << "\tNumber of used crossbars: " << current.weightCount << "\n";
|
||||
uint64_t perCoreLogicalComputeCount = current.isRebatched ? 1 : current.logicalComputeCount;
|
||||
uint64_t perCoreInstructionCount = current.instructionCount;
|
||||
uint64_t perCoreWeightCount =
|
||||
current.logicalComputeCount == 0 ? 0 : current.weightCount / current.logicalComputeCount;
|
||||
uint64_t totalEntryInstructionCount = current.instructionCount * current.logicalComputeCount;
|
||||
|
||||
llvm::SmallVector<ReportField, 3> perCoreFields = {
|
||||
{"Number of logical computes", std::to_string(perCoreLogicalComputeCount)},
|
||||
{"Number of instructions", std::to_string(perCoreInstructionCount)},
|
||||
{"Number of used crossbars", std::to_string(perCoreWeightCount)}};
|
||||
if (current.isRebatched) {
|
||||
llvm::SmallVector<ReportField, 3> totalEntryFields = {
|
||||
{"Number of logical computes", std::to_string(current.logicalComputeCount)},
|
||||
{"Number of instructions", std::to_string(totalEntryInstructionCount)},
|
||||
{"Number of used crossbars", std::to_string(current.weightCount)}};
|
||||
printReportPerCoreAndTotalFields(os, perCoreFields, totalEntryFields);
|
||||
}
|
||||
else {
|
||||
printReportFlatFields(os, perCoreFields);
|
||||
}
|
||||
printReportEntrySeparator(os, lastIndex + 1 < totalComputeOps);
|
||||
cI = lastIndex;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user