DCP Merge status
All checks were successful
Validate Operations / validate-operations (push) Successful in 22m29s

This commit is contained in:
ilgeco
2026-04-23 18:40:33 +02:00
parent 5545b0f672
commit 49dea15b95

View File

@@ -8,14 +8,19 @@
#include "mlir/Pass/Pass.h" #include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h" #include "mlir/Support/LLVM.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include <cstddef> #include <cstddef>
#include <cstdint>
#include <fstream>
#include <functional> #include <functional>
#include <iterator> #include <iterator>
#include <memory> #include <memory>
#include <vector>
#include "DCPGraph/DCPAnalysis.hpp" #include "DCPGraph/DCPAnalysis.hpp"
#include "src/Accelerators/PIM/Common/PimCommon.hpp" #include "src/Accelerators/PIM/Common/PimCommon.hpp"
@@ -26,6 +31,41 @@ namespace onnx_mlir {
namespace { namespace {
using SpatCompute = spatial::SpatCompute; using SpatCompute = spatial::SpatCompute;
/// Writes a plain-text statistics report for `funcOp` to
/// `<outputDir>/dialects/stats/<name>.txt`, where `<outputDir>` is the value
/// returned by getOutputDir().
///
/// For each SpatCompute yielded by `funcOp.getOps<SpatCompute>()`, in
/// iteration order, the report contains:
///   - the number of operations in the first block of the op's region
///     (0 if the region has no block), and
///   - the number of weight operands, reported as "used crossbars".
///
/// \param funcOp function whose SpatCompute ops are summarized.
/// \param name   base name (without extension) of the report file.
///
/// Does nothing when getOutputDir() returns an empty string. If the report
/// file cannot be opened, a warning is printed to llvm::errs() and nothing is
/// written.
void generateReport(func::FuncOp funcOp, const std::string& name) {
  const std::string outputDir = getOutputDir();
  if (outputDir.empty())
    return;

  const std::string statsDir = outputDir + "/dialects/stats";
  createDirectory(statsDir);

  const std::string reportPath = statsDir + "/" + name + ".txt";
  std::fstream file(reportPath, std::ios::out);
  if (!file.is_open()) {
    // Report generation is best-effort: warn instead of failing the pass.
    llvm::errs() << "Warning: unable to open report file '" << reportPath << "'\n";
    return;
  }

  llvm::raw_os_ostream os(file);

  // Stream each entry as it is visited; the IR is not modified here, so there
  // is no need to buffer the statistics before printing them.
  uint64_t computeIndex = 0;
  for (auto spatCompute : funcOp.getOps<SpatCompute>()) {
    auto& region = spatCompute.getRegion();
    uint64_t numInstructions = 0;
    // Guard against a region without blocks: calling front() on it is invalid.
    if (!region.empty()) {
      auto& body = region.front();
      numInstructions = static_cast<uint64_t>(std::distance(body.begin(), body.end()));
    }
    const uint64_t numWeights = spatCompute.getWeights().size();

    os << "Compute " << computeIndex << ":\n";
    os << "\tNumber of instructions " << numInstructions << "\n";
    os << "\tNumber of used crossbars " << numWeights << "\n";
    ++computeIndex;
  }

  // Flush the LLVM adaptor before closing the underlying std stream so that
  // no buffered output is lost.
  os.flush();
  file.close();
}
struct ComputeValueResults { struct ComputeValueResults {
SmallVector<Value> innerValues; SmallVector<Value> innerValues;
@@ -45,9 +85,7 @@ public:
LazyInsertComputeResult(ComputeValueResults computeValueResults, LazyInsertComputeResult(ComputeValueResults computeValueResults,
std::function<std::pair<Value, std::function<void(InsertPoint)>>(size_t)> channelNewInserter, std::function<std::pair<Value, std::function<void(InsertPoint)>>(size_t)> channelNewInserter,
bool isOnlyChannel) bool isOnlyChannel)
: computeResults(computeValueResults), : computeResults(computeValueResults), onlyChannel(isOnlyChannel), channelNewInserter(channelNewInserter) {}
onlyChannel(isOnlyChannel),
channelNewInserter(channelNewInserter) {}
struct ChannelOrLocalOp { struct ChannelOrLocalOp {
Value data; Value data;
@@ -107,21 +145,19 @@ public:
size_t cpu = analysisResult.computeToCpuMap.at(currentComputeNode); size_t cpu = analysisResult.computeToCpuMap.at(currentComputeNode);
if (!cpuToNewComputeMap.contains(cpu)) { if (!cpuToNewComputeMap.contains(cpu)) {
ValueTypeRange<ResultRange> newComputeType = cpuToLastComputeMap.at(cpu).getResultTypes(); ValueTypeRange<ResultRange> newComputeType = cpuToLastComputeMap.at(cpu).getResultTypes();
auto [newCompute, computeValueResult] = createNewComputeNode( auto [newCompute, computeValueResult] =
currentComputeNode, newComputeType, lastComputeOfCpu.contains(currentComputeNode)); createNewComputeNode(currentComputeNode, newComputeType, lastComputeOfCpu.contains(currentComputeNode));
cpuToNewComputeMap[cpu] = newCompute; cpuToNewComputeMap[cpu] = newCompute;
newComputeNodeResults.insert( newComputeNodeResults.insert(std::make_pair(
std::make_pair(currentComputeNode, currentComputeNode,
createLazyComputeResult( createLazyComputeResult(newCompute, computeValueResult, lastComputeOfCpu.contains(currentComputeNode))));
newCompute, computeValueResult, lastComputeOfCpu.contains(currentComputeNode))));
} }
else { else {
auto [newCompute, computeValueResult] = mergeIntoComputeNode( auto [newCompute, computeValueResult] = mergeIntoComputeNode(
cpuToNewComputeMap[cpu], currentComputeNode, lastComputeOfCpu.contains(currentComputeNode)); cpuToNewComputeMap[cpu], currentComputeNode, lastComputeOfCpu.contains(currentComputeNode));
newComputeNodeResults.insert( newComputeNodeResults.insert(std::make_pair(
std::make_pair(currentComputeNode, currentComputeNode,
createLazyComputeResult( createLazyComputeResult(newCompute, computeValueResult, lastComputeOfCpu.contains(currentComputeNode))));
newCompute, computeValueResult, lastComputeOfCpu.contains(currentComputeNode))));
} }
} }
@@ -132,11 +168,12 @@ public:
} }
func::FuncOp func = getOperation(); func::FuncOp func = getOperation();
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_dcp_merged"); dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_dcp_merged");
generateReport(func, "spatial1_dcp_merged_report");
} }
private: private:
std::pair<SpatCompute, ComputeValueResults> createNewComputeNode( std::pair<SpatCompute, ComputeValueResults>
SpatCompute oldCompute, ValueTypeRange<ResultRange> newComputeType, bool lastCompute) { createNewComputeNode(SpatCompute oldCompute, ValueTypeRange<ResultRange> newComputeType, bool lastCompute) {
func::FuncOp func = getOperation(); func::FuncOp func = getOperation();
auto loc = func.getLoc(); auto loc = func.getLoc();
IRRewriter rewriter(&getContext()); IRRewriter rewriter(&getContext());
@@ -161,8 +198,7 @@ private:
auto newCompute = SpatCompute::create(rewriter, loc, newComputeType, newComputeOperand); auto newCompute = SpatCompute::create(rewriter, loc, newComputeType, newComputeOperand);
rewriter.createBlock( rewriter.createBlock(&newCompute.getBody(), newCompute.getBody().end(), newBBOperandType, newBBLocations);
&newCompute.getBody(), newCompute.getBody().end(), newBBOperandType, newBBLocations);
newCompute.getProperties().setOperandSegmentSizes( newCompute.getProperties().setOperandSegmentSizes(
{(int) oldCompute.getWeights().size(), (int) newBBOperandType.size()}); {(int) oldCompute.getWeights().size(), (int) newBBOperandType.size()});
@@ -178,15 +214,14 @@ private:
mapper.map(oldBB.getArgument(indexOld - indexOldStart), newBB.getArgument(indexNew++)); mapper.map(oldBB.getArgument(indexOld - indexOldStart), newBB.getArgument(indexNew++));
} }
else { else {
auto argWeightCompute = auto argWeightCompute = llvm::dyn_cast_if_present<SpatCompute>(oldCompute.getOperand(indexOld).getDefiningOp());
llvm::dyn_cast_if_present<SpatCompute>(oldCompute.getOperand(indexOld).getDefiningOp());
auto argResultIndex = cast<OpResult>(oldCompute.getOperand(indexOld)).getResultNumber(); auto argResultIndex = cast<OpResult>(oldCompute.getOperand(indexOld)).getResultNumber();
LazyInsertComputeResult& lazyArgWeight = newComputeNodeResults.at(argWeightCompute); LazyInsertComputeResult& lazyArgWeight = newComputeNodeResults.at(argWeightCompute);
auto [channelVal, isChannel] = lazyArgWeight.getAsChannelValueAndInsertSender(argResultIndex); auto [channelVal, isChannel] = lazyArgWeight.getAsChannelValueAndInsertSender(argResultIndex);
assert(isChannel == true); assert(isChannel == true);
spatial::SpatChannelReceiveOp receiveOp = spatial::SpatChannelReceiveOp::create( spatial::SpatChannelReceiveOp receiveOp =
rewriter, loc, oldCompute.getOperand(indexOld).getType(), channelVal); spatial::SpatChannelReceiveOp::create(rewriter, loc, oldCompute.getOperand(indexOld).getType(), channelVal);
mapper.map(oldBB.getArgument(indexOld - indexOldStart), receiveOp); mapper.map(oldBB.getArgument(indexOld - indexOldStart), receiveOp);
} }
} }
@@ -318,9 +353,8 @@ private:
return {cast<SpatCompute>(toCompute), computeValueResults}; return {cast<SpatCompute>(toCompute), computeValueResults};
} }
LazyInsertComputeResult createLazyComputeResult(SpatCompute compute, LazyInsertComputeResult
ComputeValueResults computeValueResults, createLazyComputeResult(SpatCompute compute, ComputeValueResults computeValueResults, bool lastCompute) {
bool lastCompute) {
func::FuncOp funcOp = cast<func::FuncOp>(compute->getParentOp()); func::FuncOp funcOp = cast<func::FuncOp>(compute->getParentOp());
auto* context = &getContext(); auto* context = &getContext();
auto loc = funcOp.getLoc(); auto loc = funcOp.getLoc();
@@ -335,11 +369,12 @@ private:
auto channelVal = channelOp.getResult(); auto channelVal = channelOp.getResult();
auto insertVal = auto insertVal =
[&context, loc, computeValueResults, channelVal, resultIndex](mlir::IRRewriter::InsertPoint sendInsertPoint) { [&context, loc, computeValueResults, channelVal, resultIndex](mlir::IRRewriter::InsertPoint sendInsertPoint) {
IRRewriter rewriter(context); IRRewriter rewriter(context);
rewriter.restoreInsertionPoint(sendInsertPoint); rewriter.restoreInsertionPoint(sendInsertPoint);
auto spatSend = spatial::SpatChannelSendOp::create(rewriter, loc, channelVal, computeValueResults.get(resultIndex)); auto spatSend =
return spatSend; spatial::SpatChannelSendOp::create(rewriter, loc, channelVal, computeValueResults.get(resultIndex));
}; return spatSend;
};
std::pair<Value, std::function<void(mlir::IRRewriter::InsertPoint)>> ret {channelVal, insertVal}; std::pair<Value, std::function<void(mlir::IRRewriter::InsertPoint)>> ret {channelVal, insertVal};
return ret; return ret;
}; };