Export csv graph for gephi
Validate Operations / validate-operations (push) Has been cancelled

This commit is contained in:
ilgeco
2026-07-02 17:01:26 +02:00
parent 8d3eb929f6
commit c4dd28a607
14 changed files with 926 additions and 10 deletions
+11 -5
View File
@@ -7,15 +7,21 @@
namespace onnx_mlir {
void dumpModule(mlir::ModuleOp moduleOp, const std::string& name) {
std::fstream openDialectDumpFileWithExtension(const std::string& name, llvm::StringRef destination, llvm::StringRef extension) {
std::string outputDir = getOutputDir();
if (outputDir.empty())
return {};
std::string dialectsDir = (outputDir + destination).str();
createDirectory(dialectsDir);
return std::fstream(dialectsDir + "/" + name + "." + extension.str(), std::ios::out);
}
void dumpModule(mlir::ModuleOp moduleOp, const std::string& name) {
std::fstream file = openDialectDumpFileWithExtension(name, "/dialects", "mlir");
if (!file.is_open())
return;
std::string dialectsDir = outputDir + "/dialects";
createDirectory(dialectsDir);
std::fstream file(dialectsDir + "/" + name + ".mlir", std::ios::out);
llvm::raw_os_ostream os(file);
mlir::OpPrintingFlags flags;
flags.elideLargeElementsAttrs().enableDebugInfo(true, false);
+5
View File
@@ -1,7 +1,9 @@
#pragma once
#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/StringRef.h"
#include <fstream>
#include <string>
namespace onnx_mlir {
@@ -10,4 +12,7 @@ namespace onnx_mlir {
/// directory for pass-level debugging.
void dumpModule(mlir::ModuleOp moduleOp, const std::string& name);
/// Opens a file under the same dialect dump directory used by dumpModule.
std::fstream openDialectDumpFileWithExtension(const std::string& name,llvm::StringRef destination = "/dialects", llvm::StringRef extension = "mlir");
} // namespace onnx_mlir
+12
View File
@@ -57,6 +57,18 @@ llvm::cl::opt<PimConvLoweringType> pimConvLowering(
llvm::cl::init(PimConvLoweringAuto),
llvm::cl::cat(OnnxMlirOptions));
llvm::cl::opt<PimSpatialDataflowExportType> pimExportSpatialDataflow(
"pim-export-spatial-dataflow",
llvm::cl::desc("Emit Gephi-importable CSV dataflow reports around MergeComputeNodes materialization"),
llvm::cl::values(clEnumValN(SpatialDataflowExportNone, "none", "Do not emit Spatial dataflow CSV reports")),
llvm::cl::values(clEnumValN(SpatialDataflowExportPre, "pre", "Emit pre-materialization Spatial dataflow CSV reports")),
llvm::cl::values(
clEnumValN(SpatialDataflowExportPost, "post", "Emit post-materialization Spatial dataflow CSV reports")),
llvm::cl::values(
clEnumValN(SpatialDataflowExportBoth, "both", "Emit both pre- and post-materialization Spatial dataflow CSV reports")),
llvm::cl::init(SpatialDataflowExportNone),
llvm::cl::cat(OnnxMlirOptions));
llvm::cl::opt<bool>
pimOnlyCodegen("pim-only-codegen",
llvm::cl::desc("Only generate code for PIM (assume input is already in bufferized PIM IR)"),
+8
View File
@@ -42,11 +42,19 @@ typedef enum {
PimConvLoweringTiled2D = 8,
} PimConvLoweringType;
typedef enum {
SpatialDataflowExportNone = 0,
SpatialDataflowExportPre = 1,
SpatialDataflowExportPost = 2,
SpatialDataflowExportBoth = 3,
} PimSpatialDataflowExportType;
extern llvm::cl::OptionCategory OnnxMlirOptions;
extern llvm::cl::opt<PimEmissionTargetType> pimEmissionTarget;
extern llvm::cl::opt<PimMergeSchedulerType> pimMergeScheduler;
extern llvm::cl::opt<PimMemoryReportLevel> pimMemoryReport;
extern llvm::cl::opt<PimConvLoweringType> pimConvLowering;
extern llvm::cl::opt<PimSpatialDataflowExportType> pimExportSpatialDataflow;
extern llvm::cl::opt<bool> pimOnlyCodegen;
extern llvm::cl::opt<bool> pimDisableMemoryCoalescing;
@@ -20,6 +20,7 @@ add_pim_library(OMONNXToSpatial
Patterns/NN/Sigmoid.cpp
Patterns/NN/Softmax.cpp
Patterns/Tensor/Concat.cpp
Patterns/Tensor/Flatten.cpp
Patterns/Tensor/Gather.cpp
Patterns/Tensor/Resize.cpp
Patterns/Tensor/Reshape.cpp
@@ -16,6 +16,7 @@
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/PlanLowering.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/SpatialDataflowCsvExporter.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Accelerators/PIM/Pass/PIMPasses.h"
#include "src/Dialect/ONNX/ONNXOps.hpp"
@@ -392,10 +393,17 @@ struct LowerSpatialPlansPass final : PassWrapper<LowerSpatialPlansPass, Operatio
hasIllegalOps = true;
}
});
if (hasIllegalOps)
if (hasIllegalOps) {
signalPassFailure();
else
dumpModule(moduleOp, "spatial1_premerge");
} else {
dumpModule(moduleOp, "spatial1_graph");
spatial::SpatialDataflowExportStage exportMode = spatial::getSpatialDataflowExportStage();
if (spatial::shouldExportSpatialDataflowStage(exportMode, spatial::SpatialDataflowExportStage::Pre)
&& failed(spatial::exportSpatialDataflowCsvPre(funcOp))) {
signalPassFailure();
return;
}
}
if (!verifyLogicalPhase("at the end of LowerSpatialPlans"))
return;
@@ -103,7 +103,7 @@ void ONNXToSpatialPass::runOnOperation() {
affine::AffineDialect,
arith::ArithDialect,
scf::SCFDialect>();
preTarget.addIllegalOp<ONNXConstantOp, ONNXFlattenOp>();
preTarget.addIllegalOp<ONNXConstantOp>();
RewritePatternSet prePatterns(ctx);
populatePrePatterns(prePatterns, ctx);
@@ -142,6 +142,7 @@ void ONNXToSpatialPass::runOnOperation() {
target.addIllegalOp<ONNXSigmoidOp>();
target.addIllegalOp<ONNXSoftmaxOp>();
target.addIllegalOp<ONNXConcatOp>();
target.addIllegalOp<ONNXFlattenOp>();
target.addIllegalOp<ONNXGatherOp>();
target.addIllegalOp<ONNXReshapeOp>();
target.addIllegalOp<ONNXResizeOp>();
@@ -19,6 +19,7 @@ void populateConversionPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
populateSigmoidPatterns(patterns, ctx);
populateSoftmaxPatterns(patterns, ctx);
populateConcatPatterns(patterns, ctx);
populateFlattenPatterns(patterns, ctx);
populateGatherPatterns(patterns, ctx);
populateResizePatterns(patterns, ctx);
populateReshapePatterns(patterns, ctx);
@@ -26,6 +26,7 @@ void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext*
void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateSoftmaxPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateFlattenPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateGatherPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateResizePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
@@ -0,0 +1,112 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
static FailureOr<int64_t> normalizeFlattenAxis(int64_t axis, int64_t rank) {
int64_t normalizedAxis = axis < 0 ? rank + axis : axis;
if (normalizedAxis < 0 || normalizedAxis > rank)
return failure();
return normalizedAxis;
}
static int64_t product(ArrayRef<int64_t> values) {
int64_t result = 1;
for (int64_t value : values)
result *= value;
return result;
}
static SmallVector<ReassociationIndices> getCollapseTo1DReassociation(int64_t rank) {
SmallVector<ReassociationIndices> reassociation(1);
reassociation.front().reserve(rank);
for (int64_t dim = 0; dim < rank; ++dim)
reassociation.front().push_back(dim);
return reassociation;
}
static SmallVector<ReassociationIndices> getExpandFrom1DReassociation(int64_t rank) {
SmallVector<ReassociationIndices> reassociation(1);
reassociation.front().reserve(rank);
for (int64_t dim = 0; dim < rank; ++dim)
reassociation.front().push_back(dim);
return reassociation;
}
static Value buildFlatten(Value input,
RankedTensorType sourceType,
RankedTensorType resultType,
int64_t axis,
ConversionPatternRewriter& rewriter,
Location loc) {
if (sourceType == resultType)
return input;
if (axis > 0 && axis < sourceType.getRank()) {
SmallVector<ReassociationIndices> reassociation(2);
for (int64_t dim = 0; dim < axis; ++dim)
reassociation[0].push_back(dim);
for (int64_t dim = axis; dim < sourceType.getRank(); ++dim)
reassociation[1].push_back(dim);
return tensor::CollapseShapeOp::create(rewriter, loc, resultType, input, reassociation);
}
Value flattened = input;
if (sourceType.getRank() != 1) {
auto flatType = RankedTensorType::get({sourceType.getNumElements()}, sourceType.getElementType());
flattened = tensor::CollapseShapeOp::create(
rewriter, loc, flatType, flattened, getCollapseTo1DReassociation(sourceType.getRank()));
}
return tensor::ExpandShapeOp::create(
rewriter, loc, resultType, flattened, getExpandFrom1DReassociation(resultType.getRank()));
}
struct Flatten : OpConversionPattern<ONNXFlattenOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult matchAndRewrite(ONNXFlattenOp flattenOp,
ONNXFlattenOpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto sourceType = dyn_cast<RankedTensorType>(adaptor.getInput().getType());
auto resultType = dyn_cast<RankedTensorType>(flattenOp.getOperation()->getResult(0).getType());
if (!sourceType || !resultType || !sourceType.hasStaticShape() || !resultType.hasStaticShape())
return failure();
if (!hasStaticPositiveShape(sourceType) || !hasStaticPositiveShape(resultType) || resultType.getRank() != 2)
return failure();
auto axis = normalizeFlattenAxis(flattenOp.getAxis(), sourceType.getRank());
if (failed(axis))
return failure();
int64_t outerDim = product(sourceType.getShape().take_front(*axis));
int64_t innerDim = product(sourceType.getShape().drop_front(*axis));
if (resultType.getShape()[0] != outerDim || resultType.getShape()[1] != innerDim)
return failure();
auto replaceWithFlatten = [&](auto build) -> LogicalResult {
Value flattened = materializeOrComputeUnary(adaptor.getInput(), resultType, rewriter, flattenOp.getLoc(), build);
rewriter.replaceOp(flattenOp, flattened);
return success();
};
return replaceWithFlatten([&](Value input) {
return buildFlatten(input, sourceType, resultType, *axis, rewriter, flattenOp.getLoc());
});
}
};
} // namespace
void populateFlattenPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add<Flatten>(ctx); }
} // namespace onnx_mlir
+1
View File
@@ -11,6 +11,7 @@ add_pim_library(SpatialOps
Transforms/MergeComputeNodes/HostOutputFinalization.cpp
Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp
Transforms/MergeComputeNodes/ProjectedFragments.cpp
Transforms/MergeComputeNodes/SpatialDataflowCsvExporter.cpp
Transforms/MergeComputeNodes/Scheduling/ComputeGraph.cpp
Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.cpp
Transforms/MergeComputeNodes/Scheduling/MergeSchedulingAnalysis.cpp
@@ -25,6 +25,7 @@
#include <vector>
#include "MaterializeMergeSchedule.hpp"
#include "SpatialDataflowCsvExporter.hpp"
#include "Scheduling/ComputeGraph.hpp"
#include "Scheduling/ComputeInstanceUtils.hpp"
#include "Scheduling/MergeSchedulingAnalysis.hpp"
@@ -364,6 +365,7 @@ public:
const spatial::MergeScheduleResult* analysisResult = nullptr;
analysisResult = &getAnalysis<spatial::MergeSchedulingAnalysis>().getResult();
spatial::SpatialDataflowExportStage exportMode = spatial::getSpatialDataflowExportStage();
if (failed(spatial::MergeScheduleMaterializer().run(func, *analysisResult, nextChannelId))) {
signalPassFailure();
return;
@@ -379,7 +381,12 @@ public:
signalPassFailure();
return;
}
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_merged");
if (spatial::shouldExportSpatialDataflowStage(exportMode, spatial::SpatialDataflowExportStage::Post)
&& failed(spatial::exportSpatialDataflowCsvPost(func))) {
signalPassFailure();
return;
}
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial2_merged");
generateReport(func, "spatial_merge_report", analysisResult->cpuToLastComputeMap.size());
}
};
@@ -0,0 +1,728 @@
#include "SpatialDataflowCsvExporter.hpp"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <fstream>
#include <optional>
#include <string>
#include <utility>
#include "src/Accelerators/PIM/Common/IR/AffineUtils.hpp"
#include "src/Accelerators/PIM/Common/IR/BatchCoreUtils.hpp"
#include "src/Accelerators/PIM/Common/IR/ConstantUtils.hpp"
#include "src/Accelerators/PIM/Common/IR/ShapeUtils.hpp"
#include "src/Accelerators/PIM/Common/Support/DebugDump.hpp"
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace spatial {
namespace {
struct TopLevelOpInfo {
Operation* op = nullptr;
size_t opId = 0;
bool isPost = false;
std::optional<int32_t> scalarCore;
};
struct ExpandedNodeInfo {
std::string id;
std::optional<int32_t> core;
std::optional<uint32_t> lane;
};
struct ChannelSendRecord {
std::string sourceId;
std::optional<uint32_t> sourceLane;
};
enum class LogicalNodeSelector {
Scalar,
Lane,
RangeRepresentative,
};
struct ResolvedProducer {
Operation* op = nullptr;
size_t resultIndex = 0;
LogicalNodeSelector selector = LogicalNodeSelector::Scalar;
uint32_t lane = 0;
uint32_t laneStart = 0;
uint32_t laneCount = 1;
};
struct EdgeSource {
std::string id;
std::optional<uint32_t> sourceLane;
};
std::string csvEscape(StringRef field) {
bool needsQuotes = field.contains(',') || field.contains('"') || field.contains('\n') || field.contains('\r');
if (!needsQuotes)
return field.str();
std::string escaped;
escaped.reserve(field.size() + 2);
escaped.push_back('"');
for (char ch : field) {
if (ch == '"')
escaped += "\"\"";
else
escaped.push_back(ch);
}
escaped.push_back('"');
return escaped;
}
void writeCsvRow(std::fstream& file, ArrayRef<std::string> fields) {
for (size_t i = 0; i < fields.size(); ++i) {
if (i != 0)
file << ",";
file << csvEscape(fields[i]);
}
file << "\n";
}
template <typename NumberT>
std::string maybeNumber(std::optional<NumberT> value) {
if (!value)
return "";
return std::to_string(*value);
}
std::string stringifyType(Type type) {
std::string storage;
llvm::raw_string_ostream os(storage);
type.print(os);
return os.str();
}
std::string stringifyValueAsOperand(Value value, AsmState& asmState) {
std::string storage;
llvm::raw_string_ostream os(storage);
value.printAsOperand(os, asmState);
return os.str();
}
std::string stringifyResultSsaNames(Operation* op, AsmState* asmState) {
if (!asmState || op->getNumResults() == 0)
return "";
std::string storage;
llvm::raw_string_ostream os(storage);
llvm::interleave(
op->getResults(),
[&](Value result) { os << stringifyValueAsOperand(result, *asmState); },
[&]() { os << ";"; });
return os.str();
}
std::optional<uint64_t> getTypeSizeBytes(Type type) {
if (auto shapedType = dyn_cast<ShapedType>(type)) {
if (!shapedType.hasStaticShape() || !hasByteSizedElementType(shapedType.getElementType()))
return std::nullopt;
return static_cast<uint64_t>(getShapedTypeSizeInBytes(shapedType));
}
if (isa<IndexType>(type))
return static_cast<uint64_t>(getElementTypeSizeInBytes(type));
if (auto intType = dyn_cast<IntegerType>(type)) {
if (intType.getWidth() <= 0 || intType.getWidth() % 8 != 0)
return std::nullopt;
return static_cast<uint64_t>(getElementTypeSizeInBytes(type));
}
if (auto floatType = dyn_cast<FloatType>(type)) {
if (floatType.getWidth() <= 0 || floatType.getWidth() % 8 != 0)
return std::nullopt;
return static_cast<uint64_t>(getElementTypeSizeInBytes(type));
}
return std::nullopt;
}
std::string getScalarId(bool isPost, size_t opId) {
return (isPost ? "sc:" : "gc:") + std::to_string(opId);
}
std::string getBatchLaneId(bool isPost, size_t opId, uint32_t lane) {
return (isPost ? "scb:" : "gcb:") + std::to_string(opId) + ":" + std::to_string(lane);
}
template <typename ComputeOpTy, typename BatchOpTy>
bool isTopLevelRelevantCompute(Operation& op) {
return isa<ComputeOpTy, BatchOpTy>(&op);
}
template <typename ComputeOpTy, typename BatchOpTy>
FailureOr<TopLevelOpInfo> buildTopLevelOpInfo(Operation& op, bool isPost, size_t opId) {
TopLevelOpInfo info;
info.op = &op;
info.opId = opId;
info.isPost = isPost;
if constexpr (std::is_same_v<ComputeOpTy, SpatScheduledCompute>) {
if (auto compute = dyn_cast<ComputeOpTy>(&op)) {
auto coreId = getOptionalScheduledCoreId(compute, "spatial dataflow export core id");
if (failed(coreId))
return failure();
if (*coreId)
info.scalarCore = **coreId;
}
}
return info;
}
template <typename BatchOpTy>
FailureOr<SmallVector<int32_t, 8>> getBatchLaneCoreIds(BatchOpTy batch) {
if constexpr (std::is_same_v<BatchOpTy, SpatScheduledComputeBatch>) {
auto coreIds = getOptionalScheduledBatchCoreIds(batch, "spatial dataflow export core ids");
if (failed(coreIds))
return failure();
if (!*coreIds)
return SmallVector<int32_t, 8> {};
return SmallVector<int32_t, 8>((**coreIds).begin(), (**coreIds).end());
}
return SmallVector<int32_t, 8> {};
}
std::string getExpandedNodeId(const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
Operation* op,
uint32_t lane) {
auto it = expandedNodes.find({op, lane});
if (it == expandedNodes.end())
return "";
return it->second.id;
}
void addScalarNodeRow(std::fstream& nodesFile,
DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
const TopLevelOpInfo& info,
AsmState* asmState = nullptr) {
std::string id = getScalarId(info.isPost, info.opId);
SmallVector<std::string, 5> row {id, std::to_string(info.opId), "", maybeNumber<int32_t>(info.scalarCore)};
if (asmState)
row.push_back(stringifyResultSsaNames(info.op, asmState));
writeCsvRow(nodesFile, row);
expandedNodes[{info.op, 0}] = {id, info.scalarCore, std::nullopt};
}
template <typename BatchOpTy>
void addBatchNodeRows(std::fstream& nodesFile,
DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
const TopLevelOpInfo& info,
BatchOpTy batch,
ArrayRef<std::optional<int32_t>> laneCoreIds,
AsmState* asmState = nullptr) {
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
std::string id = getBatchLaneId(info.isPost, info.opId, lane);
SmallVector<std::string, 5> row {id,
std::to_string(info.opId),
std::to_string(lane),
maybeNumber<int32_t>(laneCoreIds[lane])};
if (asmState)
row.push_back(stringifyResultSsaNames(info.op, asmState));
writeCsvRow(nodesFile, row);
expandedNodes[{info.op, lane}] = {id, laneCoreIds[lane], lane};
}
}
std::optional<int64_t> evaluateIndexLike(Value value, Value laneArg, uint32_t lane);
std::optional<int64_t> evaluateIndexLike(Value value, Value laneArg, uint32_t lane) {
if (value == laneArg)
return static_cast<int64_t>(lane);
if (std::optional<int64_t> constant = matchConstantIndexValue(value))
return *constant;
if (auto constant = value.getDefiningOp<arith::ConstantOp>()) {
if (auto intAttr = dyn_cast<IntegerAttr>(constant.getValue()))
return intAttr.getInt();
}
if (auto extract = value.getDefiningOp<tensor::ExtractOp>()) {
auto constant = extract.getTensor().getDefiningOp<arith::ConstantOp>();
auto elements = constant ? dyn_cast<ElementsAttr>(constant.getValue()) : nullptr;
auto shapedType = elements ? dyn_cast<ShapedType>(elements.getType()) : nullptr;
if (!elements || !shapedType || shapedType.getRank() != 1 || extract.getIndices().size() != 1)
return std::nullopt;
std::optional<int64_t> index = evaluateIndexLike(extract.getIndices().front(), laneArg, lane);
if (!index || *index < 0 || *index >= static_cast<int64_t>(elements.getNumElements()))
return std::nullopt;
if (auto denseInts = dyn_cast<DenseIntElementsAttr>(elements))
return (*(denseInts.value_begin<APInt>() + *index)).getSExtValue();
return std::nullopt;
}
if (auto affineApply = value.getDefiningOp<affine::AffineApplyOp>())
if (FailureOr<int64_t> folded = evaluateAffineApply(
affineApply,
[&](Value operand) -> FailureOr<int64_t> {
if (std::optional<int64_t> resolved = evaluateIndexLike(operand, laneArg, lane))
return *resolved;
return failure();
});
succeeded(folded)) {
return *folded;
}
return std::nullopt;
}
SmallVector<int64_t, 8> collectPossibleIntValues(Value value, Value laneArg, uint32_t lane) {
if (std::optional<int64_t> exact = evaluateIndexLike(value, laneArg, lane))
return {*exact};
auto extract = value.getDefiningOp<tensor::ExtractOp>();
auto constant = extract ? extract.getTensor().getDefiningOp<arith::ConstantOp>() : nullptr;
auto elements = constant ? dyn_cast<ElementsAttr>(constant.getValue()) : nullptr;
if (!elements)
return {};
SmallVector<int64_t, 8> values;
if (auto denseInts = dyn_cast<DenseIntElementsAttr>(elements)) {
values.reserve(elements.getNumElements());
for (APInt element : denseInts.getValues<APInt>())
if (!llvm::is_contained(values, element.getSExtValue()))
values.push_back(element.getSExtValue());
}
return values;
}
template <typename BatchOpTy>
std::optional<Value> getBatchLaneInput(BatchOpTy batch, uint32_t lane, unsigned inputIndex) {
if (batch.getNumResults() != 0)
return batch.getInputs()[inputIndex];
size_t laneCount = static_cast<size_t>(batch.getLaneCount());
if (laneCount == 0 || batch.getInputs().size() % laneCount != 0)
return std::nullopt;
size_t inputsPerLane = batch.getInputs().size() / laneCount;
size_t flatIndex = static_cast<size_t>(lane) * inputsPerLane + inputIndex;
if (flatIndex >= batch.getInputs().size())
return std::nullopt;
return batch.getInputs()[flatIndex];
}
template <typename BatchOpTy>
unsigned getBatchLaneInputCount(BatchOpTy batch) {
if (batch.getNumResults() != 0)
return batch.getInputs().size();
size_t laneCount = static_cast<size_t>(batch.getLaneCount());
if (laneCount == 0 || batch.getInputs().size() % laneCount != 0)
return 0;
return static_cast<unsigned>(batch.getInputs().size() / laneCount);
}
template <typename ComputeOpTy, typename BatchOpTy>
std::optional<ResolvedProducer> resolveProducerForValue(Value value, std::optional<uint32_t> consumerLane) {
Operation* op = value.getDefiningOp();
if (!op)
return std::nullopt;
while (auto extract = dyn_cast<tensor::ExtractSliceOp>(op)) {
Value source = extract.getSource();
Operation* sourceOp = source.getDefiningOp();
auto sourceBatch = dyn_cast_or_null<BatchOpTy>(sourceOp);
if (sourceBatch && sourceBatch.getNumResults() != 0) {
auto staticOffsets = extract.getStaticOffsets();
if (!staticOffsets.empty() && staticOffsets.front() != ShapedType::kDynamic) {
uint32_t lane = static_cast<uint32_t>(staticOffsets.front());
return ResolvedProducer {sourceOp, 0, LogicalNodeSelector::Lane, lane, lane, 1};
}
if (consumerLane)
return ResolvedProducer {sourceOp, 0, LogicalNodeSelector::Lane, *consumerLane, *consumerLane, 1};
return ResolvedProducer {
sourceOp, 0, LogicalNodeSelector::RangeRepresentative, 0, 0, static_cast<uint32_t>(sourceBatch.getLaneCount())
};
}
value = source;
op = sourceOp;
if (!op)
return std::nullopt;
}
if (auto compute = dyn_cast<ComputeOpTy>(op))
return ResolvedProducer {
compute.getOperation(), static_cast<size_t>(cast<OpResult>(value).getResultNumber()), LogicalNodeSelector::Scalar, 0, 0, 1
};
if (auto batch = dyn_cast<BatchOpTy>(op)) {
if (batch.getNumResults() != 0) {
if (consumerLane)
return ResolvedProducer {op, 0, LogicalNodeSelector::Lane, *consumerLane, *consumerLane, 1};
return ResolvedProducer {
op, 0, LogicalNodeSelector::RangeRepresentative, 0, 0, static_cast<uint32_t>(batch.getLaneCount())
};
}
uint32_t lane = static_cast<uint32_t>(cast<OpResult>(value).getResultNumber());
return ResolvedProducer {op, static_cast<size_t>(lane), LogicalNodeSelector::Lane, lane, lane, 1};
}
return std::nullopt;
}
SmallVector<EdgeSource, 8>
resolveProducerSourcesForCsv(const ResolvedProducer& producer,
const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes) {
SmallVector<EdgeSource, 8> sources;
if (producer.selector == LogicalNodeSelector::Scalar) {
std::string id = getExpandedNodeId(expandedNodes, producer.op, 0);
if (!id.empty())
sources.push_back({id, std::nullopt});
return sources;
}
if (producer.selector == LogicalNodeSelector::Lane) {
std::string id = getExpandedNodeId(expandedNodes, producer.op, producer.lane);
if (!id.empty())
sources.push_back({id, producer.lane});
return sources;
}
for (uint32_t lane = producer.laneStart; lane < producer.laneStart + producer.laneCount; ++lane) {
std::string id = getExpandedNodeId(expandedNodes, producer.op, lane);
if (!id.empty())
sources.push_back({id, lane});
}
return sources;
}
void emitEdgeRow(std::fstream& edgesFile,
StringRef sourceId,
StringRef targetId,
std::optional<uint64_t> byteSize,
Type propagatedType,
StringRef stage,
std::optional<uint32_t> sourceLane,
std::optional<uint32_t> targetLane,
std::optional<int64_t> channelId) {
writeCsvRow(edgesFile,
{sourceId.str(),
targetId.str(),
maybeNumber<uint64_t>(byteSize),
stringifyType(propagatedType),
stage.str(),
maybeNumber<uint32_t>(sourceLane),
maybeNumber<uint32_t>(targetLane),
maybeNumber<int64_t>(channelId)});
}
template <typename ComputeOpTy, typename BatchOpTy>
LogicalResult emitDataEdges(std::fstream& edgesFile,
const DenseMap<Operation*, TopLevelOpInfo>& topLevelInfo,
const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
StringRef stage) {
for (const auto& entry : topLevelInfo) {
Operation* op = entry.first;
const TopLevelOpInfo& info = entry.second;
if (auto compute = dyn_cast<ComputeOpTy>(op)) {
for (Value input : compute.getInputs()) {
if (isa_and_nonnull<SpatChannelReceiveOp>(input.getDefiningOp()))
continue;
auto producer = resolveProducerForValue<ComputeOpTy, BatchOpTy>(input, std::nullopt);
if (!producer)
continue;
SmallVector<EdgeSource, 8> sources = resolveProducerSourcesForCsv(*producer, expandedNodes);
std::optional<uint64_t> byteSize = getTypeSizeBytes(input.getType());
std::string targetId = getScalarId(info.isPost, info.opId);
for (const EdgeSource& source : sources)
emitEdgeRow(edgesFile, source.id, targetId, byteSize, input.getType(), stage, source.sourceLane, std::nullopt, std::nullopt);
}
continue;
}
auto batch = dyn_cast<BatchOpTy>(op);
if (!batch)
continue;
unsigned inputCount = getBatchLaneInputCount(batch);
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
std::string targetId = getBatchLaneId(info.isPost, info.opId, lane);
for (unsigned inputIndex = 0; inputIndex < inputCount; ++inputIndex) {
std::optional<Value> input = getBatchLaneInput(batch, lane, inputIndex);
if (!input || isa_and_nonnull<SpatChannelReceiveOp>((*input).getDefiningOp()))
continue;
auto producer = resolveProducerForValue<ComputeOpTy, BatchOpTy>(*input, lane);
if (!producer)
continue;
SmallVector<EdgeSource, 8> sources = resolveProducerSourcesForCsv(*producer, expandedNodes);
std::optional<uint64_t> byteSize = getTypeSizeBytes((*input).getType());
for (const EdgeSource& source : sources)
emitEdgeRow(edgesFile, source.id, targetId, byteSize, (*input).getType(), stage, source.sourceLane, lane, std::nullopt);
}
}
}
return success();
}
template <typename BatchOpTy>
void collectChannelSends(DenseMap<int64_t, SmallVector<ChannelSendRecord, 4>>& sendsByChannelId,
const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
BatchOpTy batch) {
std::optional<BlockArgument> laneArg = batch.getLaneArgument();
if (!laneArg)
return;
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
std::string sourceId = getExpandedNodeId(expandedNodes, batch.getOperation(), lane);
if (sourceId.empty())
continue;
batch.getBody().walk([&](SpatChannelSendOp send) {
std::optional<int64_t> channelId = evaluateIndexLike(send.getChannelId(), *laneArg, lane);
if (!channelId)
return;
sendsByChannelId[*channelId].push_back({sourceId, lane});
});
}
}
void collectChannelSends(DenseMap<int64_t, SmallVector<ChannelSendRecord, 4>>& sendsByChannelId,
const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
SpatScheduledCompute compute) {
std::string sourceId = getExpandedNodeId(expandedNodes, compute.getOperation(), 0);
if (sourceId.empty())
return;
compute.getBody().walk([&](SpatChannelSendOp send) {
std::optional<int64_t> channelId = evaluateIndexLike(send.getChannelId(), Value(), 0);
if (!channelId)
return;
sendsByChannelId[*channelId].push_back({sourceId, std::nullopt});
});
}
DenseMap<int32_t, SmallVector<ChannelSendRecord, 4>>
buildNodesByCore(const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes) {
DenseMap<int32_t, SmallVector<ChannelSendRecord, 4>> nodesByCore;
for (const auto& entry : expandedNodes) {
const ExpandedNodeInfo& node = entry.second;
if (!node.core)
continue;
nodesByCore[*node.core].push_back({node.id, node.lane});
}
return nodesByCore;
}
template <typename ComputeOpTy, typename BatchOpTy, typename ResolveChannelSourcesFn>
LogicalResult emitExplicitChannelEdges(std::fstream& edgesFile,
const DenseMap<Operation*, TopLevelOpInfo>& topLevelInfo,
ResolveChannelSourcesFn&& resolveChannelSources,
StringRef stage) {
for (const auto& entry : topLevelInfo) {
Operation* op = entry.first;
const TopLevelOpInfo& info = entry.second;
if (auto compute = dyn_cast<ComputeOpTy>(op)) {
compute.getBody().walk([&](SpatChannelReceiveOp receive) {
SmallVector<ChannelSendRecord, 4> sources = resolveChannelSources(receive, 0);
if (sources.empty())
return;
std::optional<int64_t> channelId = evaluateIndexLike(receive.getChannelId(), Value(), 0);
std::string targetId = getScalarId(info.isPost, info.opId);
std::optional<uint64_t> byteSize = getTypeSizeBytes(receive.getType());
for (const ChannelSendRecord& source : sources)
emitEdgeRow(edgesFile, source.sourceId, targetId, byteSize, receive.getType(), stage, source.sourceLane, std::nullopt, channelId);
});
continue;
}
auto batch = dyn_cast<BatchOpTy>(op);
if (!batch)
continue;
auto laneArg = batch.getLaneArgument();
if (!laneArg)
continue;
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
std::string targetId = getBatchLaneId(info.isPost, info.opId, lane);
batch.getBody().walk([&](SpatChannelReceiveOp receive) {
SmallVector<ChannelSendRecord, 4> sources = resolveChannelSources(receive, lane);
if (sources.empty())
return;
std::optional<int64_t> channelId = evaluateIndexLike(receive.getChannelId(), *laneArg, lane);
std::optional<uint64_t> byteSize = getTypeSizeBytes(receive.getType());
for (const ChannelSendRecord& source : sources)
emitEdgeRow(edgesFile, source.sourceId, targetId, byteSize, receive.getType(), stage, source.sourceLane, lane, channelId);
});
}
}
return success();
}
LogicalResult exportStagePre(func::FuncOp func) {
std::fstream nodesFile = openDialectDumpFileWithExtension("spatial1_graph.nodes", "/reports", "csv");
std::fstream edgesFile = openDialectDumpFileWithExtension("spatial1_graph.edges","/reports", "csv");
if (!nodesFile.is_open() || !edgesFile.is_open())
return success();
writeCsvRow(nodesFile, {"Id", "op_id", "lane", "core", "ssa_name"});
writeCsvRow(edgesFile, {"Source", "Target", "Weight", "Type", "stage", "source_lane", "target_lane", "channel_id"});
Operation* asmRoot = func.getOperation();
if (auto moduleOp = func->getParentOfType<ModuleOp>())
asmRoot = moduleOp.getOperation();
OpPrintingFlags flags;
flags.elideLargeElementsAttrs().enableDebugInfo(true, false);
AsmState asmState(asmRoot, flags);
DenseMap<Operation*, TopLevelOpInfo> topLevelInfo;
DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo> expandedNodes;
size_t opId = 0;
for (Operation& op : func.getBody().front()) {
if (!isTopLevelRelevantCompute<SpatGraphCompute, SpatGraphComputeBatch>(op))
continue;
FailureOr<TopLevelOpInfo> info = buildTopLevelOpInfo<SpatGraphCompute, SpatGraphComputeBatch>(op, false, opId++);
if (failed(info))
return failure();
topLevelInfo[&op] = *info;
if (auto compute = dyn_cast<SpatGraphCompute>(&op)) {
addScalarNodeRow(nodesFile, expandedNodes, *info, &asmState);
continue;
}
auto batch = cast<SpatGraphComputeBatch>(&op);
SmallVector<std::optional<int32_t>, 8> laneCoreIds(batch.getLaneCount());
addBatchNodeRows(nodesFile, expandedNodes, *info, batch, laneCoreIds, &asmState);
}
return emitDataEdges<SpatGraphCompute, SpatGraphComputeBatch>(edgesFile, topLevelInfo, expandedNodes, "pre");
}
LogicalResult exportStagePost(func::FuncOp func) {
std::fstream nodesFile = openDialectDumpFileWithExtension("spatial2_merged.nodes", "/reports", "csv");
std::fstream edgesFile = openDialectDumpFileWithExtension("spatial2_merged.edges", "/reports", "csv");
if (!nodesFile.is_open() || !edgesFile.is_open())
return success();
writeCsvRow(nodesFile, {"Id", "op_id", "lane", "core"});
writeCsvRow(edgesFile, {"Source", "Target", "Weight", "Type", "stage", "source_lane", "target_lane", "channel_id"});
DenseMap<Operation*, TopLevelOpInfo> topLevelInfo;
DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo> expandedNodes;
size_t opId = 0;
for (Operation& op : func.getBody().front()) {
if (!isTopLevelRelevantCompute<SpatScheduledCompute, SpatScheduledComputeBatch>(op))
continue;
FailureOr<TopLevelOpInfo> info = buildTopLevelOpInfo<SpatScheduledCompute, SpatScheduledComputeBatch>(op, true, opId++);
if (failed(info))
return failure();
topLevelInfo[&op] = *info;
if (isa<SpatScheduledCompute>(&op)) {
addScalarNodeRow(nodesFile, expandedNodes, *info);
continue;
}
auto batch = cast<SpatScheduledComputeBatch>(&op);
auto coreIds = getBatchLaneCoreIds(batch);
if (failed(coreIds))
return failure();
SmallVector<std::optional<int32_t>, 8> laneCoreIds(batch.getLaneCount());
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane)
if (lane < coreIds->size())
laneCoreIds[lane] = (*coreIds)[lane];
addBatchNodeRows(nodesFile, expandedNodes, *info, batch, laneCoreIds);
}
if (failed(emitDataEdges<SpatScheduledCompute, SpatScheduledComputeBatch>(edgesFile, topLevelInfo, expandedNodes, "post")))
return failure();
DenseMap<int64_t, SmallVector<ChannelSendRecord, 4>> sendsByChannelId;
for (const auto& entry : topLevelInfo) {
Operation* op = entry.first;
if (auto compute = dyn_cast<SpatScheduledCompute>(op))
collectChannelSends(sendsByChannelId, expandedNodes, compute);
else if (auto batch = dyn_cast<SpatScheduledComputeBatch>(op))
collectChannelSends(sendsByChannelId, expandedNodes, batch);
}
DenseMap<int32_t, SmallVector<ChannelSendRecord, 4>> nodesByCore = buildNodesByCore(expandedNodes);
auto resolveChannelSources = [&](SpatChannelReceiveOp receive, uint32_t lane) {
SmallVector<ChannelSendRecord, 4> sources;
Value laneArg;
if (auto owner = receive->getParentOfType<SpatScheduledComputeBatch>())
if (auto maybeLaneArg = owner.getLaneArgument())
laneArg = *maybeLaneArg;
if (std::optional<int64_t> channelId = evaluateIndexLike(receive.getChannelId(), laneArg, lane)) {
if (auto it = sendsByChannelId.find(*channelId); it != sendsByChannelId.end())
return it->second;
}
for (int64_t sourceCore : collectPossibleIntValues(receive.getSourceCoreId(), laneArg, lane)) {
auto it = nodesByCore.find(static_cast<int32_t>(sourceCore));
if (it == nodesByCore.end())
continue;
llvm::append_range(sources, it->second);
}
return sources;
};
return emitExplicitChannelEdges<SpatScheduledCompute, SpatScheduledComputeBatch>(
edgesFile, topLevelInfo, resolveChannelSources, "post");
}
} // namespace
SpatialDataflowExportStage getSpatialDataflowExportStage() {
switch (pimExportSpatialDataflow.getValue()) {
case SpatialDataflowExportNone: return SpatialDataflowExportStage::None;
case SpatialDataflowExportPre: return SpatialDataflowExportStage::Pre;
case SpatialDataflowExportPost: return SpatialDataflowExportStage::Post;
case SpatialDataflowExportBoth: return SpatialDataflowExportStage::Both;
}
llvm_unreachable("unknown spatial dataflow export mode");
}
bool shouldExportSpatialDataflowStage(SpatialDataflowExportStage mode, SpatialDataflowExportStage stage) {
switch (mode) {
case SpatialDataflowExportStage::None: return false;
case SpatialDataflowExportStage::Pre: return stage == SpatialDataflowExportStage::Pre;
case SpatialDataflowExportStage::Post: return stage == SpatialDataflowExportStage::Post;
case SpatialDataflowExportStage::Both:
return stage == SpatialDataflowExportStage::Pre || stage == SpatialDataflowExportStage::Post;
}
return false;
}
LogicalResult exportSpatialDataflowCsvPre(func::FuncOp func) { return exportStagePre(func); }
LogicalResult exportSpatialDataflowCsvPost(func::FuncOp func) { return exportStagePost(func); }
} // namespace spatial
} // namespace onnx_mlir
@@ -0,0 +1,25 @@
#pragma once
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Support/LogicalResult.h"
namespace onnx_mlir {
namespace spatial {
enum class SpatialDataflowExportStage {
None,
Pre,
Post,
Both,
};
SpatialDataflowExportStage getSpatialDataflowExportStage();
mlir::LogicalResult exportSpatialDataflowCsvPre(mlir::func::FuncOp func);
mlir::LogicalResult exportSpatialDataflowCsvPost(mlir::func::FuncOp func);
bool shouldExportSpatialDataflowStage(SpatialDataflowExportStage mode, SpatialDataflowExportStage stage);
} // namespace spatial
} // namespace onnx_mlir