This commit is contained in:
@@ -7,15 +7,21 @@
|
||||
|
||||
namespace onnx_mlir {
|
||||
|
||||
void dumpModule(mlir::ModuleOp moduleOp, const std::string& name) {
|
||||
std::fstream openDialectDumpFileWithExtension(const std::string& name, llvm::StringRef destination, llvm::StringRef extension) {
|
||||
std::string outputDir = getOutputDir();
|
||||
if (outputDir.empty())
|
||||
return {};
|
||||
|
||||
std::string dialectsDir = (outputDir + destination).str();
|
||||
createDirectory(dialectsDir);
|
||||
return std::fstream(dialectsDir + "/" + name + "." + extension.str(), std::ios::out);
|
||||
}
|
||||
|
||||
void dumpModule(mlir::ModuleOp moduleOp, const std::string& name) {
|
||||
std::fstream file = openDialectDumpFileWithExtension(name, "/dialects", "mlir");
|
||||
if (!file.is_open())
|
||||
return;
|
||||
|
||||
std::string dialectsDir = outputDir + "/dialects";
|
||||
createDirectory(dialectsDir);
|
||||
|
||||
std::fstream file(dialectsDir + "/" + name + ".mlir", std::ios::out);
|
||||
llvm::raw_os_ostream os(file);
|
||||
mlir::OpPrintingFlags flags;
|
||||
flags.elideLargeElementsAttrs().enableDebugInfo(true, false);
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
namespace onnx_mlir {
|
||||
@@ -10,4 +12,7 @@ namespace onnx_mlir {
|
||||
/// directory for pass-level debugging.
|
||||
void dumpModule(mlir::ModuleOp moduleOp, const std::string& name);
|
||||
|
||||
/// Opens `<output dir><destination>/<name>.<extension>` for writing. With the
/// default arguments this targets the same `/dialects` directory and `mlir`
/// extension used by dumpModule. The returned stream is not open when no
/// output directory is configured, so callers should check is_open().
|
||||
std::fstream openDialectDumpFileWithExtension(const std::string& name,llvm::StringRef destination = "/dialects", llvm::StringRef extension = "mlir");
|
||||
|
||||
} // namespace onnx_mlir
|
||||
|
||||
@@ -57,6 +57,18 @@ llvm::cl::opt<PimConvLoweringType> pimConvLowering(
|
||||
llvm::cl::init(PimConvLoweringAuto),
|
||||
llvm::cl::cat(OnnxMlirOptions));
|
||||
|
||||
// Command-line switch selecting which Spatial dataflow CSV reports are
// emitted (none / pre / post / both). Defaults to emitting nothing.
llvm::cl::opt<PimSpatialDataflowExportType> pimExportSpatialDataflow(
    "pim-export-spatial-dataflow",
    llvm::cl::desc("Emit Gephi-importable CSV dataflow reports around MergeComputeNodes materialization"),
    llvm::cl::values(
        clEnumValN(SpatialDataflowExportNone, "none", "Do not emit Spatial dataflow CSV reports"),
        clEnumValN(SpatialDataflowExportPre, "pre", "Emit pre-materialization Spatial dataflow CSV reports"),
        clEnumValN(SpatialDataflowExportPost, "post", "Emit post-materialization Spatial dataflow CSV reports"),
        clEnumValN(
            SpatialDataflowExportBoth, "both", "Emit both pre- and post-materialization Spatial dataflow CSV reports")),
    llvm::cl::init(SpatialDataflowExportNone),
    llvm::cl::cat(OnnxMlirOptions));
|
||||
|
||||
llvm::cl::opt<bool>
|
||||
pimOnlyCodegen("pim-only-codegen",
|
||||
llvm::cl::desc("Only generate code for PIM (assume input is already in bufferized PIM IR)"),
|
||||
|
||||
@@ -42,11 +42,19 @@ typedef enum {
|
||||
PimConvLoweringTiled2D = 8,
|
||||
} PimConvLoweringType;
|
||||
|
||||
// Values accepted by the `pim-export-spatial-dataflow` option.
typedef enum {
|
||||
// "none": do not emit Spatial dataflow CSV reports.
SpatialDataflowExportNone = 0,
|
||||
// "pre": emit the pre-materialization reports.
SpatialDataflowExportPre = 1,
|
||||
// "post": emit the post-materialization reports.
SpatialDataflowExportPost = 2,
|
||||
// "both": emit pre- and post-materialization reports.
SpatialDataflowExportBoth = 3,
|
||||
} PimSpatialDataflowExportType;
|
||||
|
||||
extern llvm::cl::OptionCategory OnnxMlirOptions;
|
||||
extern llvm::cl::opt<PimEmissionTargetType> pimEmissionTarget;
|
||||
extern llvm::cl::opt<PimMergeSchedulerType> pimMergeScheduler;
|
||||
extern llvm::cl::opt<PimMemoryReportLevel> pimMemoryReport;
|
||||
extern llvm::cl::opt<PimConvLoweringType> pimConvLowering;
|
||||
extern llvm::cl::opt<PimSpatialDataflowExportType> pimExportSpatialDataflow;
|
||||
|
||||
extern llvm::cl::opt<bool> pimOnlyCodegen;
|
||||
extern llvm::cl::opt<bool> pimDisableMemoryCoalescing;
|
||||
|
||||
@@ -20,6 +20,7 @@ add_pim_library(OMONNXToSpatial
|
||||
Patterns/NN/Sigmoid.cpp
|
||||
Patterns/NN/Softmax.cpp
|
||||
Patterns/Tensor/Concat.cpp
|
||||
Patterns/Tensor/Flatten.cpp
|
||||
Patterns/Tensor/Gather.cpp
|
||||
Patterns/Tensor/Resize.cpp
|
||||
Patterns/Tensor/Reshape.cpp
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/PlanLowering.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/SpatialDataflowCsvExporter.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
#include "src/Accelerators/PIM/Pass/PIMPasses.h"
|
||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||
@@ -392,10 +393,17 @@ struct LowerSpatialPlansPass final : PassWrapper<LowerSpatialPlansPass, Operatio
|
||||
hasIllegalOps = true;
|
||||
}
|
||||
});
|
||||
if (hasIllegalOps)
|
||||
if (hasIllegalOps) {
|
||||
signalPassFailure();
|
||||
else
|
||||
dumpModule(moduleOp, "spatial1_premerge");
|
||||
} else {
|
||||
dumpModule(moduleOp, "spatial1_graph");
|
||||
spatial::SpatialDataflowExportStage exportMode = spatial::getSpatialDataflowExportStage();
|
||||
if (spatial::shouldExportSpatialDataflowStage(exportMode, spatial::SpatialDataflowExportStage::Pre)
|
||||
&& failed(spatial::exportSpatialDataflowCsvPre(funcOp))) {
|
||||
signalPassFailure();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (!verifyLogicalPhase("at the end of LowerSpatialPlans"))
|
||||
return;
|
||||
|
||||
@@ -103,7 +103,7 @@ void ONNXToSpatialPass::runOnOperation() {
|
||||
affine::AffineDialect,
|
||||
arith::ArithDialect,
|
||||
scf::SCFDialect>();
|
||||
preTarget.addIllegalOp<ONNXConstantOp, ONNXFlattenOp>();
|
||||
preTarget.addIllegalOp<ONNXConstantOp>();
|
||||
|
||||
RewritePatternSet prePatterns(ctx);
|
||||
populatePrePatterns(prePatterns, ctx);
|
||||
@@ -142,6 +142,7 @@ void ONNXToSpatialPass::runOnOperation() {
|
||||
target.addIllegalOp<ONNXSigmoidOp>();
|
||||
target.addIllegalOp<ONNXSoftmaxOp>();
|
||||
target.addIllegalOp<ONNXConcatOp>();
|
||||
target.addIllegalOp<ONNXFlattenOp>();
|
||||
target.addIllegalOp<ONNXGatherOp>();
|
||||
target.addIllegalOp<ONNXReshapeOp>();
|
||||
target.addIllegalOp<ONNXResizeOp>();
|
||||
|
||||
@@ -19,6 +19,7 @@ void populateConversionPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
|
||||
populateSigmoidPatterns(patterns, ctx);
|
||||
populateSoftmaxPatterns(patterns, ctx);
|
||||
populateConcatPatterns(patterns, ctx);
|
||||
populateFlattenPatterns(patterns, ctx);
|
||||
populateGatherPatterns(patterns, ctx);
|
||||
populateResizePatterns(patterns, ctx);
|
||||
populateReshapePatterns(patterns, ctx);
|
||||
|
||||
@@ -26,6 +26,7 @@ void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext*
|
||||
void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
void populateSoftmaxPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
void populateFlattenPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
void populateGatherPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
void populateResizePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||
|
||||
@@ -0,0 +1,112 @@
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/Transforms/DialectConversion.h"
|
||||
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
namespace {
|
||||
|
||||
/// Maps a possibly negative Flatten axis into [0, rank]. Negative values are
/// offset by `rank`; anything still outside [0, rank] yields failure().
static FailureOr<int64_t> normalizeFlattenAxis(int64_t axis, int64_t rank) {
  if (axis < 0)
    axis += rank;

  const bool inRange = axis >= 0 && axis <= rank;
  if (!inRange)
    return failure();
  return axis;
}
|
||||
|
||||
/// Multiplies all entries of `values`; an empty list yields 1.
static int64_t product(ArrayRef<int64_t> values) {
  int64_t accumulated = 1;
  for (size_t index = 0, count = values.size(); index < count; ++index)
    accumulated *= values[index];
  return accumulated;
}
|
||||
|
||||
/// Builds a single reassociation group containing dimensions 0..rank-1, i.e.
/// the grouping that folds every source dimension into one.
static SmallVector<ReassociationIndices> getCollapseTo1DReassociation(int64_t rank) {
  ReassociationIndices allDims;
  allDims.reserve(rank);
  for (int64_t dim = 0; dim < rank; ++dim)
    allDims.push_back(dim);

  SmallVector<ReassociationIndices> reassociation;
  reassociation.push_back(allDims);
  return reassociation;
}
|
||||
|
||||
/// Builds a single reassociation group containing dimensions 0..rank-1, i.e.
/// the grouping that spreads one source dimension over `rank` result dimensions.
static SmallVector<ReassociationIndices> getExpandFrom1DReassociation(int64_t rank) {
  SmallVector<ReassociationIndices> reassociation;
  reassociation.emplace_back();

  ReassociationIndices& onlyGroup = reassociation.back();
  onlyGroup.reserve(rank);
  int64_t dim = 0;
  while (dim < rank) {
    onlyGroup.push_back(dim);
    ++dim;
  }
  return reassociation;
}
|
||||
|
||||
/// Emits the tensor reshape(s) that turn `input` (of `sourceType`) into
/// `resultType` for a Flatten with the already-normalized `axis`.
///  - Identical types: `input` is returned untouched.
///  - 0 < axis < rank: one collapse_shape with groups [0, axis) and [axis, rank).
///  - axis == 0 or axis == rank: collapse to 1-D (skipped for rank-1 sources),
///    then expand that single dimension to the result rank.
static Value buildFlatten(Value input,
                          RankedTensorType sourceType,
                          RankedTensorType resultType,
                          int64_t axis,
                          ConversionPatternRewriter& rewriter,
                          Location loc) {
  if (sourceType == resultType)
    return input;

  const int64_t sourceRank = sourceType.getRank();
  const bool splitsInsideShape = axis > 0 && axis < sourceRank;
  if (!splitsInsideShape) {
    // NOTE(review): a rank-0 source also takes the collapse path below because
    // its rank is != 1 - confirm such inputs are rejected before reaching here.
    Value linear = input;
    if (sourceRank != 1) {
      auto linearType = RankedTensorType::get({sourceType.getNumElements()}, sourceType.getElementType());
      linear = tensor::CollapseShapeOp::create(
          rewriter, loc, linearType, linear, getCollapseTo1DReassociation(sourceRank));
    }
    return tensor::ExpandShapeOp::create(
        rewriter, loc, resultType, linear, getExpandFrom1DReassociation(resultType.getRank()));
  }

  // Dimensions before the axis form the outer group, the rest the inner group.
  SmallVector<ReassociationIndices> groups(2);
  for (int64_t dim = 0; dim < sourceRank; ++dim)
    groups[dim < axis ? 0 : 1].push_back(dim);
  return tensor::CollapseShapeOp::create(rewriter, loc, resultType, input, groups);
}
|
||||
|
||||
/// Conversion pattern replacing ONNXFlattenOp with the reshape sequence built
/// by buildFlatten. Only fully static, rank-2 results whose dimensions match
/// the products of the source dimensions on each side of the axis are accepted.
struct Flatten : OpConversionPattern<ONNXFlattenOp> {
  using OpConversionPattern::OpConversionPattern;

  LogicalResult matchAndRewrite(ONNXFlattenOp flattenOp,
                                ONNXFlattenOpAdaptor adaptor,
                                ConversionPatternRewriter& rewriter) const override {
    auto sourceType = dyn_cast<RankedTensorType>(adaptor.getInput().getType());
    auto resultType = dyn_cast<RankedTensorType>(flattenOp.getOperation()->getResult(0).getType());
    if (!sourceType || !resultType)
      return failure();
    if (!sourceType.hasStaticShape() || !resultType.hasStaticShape())
      return failure();
    if (!hasStaticPositiveShape(sourceType) || !hasStaticPositiveShape(resultType))
      return failure();
    if (resultType.getRank() != 2)
      return failure();

    FailureOr<int64_t> axis = normalizeFlattenAxis(flattenOp.getAxis(), sourceType.getRank());
    if (failed(axis))
      return failure();

    // The declared result shape must equal [prod(dims before axis), prod(dims from axis)].
    ArrayRef<int64_t> sourceShape = sourceType.getShape();
    ArrayRef<int64_t> resultShape = resultType.getShape();
    const bool shapesAgree = resultShape[0] == product(sourceShape.take_front(*axis))
                          && resultShape[1] == product(sourceShape.drop_front(*axis));
    if (!shapesAgree)
      return failure();

    Location loc = flattenOp.getLoc();
    auto build = [&](Value input) { return buildFlatten(input, sourceType, resultType, *axis, rewriter, loc); };
    Value flattened = materializeOrComputeUnary(adaptor.getInput(), resultType, rewriter, loc, build);
    rewriter.replaceOp(flattenOp, flattened);
    return success();
  }
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Adds the Flatten conversion pattern to `patterns`.
void populateFlattenPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add<Flatten>(ctx); }
|
||||
|
||||
} // namespace onnx_mlir
|
||||
@@ -11,6 +11,7 @@ add_pim_library(SpatialOps
|
||||
Transforms/MergeComputeNodes/HostOutputFinalization.cpp
|
||||
Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp
|
||||
Transforms/MergeComputeNodes/ProjectedFragments.cpp
|
||||
Transforms/MergeComputeNodes/SpatialDataflowCsvExporter.cpp
|
||||
Transforms/MergeComputeNodes/Scheduling/ComputeGraph.cpp
|
||||
Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.cpp
|
||||
Transforms/MergeComputeNodes/Scheduling/MergeSchedulingAnalysis.cpp
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "MaterializeMergeSchedule.hpp"
|
||||
#include "SpatialDataflowCsvExporter.hpp"
|
||||
#include "Scheduling/ComputeGraph.hpp"
|
||||
#include "Scheduling/ComputeInstanceUtils.hpp"
|
||||
#include "Scheduling/MergeSchedulingAnalysis.hpp"
|
||||
@@ -364,6 +365,7 @@ public:
|
||||
|
||||
const spatial::MergeScheduleResult* analysisResult = nullptr;
|
||||
analysisResult = &getAnalysis<spatial::MergeSchedulingAnalysis>().getResult();
|
||||
spatial::SpatialDataflowExportStage exportMode = spatial::getSpatialDataflowExportStage();
|
||||
if (failed(spatial::MergeScheduleMaterializer().run(func, *analysisResult, nextChannelId))) {
|
||||
signalPassFailure();
|
||||
return;
|
||||
@@ -379,7 +381,12 @@ public:
|
||||
signalPassFailure();
|
||||
return;
|
||||
}
|
||||
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_merged");
|
||||
if (spatial::shouldExportSpatialDataflowStage(exportMode, spatial::SpatialDataflowExportStage::Post)
|
||||
&& failed(spatial::exportSpatialDataflowCsvPost(func))) {
|
||||
signalPassFailure();
|
||||
return;
|
||||
}
|
||||
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial2_merged");
|
||||
generateReport(func, "spatial_merge_report", analysisResult->cpuToLastComputeMap.size());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -0,0 +1,728 @@
|
||||
#include "SpatialDataflowCsvExporter.hpp"
|
||||
|
||||
#include "mlir/Dialect/Affine/IR/AffineOps.h"
|
||||
#include "mlir/Dialect/Arith/IR/Arith.h"
|
||||
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||
#include "mlir/IR/AsmState.h"
|
||||
#include "mlir/IR/BuiltinAttributes.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/BuiltinTypes.h"
|
||||
#include "mlir/IR/Value.h"
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "src/Accelerators/PIM/Common/IR/AffineUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Common/IR/BatchCoreUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Common/IR/ConstantUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Common/IR/ShapeUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Common/Support/DebugDump.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
namespace spatial {
|
||||
|
||||
namespace {
|
||||
|
||||
/// Per-operation record filled in by buildTopLevelOpInfo.
struct TopLevelOpInfo {
|
||||
// The operation this record describes.
Operation* op = nullptr;
|
||||
// Numeric id used when forming node ids ("gc:<opId>", "scb:<opId>:<lane>", ...).
size_t opId = 0;
|
||||
// Selects the "sc"/"scb" id prefixes instead of "gc"/"gcb".
bool isPost = false;
|
||||
// Core id; only set for SpatScheduledCompute ops that report one.
std::optional<int32_t> scalarCore;
|
||||
};
|
||||
|
||||
/// One emitted CSV node, stored per (operation, lane) key.
struct ExpandedNodeInfo {
|
||||
// Node id as written to the nodes CSV.
std::string id;
|
||||
// Core id of the node, when known.
std::optional<int32_t> core;
|
||||
// Lane index for batch nodes; left empty for scalar nodes.
std::optional<uint32_t> lane;
|
||||
};
|
||||
|
||||
/// A channel send recorded under its evaluated channel id.
struct ChannelSendRecord {
|
||||
// Id of the node that performs the send.
std::string sourceId;
|
||||
// Lane of the sending node, when the sender is a batch lane.
std::optional<uint32_t> sourceLane;
|
||||
};
|
||||
|
||||
/// How a ResolvedProducer maps onto expanded nodes.
enum class LogicalNodeSelector {
|
||||
// The producer is the single node stored under lane 0.
Scalar,
|
||||
// The producer is exactly one lane (ResolvedProducer::lane).
Lane,
|
||||
// The producer is every lane in [laneStart, laneStart + laneCount).
RangeRepresentative,
|
||||
};
|
||||
|
||||
/// Result of tracing a consumed value back to the operation that produced it.
struct ResolvedProducer {
|
||||
// Producing operation.
Operation* op = nullptr;
|
||||
// Result number recorded for the traced value.
size_t resultIndex = 0;
|
||||
// Which of the fields below identify the producing node(s).
LogicalNodeSelector selector = LogicalNodeSelector::Scalar;
|
||||
// Lane used when selector == Lane.
uint32_t lane = 0;
|
||||
// First lane of the range used when selector == RangeRepresentative.
uint32_t laneStart = 0;
|
||||
// Number of lanes in that range.
uint32_t laneCount = 1;
|
||||
};
|
||||
|
||||
/// Source endpoint of one CSV edge row.
struct EdgeSource {
|
||||
// Id of the source node.
std::string id;
|
||||
// Source lane written to the edge row; empty for scalar producers.
std::optional<uint32_t> sourceLane;
|
||||
};
|
||||
|
||||
/// Formats `field` as one CSV cell. Fields containing a comma, double quote,
/// line feed or carriage return are wrapped in double quotes with embedded
/// quotes doubled; every other field is returned unchanged.
std::string csvEscape(StringRef field) {
  if (field.find_first_of(",\"\n\r") == StringRef::npos)
    return field.str();

  std::string quoted;
  quoted.reserve(field.size() + 2);
  quoted.push_back('"');
  for (size_t index = 0; index < field.size(); ++index) {
    const char current = field[index];
    // A quote inside the field is emitted twice.
    if (current == '"')
      quoted.push_back('"');
    quoted.push_back(current);
  }
  quoted.push_back('"');
  return quoted;
}
|
||||
|
||||
void writeCsvRow(std::fstream& file, ArrayRef<std::string> fields) {
|
||||
for (size_t i = 0; i < fields.size(); ++i) {
|
||||
if (i != 0)
|
||||
file << ",";
|
||||
file << csvEscape(fields[i]);
|
||||
}
|
||||
file << "\n";
|
||||
}
|
||||
|
||||
/// Renders an optional number as decimal text; an empty optional becomes "".
template <typename NumberT>
std::string maybeNumber(std::optional<NumberT> value) {
  std::string text;
  if (value.has_value())
    text = std::to_string(*value);
  return text;
}
|
||||
|
||||
/// Returns the printed form of `type`.
std::string stringifyType(Type type) {
  std::string text;
  llvm::raw_string_ostream stream(text);
  stream << type;
  stream.flush();
  return text;
}
|
||||
|
||||
/// Returns `value` printed in operand form using the naming in `asmState`.
std::string stringifyValueAsOperand(Value value, AsmState& asmState) {
  std::string text;
  llvm::raw_string_ostream stream(text);
  value.printAsOperand(stream, asmState);
  stream.flush();
  return text;
}
|
||||
|
||||
/// Joins the operand-form names of all results of `op` with ';'.
/// Returns "" when no AsmState is supplied or the op has no results.
std::string stringifyResultSsaNames(Operation* op, AsmState* asmState) {
  if (!asmState || op->getNumResults() == 0)
    return "";

  std::string joined;
  llvm::raw_string_ostream stream(joined);
  bool isFirst = true;
  for (Value result : op->getResults()) {
    if (!isFirst)
      stream << ";";
    isFirst = false;
    result.printAsOperand(stream, *asmState);
  }
  stream.flush();
  return joined;
}
|
||||
|
||||
/// Returns the size of `type` in bytes when it can be determined:
///  - shaped types need a static shape and a byte-sized element type;
///  - index types always use the element-size helper;
///  - integer and float types need a non-zero bit width divisible by 8.
/// Anything else yields std::nullopt.
std::optional<uint64_t> getTypeSizeBytes(Type type) {
  auto isWholeBytes = [](auto bitWidth) { return bitWidth > 0 && bitWidth % 8 == 0; };
  auto elementBytes = [&]() -> std::optional<uint64_t> {
    return static_cast<uint64_t>(getElementTypeSizeInBytes(type));
  };

  if (auto shaped = dyn_cast<ShapedType>(type)) {
    const bool sizable = shaped.hasStaticShape() && hasByteSizedElementType(shaped.getElementType());
    if (!sizable)
      return std::nullopt;
    return static_cast<uint64_t>(getShapedTypeSizeInBytes(shaped));
  }

  if (isa<IndexType>(type))
    return elementBytes();

  if (auto integerType = dyn_cast<IntegerType>(type)) {
    if (isWholeBytes(integerType.getWidth()))
      return elementBytes();
    return std::nullopt;
  }

  if (auto floatingType = dyn_cast<FloatType>(type)) {
    if (isWholeBytes(floatingType.getWidth()))
      return elementBytes();
    return std::nullopt;
  }

  return std::nullopt;
}
|
||||
|
||||
/// Node id of a scalar compute: "sc:<opId>" when `isPost`, otherwise "gc:<opId>".
std::string getScalarId(bool isPost, size_t opId) {
  std::string id(isPost ? "sc:" : "gc:");
  id += std::to_string(opId);
  return id;
}
|
||||
|
||||
/// Node id of one batch lane: "scb:<opId>:<lane>" when `isPost`, otherwise
/// "gcb:<opId>:<lane>".
std::string getBatchLaneId(bool isPost, size_t opId, uint32_t lane) {
  std::string id(isPost ? "scb:" : "gcb:");
  id += std::to_string(opId);
  id += ":";
  id += std::to_string(lane);
  return id;
}
|
||||
|
||||
template <typename ComputeOpTy, typename BatchOpTy>
|
||||
bool isTopLevelRelevantCompute(Operation& op) {
|
||||
return isa<ComputeOpTy, BatchOpTy>(&op);
|
||||
}
|
||||
|
||||
template <typename ComputeOpTy, typename BatchOpTy>
|
||||
FailureOr<TopLevelOpInfo> buildTopLevelOpInfo(Operation& op, bool isPost, size_t opId) {
|
||||
TopLevelOpInfo info;
|
||||
info.op = &op;
|
||||
info.opId = opId;
|
||||
info.isPost = isPost;
|
||||
|
||||
if constexpr (std::is_same_v<ComputeOpTy, SpatScheduledCompute>) {
|
||||
if (auto compute = dyn_cast<ComputeOpTy>(&op)) {
|
||||
auto coreId = getOptionalScheduledCoreId(compute, "spatial dataflow export core id");
|
||||
if (failed(coreId))
|
||||
return failure();
|
||||
if (*coreId)
|
||||
info.scalarCore = **coreId;
|
||||
}
|
||||
}
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
template <typename BatchOpTy>
|
||||
FailureOr<SmallVector<int32_t, 8>> getBatchLaneCoreIds(BatchOpTy batch) {
|
||||
if constexpr (std::is_same_v<BatchOpTy, SpatScheduledComputeBatch>) {
|
||||
auto coreIds = getOptionalScheduledBatchCoreIds(batch, "spatial dataflow export core ids");
|
||||
if (failed(coreIds))
|
||||
return failure();
|
||||
if (!*coreIds)
|
||||
return SmallVector<int32_t, 8> {};
|
||||
return SmallVector<int32_t, 8>((**coreIds).begin(), (**coreIds).end());
|
||||
}
|
||||
return SmallVector<int32_t, 8> {};
|
||||
}
|
||||
|
||||
std::string getExpandedNodeId(const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
|
||||
Operation* op,
|
||||
uint32_t lane) {
|
||||
auto it = expandedNodes.find({op, lane});
|
||||
if (it == expandedNodes.end())
|
||||
return "";
|
||||
return it->second.id;
|
||||
}
|
||||
|
||||
void addScalarNodeRow(std::fstream& nodesFile,
|
||||
DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
|
||||
const TopLevelOpInfo& info,
|
||||
AsmState* asmState = nullptr) {
|
||||
std::string id = getScalarId(info.isPost, info.opId);
|
||||
SmallVector<std::string, 5> row {id, std::to_string(info.opId), "", maybeNumber<int32_t>(info.scalarCore)};
|
||||
if (asmState)
|
||||
row.push_back(stringifyResultSsaNames(info.op, asmState));
|
||||
writeCsvRow(nodesFile, row);
|
||||
expandedNodes[{info.op, 0}] = {id, info.scalarCore, std::nullopt};
|
||||
}
|
||||
|
||||
template <typename BatchOpTy>
|
||||
void addBatchNodeRows(std::fstream& nodesFile,
|
||||
DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
|
||||
const TopLevelOpInfo& info,
|
||||
BatchOpTy batch,
|
||||
ArrayRef<std::optional<int32_t>> laneCoreIds,
|
||||
AsmState* asmState = nullptr) {
|
||||
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
|
||||
std::string id = getBatchLaneId(info.isPost, info.opId, lane);
|
||||
SmallVector<std::string, 5> row {id,
|
||||
std::to_string(info.opId),
|
||||
std::to_string(lane),
|
||||
maybeNumber<int32_t>(laneCoreIds[lane])};
|
||||
if (asmState)
|
||||
row.push_back(stringifyResultSsaNames(info.op, asmState));
|
||||
writeCsvRow(nodesFile, row);
|
||||
expandedNodes[{info.op, lane}] = {id, laneCoreIds[lane], lane};
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<int64_t> evaluateIndexLike(Value value, Value laneArg, uint32_t lane);
|
||||
|
||||
/// Tries to evaluate `value` to an integer for a specific `lane`.
/// Recognized forms, tried in order:
///  - `laneArg` itself, which evaluates to `lane`;
///  - anything matchConstantIndexValue() resolves;
///  - an arith.constant holding an IntegerAttr;
///  - tensor.extract from a rank-1 constant integer tensor at an index that is
///    itself evaluable and in bounds;
///  - affine.apply whose operands are all evaluable.
/// Returns std::nullopt for everything else.
std::optional<int64_t> evaluateIndexLike(Value value, Value laneArg, uint32_t lane) {
  if (value == laneArg)
    return static_cast<int64_t>(lane);

  if (std::optional<int64_t> constantIndex = matchConstantIndexValue(value))
    return *constantIndex;

  if (auto constantOp = value.getDefiningOp<arith::ConstantOp>())
    if (auto integerAttr = dyn_cast<IntegerAttr>(constantOp.getValue()))
      return integerAttr.getInt();

  if (auto extractOp = value.getDefiningOp<tensor::ExtractOp>()) {
    // Only lookups into a constant rank-1 table with a single index are handled.
    auto tableOp = extractOp.getTensor().getDefiningOp<arith::ConstantOp>();
    if (!tableOp)
      return std::nullopt;
    auto elements = dyn_cast<ElementsAttr>(tableOp.getValue());
    if (!elements)
      return std::nullopt;
    auto tableType = dyn_cast<ShapedType>(elements.getType());
    if (!tableType || tableType.getRank() != 1 || extractOp.getIndices().size() != 1)
      return std::nullopt;

    std::optional<int64_t> position = evaluateIndexLike(extractOp.getIndices().front(), laneArg, lane);
    if (!position)
      return std::nullopt;
    if (*position < 0 || *position >= static_cast<int64_t>(elements.getNumElements()))
      return std::nullopt;

    auto denseInts = dyn_cast<DenseIntElementsAttr>(elements);
    if (!denseInts)
      return std::nullopt;
    auto entry = denseInts.value_begin<APInt>() + *position;
    return (*entry).getSExtValue();
  }

  if (auto affineApply = value.getDefiningOp<affine::AffineApplyOp>()) {
    FailureOr<int64_t> folded = evaluateAffineApply(
        affineApply,
        [&](Value operand) -> FailureOr<int64_t> {
          if (std::optional<int64_t> resolved = evaluateIndexLike(operand, laneArg, lane))
            return *resolved;
          return failure();
        });
    if (succeeded(folded))
      return *folded;
  }

  return std::nullopt;
}
|
||||
|
||||
/// Returns the integers `value` may take for `lane`. When the value can be
/// evaluated exactly, that single integer is returned. Otherwise, if it is a
/// tensor.extract from a constant dense integer tensor, the distinct entries of
/// that tensor are returned in first-occurrence order. In every other case the
/// result is empty.
SmallVector<int64_t, 8> collectPossibleIntValues(Value value, Value laneArg, uint32_t lane) {
  SmallVector<int64_t, 8> candidates;

  if (std::optional<int64_t> exact = evaluateIndexLike(value, laneArg, lane)) {
    candidates.push_back(*exact);
    return candidates;
  }

  auto extractOp = value.getDefiningOp<tensor::ExtractOp>();
  if (!extractOp)
    return candidates;
  auto tableOp = extractOp.getTensor().getDefiningOp<arith::ConstantOp>();
  if (!tableOp)
    return candidates;
  auto elements = dyn_cast<ElementsAttr>(tableOp.getValue());
  if (!elements)
    return candidates;

  auto denseInts = dyn_cast<DenseIntElementsAttr>(elements);
  if (!denseInts)
    return candidates;

  candidates.reserve(elements.getNumElements());
  for (APInt element : denseInts.getValues<APInt>()) {
    const int64_t entry = element.getSExtValue();
    if (!llvm::is_contained(candidates, entry))
      candidates.push_back(entry);
  }
  return candidates;
}
|
||||
|
||||
template <typename BatchOpTy>
|
||||
std::optional<Value> getBatchLaneInput(BatchOpTy batch, uint32_t lane, unsigned inputIndex) {
|
||||
if (batch.getNumResults() != 0)
|
||||
return batch.getInputs()[inputIndex];
|
||||
|
||||
size_t laneCount = static_cast<size_t>(batch.getLaneCount());
|
||||
if (laneCount == 0 || batch.getInputs().size() % laneCount != 0)
|
||||
return std::nullopt;
|
||||
|
||||
size_t inputsPerLane = batch.getInputs().size() / laneCount;
|
||||
size_t flatIndex = static_cast<size_t>(lane) * inputsPerLane + inputIndex;
|
||||
if (flatIndex >= batch.getInputs().size())
|
||||
return std::nullopt;
|
||||
return batch.getInputs()[flatIndex];
|
||||
}
|
||||
|
||||
/// Number of inputs each lane of `batch` sees: the full operand count for a
/// batch with results, otherwise operands / lanes (0 when the operands cannot
/// be split evenly or the batch has no lanes).
template <typename BatchOpTy>
unsigned getBatchLaneInputCount(BatchOpTy batch) {
  const size_t totalInputs = batch.getInputs().size();
  if (batch.getNumResults() != 0)
    return totalInputs;

  const size_t laneCount = static_cast<size_t>(batch.getLaneCount());
  const bool evenlySplit = laneCount != 0 && totalInputs % laneCount == 0;
  return evenlySplit ? static_cast<unsigned>(totalInputs / laneCount) : 0;
}
|
||||
|
||||
template <typename ComputeOpTy, typename BatchOpTy>
|
||||
std::optional<ResolvedProducer> resolveProducerForValue(Value value, std::optional<uint32_t> consumerLane) {
|
||||
Operation* op = value.getDefiningOp();
|
||||
if (!op)
|
||||
return std::nullopt;
|
||||
|
||||
while (auto extract = dyn_cast<tensor::ExtractSliceOp>(op)) {
|
||||
Value source = extract.getSource();
|
||||
Operation* sourceOp = source.getDefiningOp();
|
||||
auto sourceBatch = dyn_cast_or_null<BatchOpTy>(sourceOp);
|
||||
if (sourceBatch && sourceBatch.getNumResults() != 0) {
|
||||
auto staticOffsets = extract.getStaticOffsets();
|
||||
if (!staticOffsets.empty() && staticOffsets.front() != ShapedType::kDynamic) {
|
||||
uint32_t lane = static_cast<uint32_t>(staticOffsets.front());
|
||||
return ResolvedProducer {sourceOp, 0, LogicalNodeSelector::Lane, lane, lane, 1};
|
||||
}
|
||||
if (consumerLane)
|
||||
return ResolvedProducer {sourceOp, 0, LogicalNodeSelector::Lane, *consumerLane, *consumerLane, 1};
|
||||
return ResolvedProducer {
|
||||
sourceOp, 0, LogicalNodeSelector::RangeRepresentative, 0, 0, static_cast<uint32_t>(sourceBatch.getLaneCount())
|
||||
};
|
||||
}
|
||||
value = source;
|
||||
op = sourceOp;
|
||||
if (!op)
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
if (auto compute = dyn_cast<ComputeOpTy>(op))
|
||||
return ResolvedProducer {
|
||||
compute.getOperation(), static_cast<size_t>(cast<OpResult>(value).getResultNumber()), LogicalNodeSelector::Scalar, 0, 0, 1
|
||||
};
|
||||
|
||||
if (auto batch = dyn_cast<BatchOpTy>(op)) {
|
||||
if (batch.getNumResults() != 0) {
|
||||
if (consumerLane)
|
||||
return ResolvedProducer {op, 0, LogicalNodeSelector::Lane, *consumerLane, *consumerLane, 1};
|
||||
return ResolvedProducer {
|
||||
op, 0, LogicalNodeSelector::RangeRepresentative, 0, 0, static_cast<uint32_t>(batch.getLaneCount())
|
||||
};
|
||||
}
|
||||
|
||||
uint32_t lane = static_cast<uint32_t>(cast<OpResult>(value).getResultNumber());
|
||||
return ResolvedProducer {op, static_cast<size_t>(lane), LogicalNodeSelector::Lane, lane, lane, 1};
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Expands `producer` into the concrete edge sources registered in
/// `expandedNodes`: the lane-0 node for Scalar (with no source lane), the
/// selected lane for Lane, and every lane of the range otherwise. Lanes with
/// no registered node are skipped.
SmallVector<EdgeSource, 8>
resolveProducerSourcesForCsv(const ResolvedProducer& producer,
                             const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes) {
  SmallVector<EdgeSource, 8> sources;
  auto appendIfRegistered = [&](uint32_t nodeLane, std::optional<uint32_t> reportedLane) {
    std::string id = getExpandedNodeId(expandedNodes, producer.op, nodeLane);
    if (!id.empty())
      sources.push_back({id, reportedLane});
  };

  if (producer.selector == LogicalNodeSelector::Scalar) {
    appendIfRegistered(0, std::nullopt);
  } else if (producer.selector == LogicalNodeSelector::Lane) {
    appendIfRegistered(producer.lane, producer.lane);
  } else {
    const uint32_t laneEnd = producer.laneStart + producer.laneCount;
    for (uint32_t lane = producer.laneStart; lane < laneEnd; ++lane)
      appendIfRegistered(lane, lane);
  }
  return sources;
}
|
||||
|
||||
void emitEdgeRow(std::fstream& edgesFile,
|
||||
StringRef sourceId,
|
||||
StringRef targetId,
|
||||
std::optional<uint64_t> byteSize,
|
||||
Type propagatedType,
|
||||
StringRef stage,
|
||||
std::optional<uint32_t> sourceLane,
|
||||
std::optional<uint32_t> targetLane,
|
||||
std::optional<int64_t> channelId) {
|
||||
writeCsvRow(edgesFile,
|
||||
{sourceId.str(),
|
||||
targetId.str(),
|
||||
maybeNumber<uint64_t>(byteSize),
|
||||
stringifyType(propagatedType),
|
||||
stage.str(),
|
||||
maybeNumber<uint32_t>(sourceLane),
|
||||
maybeNumber<uint32_t>(targetLane),
|
||||
maybeNumber<int64_t>(channelId)});
|
||||
}
|
||||
|
||||
template <typename ComputeOpTy, typename BatchOpTy>
|
||||
LogicalResult emitDataEdges(std::fstream& edgesFile,
|
||||
const DenseMap<Operation*, TopLevelOpInfo>& topLevelInfo,
|
||||
const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
|
||||
StringRef stage) {
|
||||
for (const auto& entry : topLevelInfo) {
|
||||
Operation* op = entry.first;
|
||||
const TopLevelOpInfo& info = entry.second;
|
||||
|
||||
if (auto compute = dyn_cast<ComputeOpTy>(op)) {
|
||||
for (Value input : compute.getInputs()) {
|
||||
if (isa_and_nonnull<SpatChannelReceiveOp>(input.getDefiningOp()))
|
||||
continue;
|
||||
|
||||
auto producer = resolveProducerForValue<ComputeOpTy, BatchOpTy>(input, std::nullopt);
|
||||
if (!producer)
|
||||
continue;
|
||||
|
||||
SmallVector<EdgeSource, 8> sources = resolveProducerSourcesForCsv(*producer, expandedNodes);
|
||||
std::optional<uint64_t> byteSize = getTypeSizeBytes(input.getType());
|
||||
std::string targetId = getScalarId(info.isPost, info.opId);
|
||||
for (const EdgeSource& source : sources)
|
||||
emitEdgeRow(edgesFile, source.id, targetId, byteSize, input.getType(), stage, source.sourceLane, std::nullopt, std::nullopt);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
auto batch = dyn_cast<BatchOpTy>(op);
|
||||
if (!batch)
|
||||
continue;
|
||||
|
||||
unsigned inputCount = getBatchLaneInputCount(batch);
|
||||
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
|
||||
std::string targetId = getBatchLaneId(info.isPost, info.opId, lane);
|
||||
for (unsigned inputIndex = 0; inputIndex < inputCount; ++inputIndex) {
|
||||
std::optional<Value> input = getBatchLaneInput(batch, lane, inputIndex);
|
||||
if (!input || isa_and_nonnull<SpatChannelReceiveOp>((*input).getDefiningOp()))
|
||||
continue;
|
||||
|
||||
auto producer = resolveProducerForValue<ComputeOpTy, BatchOpTy>(*input, lane);
|
||||
if (!producer)
|
||||
continue;
|
||||
|
||||
SmallVector<EdgeSource, 8> sources = resolveProducerSourcesForCsv(*producer, expandedNodes);
|
||||
std::optional<uint64_t> byteSize = getTypeSizeBytes((*input).getType());
|
||||
for (const EdgeSource& source : sources)
|
||||
emitEdgeRow(edgesFile, source.id, targetId, byteSize, (*input).getType(), stage, source.sourceLane, lane, std::nullopt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
template <typename BatchOpTy>
|
||||
void collectChannelSends(DenseMap<int64_t, SmallVector<ChannelSendRecord, 4>>& sendsByChannelId,
|
||||
const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
|
||||
BatchOpTy batch) {
|
||||
std::optional<BlockArgument> laneArg = batch.getLaneArgument();
|
||||
if (!laneArg)
|
||||
return;
|
||||
|
||||
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
|
||||
std::string sourceId = getExpandedNodeId(expandedNodes, batch.getOperation(), lane);
|
||||
if (sourceId.empty())
|
||||
continue;
|
||||
batch.getBody().walk([&](SpatChannelSendOp send) {
|
||||
std::optional<int64_t> channelId = evaluateIndexLike(send.getChannelId(), *laneArg, lane);
|
||||
if (!channelId)
|
||||
return;
|
||||
sendsByChannelId[*channelId].push_back({sourceId, lane});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/// Records every SpatChannelSendOp found in the body of a scheduled compute
/// op under the channel id it evaluates to.
///
/// The op is looked up as lane 0 in the expanded node table; nothing is
/// recorded when that id is empty. Recorded entries carry no source lane.
/// Sends whose channel id cannot be evaluated by evaluateIndexLike are
/// skipped.
void collectChannelSends(DenseMap<int64_t, SmallVector<ChannelSendRecord, 4>>& sendsByChannelId,
                         const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes,
                         SpatScheduledCompute compute) {
    std::string nodeId = getExpandedNodeId(expandedNodes, compute.getOperation(), 0);
    if (nodeId.empty())
        return;

    auto recordSend = [&](SpatChannelSendOp sendOp) {
        // There is no lane argument for a scalar op, hence the null Value.
        std::optional<int64_t> resolvedChannel = evaluateIndexLike(sendOp.getChannelId(), Value(), 0);
        if (resolvedChannel)
            sendsByChannelId[*resolvedChannel].push_back({nodeId, std::nullopt});
    };
    compute.getBody().walk(recordSend);
}
|
||||
|
||||
/// Groups the expanded nodes by the core they are assigned to.
///
/// Nodes without a core are ignored. Each node is stored as a
/// ChannelSendRecord built from its id and lane.
///
/// The records of each core are sorted by (sourceId, sourceLane). The input
/// map is keyed on Operation pointers, so its iteration order differs between
/// runs; without the sort that order would leak into the rows emitted from
/// these records.
///
/// \param expandedNodes  Expanded (op, lane) node table.
/// \returns Map from core id to the records of the nodes placed on that core.
DenseMap<int32_t, SmallVector<ChannelSendRecord, 4>>
buildNodesByCore(const DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo>& expandedNodes) {
    DenseMap<int32_t, SmallVector<ChannelSendRecord, 4>> nodesByCore;
    for (const auto& entry : expandedNodes) {
        const ExpandedNodeInfo& node = entry.second;
        if (!node.core)
            continue;
        nodesByCore[*node.core].push_back({node.id, node.lane});
    }

    // Normalise the per-core order so it no longer depends on pointer hashing.
    for (auto& bucket : nodesByCore) {
        llvm::sort(bucket.second, [](const ChannelSendRecord& lhs, const ChannelSendRecord& rhs) {
            if (lhs.sourceId != rhs.sourceId)
                return lhs.sourceId < rhs.sourceId;
            return lhs.sourceLane < rhs.sourceLane;
        });
    }
    return nodesByCore;
}
|
||||
|
||||
template <typename ComputeOpTy, typename BatchOpTy, typename ResolveChannelSourcesFn>
|
||||
LogicalResult emitExplicitChannelEdges(std::fstream& edgesFile,
|
||||
const DenseMap<Operation*, TopLevelOpInfo>& topLevelInfo,
|
||||
ResolveChannelSourcesFn&& resolveChannelSources,
|
||||
StringRef stage) {
|
||||
for (const auto& entry : topLevelInfo) {
|
||||
Operation* op = entry.first;
|
||||
const TopLevelOpInfo& info = entry.second;
|
||||
|
||||
if (auto compute = dyn_cast<ComputeOpTy>(op)) {
|
||||
compute.getBody().walk([&](SpatChannelReceiveOp receive) {
|
||||
SmallVector<ChannelSendRecord, 4> sources = resolveChannelSources(receive, 0);
|
||||
if (sources.empty())
|
||||
return;
|
||||
std::optional<int64_t> channelId = evaluateIndexLike(receive.getChannelId(), Value(), 0);
|
||||
std::string targetId = getScalarId(info.isPost, info.opId);
|
||||
std::optional<uint64_t> byteSize = getTypeSizeBytes(receive.getType());
|
||||
for (const ChannelSendRecord& source : sources)
|
||||
emitEdgeRow(edgesFile, source.sourceId, targetId, byteSize, receive.getType(), stage, source.sourceLane, std::nullopt, channelId);
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
auto batch = dyn_cast<BatchOpTy>(op);
|
||||
if (!batch)
|
||||
continue;
|
||||
auto laneArg = batch.getLaneArgument();
|
||||
if (!laneArg)
|
||||
continue;
|
||||
for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane) {
|
||||
std::string targetId = getBatchLaneId(info.isPost, info.opId, lane);
|
||||
batch.getBody().walk([&](SpatChannelReceiveOp receive) {
|
||||
SmallVector<ChannelSendRecord, 4> sources = resolveChannelSources(receive, lane);
|
||||
if (sources.empty())
|
||||
return;
|
||||
std::optional<int64_t> channelId = evaluateIndexLike(receive.getChannelId(), *laneArg, lane);
|
||||
std::optional<uint64_t> byteSize = getTypeSizeBytes(receive.getType());
|
||||
for (const ChannelSendRecord& source : sources)
|
||||
emitEdgeRow(edgesFile, source.sourceId, targetId, byteSize, receive.getType(), stage, source.sourceLane, lane, channelId);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
/// Writes the "pre" dataflow graph of \p func as two CSV files,
/// spatial1_graph.nodes.csv and spatial1_graph.edges.csv, under the
/// "/reports" dump directory.
///
/// Nodes are the SpatGraphCompute ops and the individual lanes of
/// SpatGraphComputeBatch ops found in the first block of the function; batch
/// lanes are emitted without core ids. Edges are the data edges produced by
/// emitDataEdges.
///
/// \returns success() without writing anything when either file cannot be
///          opened; failure() when buildTopLevelOpInfo fails for an op;
///          otherwise the result of emitDataEdges.
LogicalResult exportStagePre(func::FuncOp func) {
    std::fstream nodesFile = openDialectDumpFileWithExtension("spatial1_graph.nodes", "/reports", "csv");
    std::fstream edgesFile = openDialectDumpFileWithExtension("spatial1_graph.edges", "/reports", "csv");
    // The export is best-effort: an unopened dump file is not an error.
    if (!nodesFile.is_open() || !edgesFile.is_open())
        return success();

    writeCsvRow(nodesFile, {"Id", "op_id", "lane", "core", "ssa_name"});
    writeCsvRow(edgesFile, {"Source", "Target", "Weight", "Type", "stage", "source_lane", "target_lane", "channel_id"});

    // Build the AsmState on the enclosing module when there is one, otherwise
    // on the function itself.
    Operation* asmRoot = func.getOperation();
    if (auto moduleOp = func->getParentOfType<ModuleOp>())
        asmRoot = moduleOp.getOperation();
    OpPrintingFlags flags;
    flags.elideLargeElementsAttrs().enableDebugInfo(true, false);
    AsmState asmState(asmRoot, flags);

    DenseMap<Operation*, TopLevelOpInfo> topLevelInfo;
    DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo> expandedNodes;

    size_t opId = 0;
    for (Operation& op : func.getBody().front()) {
        if (!isTopLevelRelevantCompute<SpatGraphCompute, SpatGraphComputeBatch>(op))
            continue;
        FailureOr<TopLevelOpInfo> info = buildTopLevelOpInfo<SpatGraphCompute, SpatGraphComputeBatch>(op, false, opId++);
        if (failed(info))
            return failure();
        topLevelInfo[&op] = *info;

        // Only the op kind matters here, so isa<> is enough (and matches
        // exportStagePost).
        if (isa<SpatGraphCompute>(&op)) {
            addScalarNodeRow(nodesFile, expandedNodes, *info, &asmState);
            continue;
        }

        auto batch = cast<SpatGraphComputeBatch>(&op);
        // No core assignment is recorded at this stage: all entries stay empty.
        SmallVector<std::optional<int32_t>, 8> laneCoreIds(batch.getLaneCount());
        addBatchNodeRows(nodesFile, expandedNodes, *info, batch, laneCoreIds, &asmState);
    }

    return emitDataEdges<SpatGraphCompute, SpatGraphComputeBatch>(edgesFile, topLevelInfo, expandedNodes, "pre");
}
|
||||
|
||||
/// Writes the "post" dataflow graph of \p func as two CSV files,
/// spatial2_merged.nodes.csv and spatial2_merged.edges.csv, under the
/// "/reports" dump directory.
///
/// Nodes are the SpatScheduledCompute ops and the individual lanes of
/// SpatScheduledComputeBatch ops found in the first block of the function.
/// Edges are the data edges from emitDataEdges followed by the channel edges
/// from emitExplicitChannelEdges. A receive is matched to the sends recorded
/// for its evaluated channel id; when there is none, every node placed on one
/// of the receive's possible source cores is used instead.
///
/// \returns success() without writing anything when either file cannot be
///          opened; failure() when buildTopLevelOpInfo, getBatchLaneCoreIds or
///          emitDataEdges fails; otherwise the result of
///          emitExplicitChannelEdges.
LogicalResult exportStagePost(func::FuncOp func) {
    std::fstream nodesFile = openDialectDumpFileWithExtension("spatial2_merged.nodes", "/reports", "csv");
    std::fstream edgesFile = openDialectDumpFileWithExtension("spatial2_merged.edges", "/reports", "csv");
    // The export is best-effort: an unopened dump file is not an error.
    if (!nodesFile.is_open() || !edgesFile.is_open())
        return success();

    writeCsvRow(nodesFile, {"Id", "op_id", "lane", "core"});
    writeCsvRow(edgesFile, {"Source", "Target", "Weight", "Type", "stage", "source_lane", "target_lane", "channel_id"});

    DenseMap<Operation*, TopLevelOpInfo> topLevelInfo;
    DenseMap<std::pair<Operation*, uint32_t>, ExpandedNodeInfo> expandedNodes;

    size_t opId = 0;
    for (Operation& op : func.getBody().front()) {
        if (!isTopLevelRelevantCompute<SpatScheduledCompute, SpatScheduledComputeBatch>(op))
            continue;
        FailureOr<TopLevelOpInfo> info =
            buildTopLevelOpInfo<SpatScheduledCompute, SpatScheduledComputeBatch>(op, true, opId++);
        if (failed(info))
            return failure();
        topLevelInfo[&op] = *info;

        if (isa<SpatScheduledCompute>(&op)) {
            addScalarNodeRow(nodesFile, expandedNodes, *info);
            continue;
        }

        auto batch = cast<SpatScheduledComputeBatch>(&op);
        auto coreIds = getBatchLaneCoreIds(batch);
        if (failed(coreIds))
            return failure();
        // Lanes beyond the returned core list keep an empty core id.
        SmallVector<std::optional<int32_t>, 8> laneCoreIds(batch.getLaneCount());
        for (uint32_t lane = 0; lane < static_cast<uint32_t>(batch.getLaneCount()); ++lane)
            if (lane < coreIds->size())
                laneCoreIds[lane] = (*coreIds)[lane];
        addBatchNodeRows(nodesFile, expandedNodes, *info, batch, laneCoreIds);
    }

    if (failed(emitDataEdges<SpatScheduledCompute, SpatScheduledComputeBatch>(
            edgesFile, topLevelInfo, expandedNodes, "post")))
        return failure();

    // Collect sends by re-walking the block in program order instead of
    // iterating topLevelInfo: that map is keyed on Operation pointers, so its
    // iteration order (and therefore the order of records per channel) would
    // differ between runs.
    DenseMap<int64_t, SmallVector<ChannelSendRecord, 4>> sendsByChannelId;
    for (Operation& op : func.getBody().front()) {
        if (!topLevelInfo.count(&op))
            continue;
        if (auto compute = dyn_cast<SpatScheduledCompute>(&op))
            collectChannelSends(sendsByChannelId, expandedNodes, compute);
        else if (auto batch = dyn_cast<SpatScheduledComputeBatch>(&op))
            collectChannelSends(sendsByChannelId, expandedNodes, batch);
    }

    DenseMap<int32_t, SmallVector<ChannelSendRecord, 4>> nodesByCore = buildNodesByCore(expandedNodes);
    auto resolveChannelSources = [&](SpatChannelReceiveOp receive,
                                     uint32_t lane) -> SmallVector<ChannelSendRecord, 4> {
        SmallVector<ChannelSendRecord, 4> sources;

        // Stays a null Value when the receive is not nested in a batch op or
        // the batch has no lane argument.
        Value laneArg;
        if (auto owner = receive->getParentOfType<SpatScheduledComputeBatch>())
            if (auto maybeLaneArg = owner.getLaneArgument())
                laneArg = *maybeLaneArg;

        // Preferred match: sends recorded under the same evaluated channel id.
        if (std::optional<int64_t> channelId = evaluateIndexLike(receive.getChannelId(), laneArg, lane)) {
            if (auto it = sendsByChannelId.find(*channelId); it != sendsByChannelId.end())
                return it->second;
        }

        // Fallback: every node placed on one of the possible source cores.
        for (int64_t sourceCore : collectPossibleIntValues(receive.getSourceCoreId(), laneArg, lane)) {
            auto it = nodesByCore.find(static_cast<int32_t>(sourceCore));
            if (it == nodesByCore.end())
                continue;
            llvm::append_range(sources, it->second);
        }
        return sources;
    };

    return emitExplicitChannelEdges<SpatScheduledCompute, SpatScheduledComputeBatch>(
        edgesFile, topLevelInfo, resolveChannelSources, "post");
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Translates the current value of the pimExportSpatialDataflow command-line
/// option into the corresponding SpatialDataflowExportStage.
SpatialDataflowExportStage getSpatialDataflowExportStage() {
    const auto requestedMode = pimExportSpatialDataflow.getValue();
    switch (requestedMode) {
    case SpatialDataflowExportNone:
        return SpatialDataflowExportStage::None;
    case SpatialDataflowExportPre:
        return SpatialDataflowExportStage::Pre;
    case SpatialDataflowExportPost:
        return SpatialDataflowExportStage::Post;
    case SpatialDataflowExportBoth:
        return SpatialDataflowExportStage::Both;
    }
    // Every option value is handled above.
    llvm_unreachable("unknown spatial dataflow export mode");
}
|
||||
|
||||
/// Tells whether \p stage has to be exported under the export mode \p mode.
///
/// Pre and Post select only the stage of the same name, Both selects either
/// of the two, and None (or any unrecognised mode) selects nothing.
bool shouldExportSpatialDataflowStage(SpatialDataflowExportStage mode, SpatialDataflowExportStage stage) {
    const bool stageIsPre = stage == SpatialDataflowExportStage::Pre;
    const bool stageIsPost = stage == SpatialDataflowExportStage::Post;

    if (mode == SpatialDataflowExportStage::Pre)
        return stageIsPre;
    if (mode == SpatialDataflowExportStage::Post)
        return stageIsPost;
    if (mode == SpatialDataflowExportStage::Both)
        return stageIsPre || stageIsPost;
    return false;
}
|
||||
|
||||
/// Public entry point for the "pre" export; forwards to exportStagePre.
LogicalResult exportSpatialDataflowCsvPre(func::FuncOp func) {
    return exportStagePre(func);
}
|
||||
|
||||
/// Public entry point for the "post" export; forwards to exportStagePost.
LogicalResult exportSpatialDataflowCsvPost(func::FuncOp func) {
    return exportStagePost(func);
}
|
||||
|
||||
} // namespace spatial
|
||||
} // namespace onnx_mlir
|
||||
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include "mlir/Dialect/Func/IR/FuncOps.h"
|
||||
#include "mlir/Support/LogicalResult.h"
|
||||
|
||||
|
||||
namespace onnx_mlir {
|
||||
namespace spatial {
|
||||
|
||||
enum class SpatialDataflowExportStage {
|
||||
None,
|
||||
Pre,
|
||||
Post,
|
||||
Both,
|
||||
};
|
||||
|
||||
SpatialDataflowExportStage getSpatialDataflowExportStage();
|
||||
|
||||
mlir::LogicalResult exportSpatialDataflowCsvPre(mlir::func::FuncOp func);
|
||||
mlir::LogicalResult exportSpatialDataflowCsvPost(mlir::func::FuncOp func);
|
||||
|
||||
bool shouldExportSpatialDataflowStage(SpatialDataflowExportStage mode, SpatialDataflowExportStage stage);
|
||||
|
||||
} // namespace spatial
|
||||
} // namespace onnx_mlir
|
||||
Reference in New Issue
Block a user