CodexWorkaround
Validate Operations / validate-operations (push) Has been cancelled

This commit is contained in:
ilgeco
2026-06-08 11:33:36 +02:00
parent aec80529ca
commit 75fb70712f
184 changed files with 1127 additions and 7 deletions
Symlink
+1
View File
@@ -0,0 +1 @@
/home/ilgeco/Project/Raptor/build_debug/
+254
View File
@@ -0,0 +1,254 @@
diff --git a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt
index 0b7e8cc..32964aa 100644
--- a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt
+++ b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt
@@ -22,6 +22,7 @@ add_pim_library(OMONNXToSpatial
Patterns/Tensor/Gather.cpp
Patterns/Tensor/Resize.cpp
Patterns/Tensor/Reshape.cpp
+ Patterns/Tensor/Slice.cpp
Patterns/Tensor/Split.cpp
Patterns/Tensor/Transpose.cpp
ONNXToSpatialPass.cpp
diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp
index edf311e..c3d42f7 100644
--- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp
+++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp
@@ -138,6 +138,7 @@ void ONNXToSpatialPass::runOnOperation() {
target.addIllegalOp<ONNXGatherOp>();
target.addIllegalOp<ONNXReshapeOp>();
target.addIllegalOp<ONNXResizeOp>();
+ target.addIllegalOp<ONNXSliceOp>();
target.addIllegalOp<ONNXLRNOp>();
target.addIllegalOp<ONNXReduceMeanV13Op>();
target.addIllegalOp<ONNXSplitOp>();
diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp
index ffa0b1f..0a747e9 100644
--- a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp
+++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp
@@ -22,6 +22,7 @@ void populateConversionPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
populateGatherPatterns(patterns, ctx);
populateResizePatterns(patterns, ctx);
populateReshapePatterns(patterns, ctx);
+ populateSlicePatterns(patterns, ctx);
populateSplitPatterns(patterns, ctx);
populateTransposePatterns(patterns, ctx);
}
diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp
index e58729e..c040536 100644
--- a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp
+++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp
@@ -29,6 +29,7 @@ void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext
void populateGatherPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateResizePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
+void populateSlicePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateSplitPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateTransposePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp
new file mode 100644
index 0000000..3f8867f
--- /dev/null
+++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp
@@ -0,0 +1,200 @@
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+#include "llvm/ADT/SmallVector.h"
+
+#include <algorithm>
+#include <optional>
+
+#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
+#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp"
+#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
+#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
+#include "src/Dialect/ONNX/ONNXOps.hpp"
+
+using namespace mlir;
+
+namespace onnx_mlir {
+namespace {
+
+/// Returns the dense elements payload of the constant that defines `value`.
+///
+/// Both `arith.constant` and `onnx.Constant` producers are recognized. A null
+/// attribute is returned when `value` has no such producer or when the constant
+/// does not hold a `DenseElementsAttr`.
+static DenseElementsAttr getDenseConstantAttr(Value value) {
+  Operation* producer = value.getDefiningOp();
+
+  if (auto arithConstant = dyn_cast_or_null<arith::ConstantOp>(producer))
+    return dyn_cast<DenseElementsAttr>(arithConstant.getValue());
+
+  if (auto onnxConstant = dyn_cast_or_null<ONNXConstantOp>(producer))
+    return dyn_cast_or_null<DenseElementsAttr>(onnxConstant.getValueAttr());
+
+  return nullptr;
+}
+
+/// Reads a compile-time constant integer tensor as a list of `int64_t`.
+///
+/// Fails when `value` is not produced by a constant holding a
+/// `DenseIntElementsAttr`.
+///
+/// Elements are iterated as `APInt` and sign-extended, so index tensors with an
+/// element type narrower than 64 bits (ONNX Slice allows int32 as well as int64
+/// for starts/ends/axes/steps) are handled instead of tripping the element
+/// width check behind `getValues<int64_t>()`.
+static FailureOr<SmallVector<int64_t>> getConstantIntValues(Value value) {
+  auto denseAttr = dyn_cast_or_null<DenseIntElementsAttr>(getDenseConstantAttr(value));
+  if (!denseAttr)
+    return failure();
+
+  SmallVector<int64_t> values;
+  values.reserve(denseAttr.getNumElements());
+  for (llvm::APInt element : denseAttr.getValues<llvm::APInt>())
+    values.push_back(element.getSExtValue());
+  return values;
+}
+
+/// True when `value` is produced by an `onnx.NoneOp`, i.e. the optional operand was omitted.
+static bool isNoneValueLike(Value value) {
+  Operation* producer = value.getDefiningOp();
+  return producer != nullptr && isa<ONNXNoneOp>(producer);
+}
+
+/// Builds a static `tensor.extract_slice` that implements an ONNX Slice of `data`.
+///
+/// \param data        Tensor value to slice.
+/// \param dataType    Type of `data`; must have a static shape.
+/// \param resultType  Expected result type; must be static and of the same rank.
+/// \param starts      Per-slice start indices (negative values count from the end).
+/// \param ends        Per-slice end indices (negative values count from the end).
+/// \param axes        Optional axes the slices apply to; when absent the slices
+///                    apply to the leading `starts.size()` dimensions.
+/// \param steps       Optional per-slice steps; when absent every step is 1.
+/// \param rewriter    Rewriter used to create attributes and the slice op.
+/// \param loc         Location attached to the created op.
+///
+/// \returns the slice result, or failure when the shapes are not static, the
+/// operand lengths disagree, an axis is rejected by `normalizeAxisChecked` or
+/// repeated, a step is not strictly positive, or the computed shape does not
+/// match `resultType`.
+static FailureOr<Value> buildSlice(Value data,
+                                   RankedTensorType dataType,
+                                   RankedTensorType resultType,
+                                   ArrayRef<int64_t> starts,
+                                   ArrayRef<int64_t> ends,
+                                   std::optional<ArrayRef<int64_t>> axes,
+                                   std::optional<ArrayRef<int64_t>> steps,
+                                   ConversionPatternRewriter& rewriter,
+                                   Location loc) {
+  int64_t rank = dataType.getRank();
+  if (!dataType.hasStaticShape() || !resultType.hasStaticShape() || resultType.getRank() != rank)
+    return failure();
+
+  // starts/ends/axes/steps describe the same set of slices, so their lengths must agree.
+  if (starts.size() != ends.size())
+    return failure();
+  if (axes && axes->size() != starts.size())
+    return failure();
+  if (steps && steps->size() != starts.size())
+    return failure();
+
+  SmallVector<int64_t> normalizedAxes;
+  if (axes) {
+    // Each axis may be sliced at most once.
+    SmallVector<bool> seenAxes(rank, false);
+    normalizedAxes.reserve(axes->size());
+    for (int64_t axis : *axes) {
+      auto normalizedAxis = normalizeAxisChecked(axis, rank);
+      if (failed(normalizedAxis))
+        return failure();
+      if (seenAxes[*normalizedAxis])
+        return failure();
+      seenAxes[*normalizedAxis] = true;
+      normalizedAxes.push_back(*normalizedAxis);
+    }
+  }
+  else {
+    // Without explicit axes the slices map onto dimensions 0..starts.size()-1.
+    if (starts.size() > static_cast<size_t>(rank))
+      return failure();
+    normalizedAxes.reserve(starts.size());
+    for (size_t i = 0; i < starts.size(); ++i)
+      normalizedAxes.push_back(static_cast<int64_t>(i));
+  }
+
+  SmallVector<int64_t> normalizedSteps;
+  if (steps)
+    normalizedSteps.assign(steps->begin(), steps->end());
+  else
+    normalizedSteps.assign(starts.size(), 1);
+
+  // Start from the identity slice and overwrite the entries of the sliced axes.
+  SmallVector<int64_t> computedShape(dataType.getShape().begin(), dataType.getShape().end());
+  SmallVector<OpFoldResult> offsets = getZeroOffsets(rewriter, rank);
+  SmallVector<OpFoldResult> sizes = getStaticSizes(rewriter, dataType.getShape());
+  SmallVector<OpFoldResult> strides = getUnitStrides(rewriter, rank);
+
+  for (auto [sliceIndex, axis] : llvm::enumerate(normalizedAxes)) {
+    int64_t step = normalizedSteps[sliceIndex];
+    if (step <= 0)
+      return failure();
+
+    int64_t dimSize = dataType.getShape()[axis];
+    int64_t start = starts[sliceIndex];
+    int64_t end = ends[sliceIndex];
+
+    // Negative indices are relative to the end of the dimension.
+    if (start < 0)
+      start += dimSize;
+    if (end < 0)
+      end += dimSize;
+
+    start = std::clamp(start, int64_t {0}, dimSize);
+    end = std::clamp(end, int64_t {0}, dimSize);
+
+    int64_t extent = std::max(end - start, int64_t {0});
+    // ceil(extent / step) written without `extent + step - 1`, which would
+    // overflow int64_t for very large constant steps.
+    int64_t size = extent == 0 ? int64_t {0} : (extent - 1) / step + 1;
+
+    offsets[axis] = rewriter.getIndexAttr(start);
+    sizes[axis] = rewriter.getIndexAttr(size);
+    strides[axis] = rewriter.getIndexAttr(step);
+    computedShape[axis] = size;
+  }
+
+  // The statically inferred result type must agree with what the slice produces.
+  if (llvm::ArrayRef(computedShape) != resultType.getShape())
+    return failure();
+
+  return tensor::ExtractSliceOp::create(rewriter, loc, resultType, data, offsets, sizes, strides).getResult();
+}
+
+/// Conversion pattern that rewrites `onnx.Slice` with constant slice parameters
+/// into a `tensor.extract_slice`.
+///
+/// When the sliced data is compile-time computable the extract_slice replaces
+/// the op directly; otherwise it is emitted inside a compute region created by
+/// `createSpatCompute`, terminated by a `SpatYieldOp`.
+struct Slice final : OpConversionPattern<ONNXSliceOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult matchAndRewrite(ONNXSliceOp sliceOp,
+                                ONNXSliceOpAdaptor adaptor,
+                                ConversionPatternRewriter& rewriter) const override {
+    // Only ranked, fully static operand and result types are supported.
+    auto dataType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
+    auto resultType = dyn_cast<RankedTensorType>(sliceOp.getResult().getType());
+    if (!dataType || !resultType)
+      return failure();
+    if (!dataType.hasStaticShape() || !resultType.hasStaticShape())
+      return failure();
+
+    // Mandatory operands: both must fold to constants.
+    auto startValues = getConstantIntValues(adaptor.getStarts());
+    if (failed(startValues))
+      return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant starts");
+    auto endValues = getConstantIntValues(adaptor.getEnds());
+    if (failed(endValues))
+      return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant ends");
+
+    // Optional operands: absent (None) is fine, present-but-dynamic is not.
+    std::optional<SmallVector<int64_t>> axisValues;
+    if (!isNoneValueLike(adaptor.getAxes())) {
+      auto constantAxes = getConstantIntValues(adaptor.getAxes());
+      if (failed(constantAxes))
+        return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant axes when present");
+      axisValues = std::move(*constantAxes);
+    }
+
+    std::optional<SmallVector<int64_t>> stepValues;
+    if (!isNoneValueLike(adaptor.getSteps())) {
+      auto constantSteps = getConstantIntValues(adaptor.getSteps());
+      if (failed(constantSteps))
+        return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant steps when present");
+      stepValues = std::move(*constantSteps);
+      for (int64_t step : *stepValues)
+        if (step <= 0)
+          return rewriter.notifyMatchFailure(sliceOp, "supports only positive constant steps");
+    }
+
+    // Non-owning views handed to buildSlice; the vectors above outlive them.
+    ArrayRef<int64_t> startsRef = *startValues;
+    ArrayRef<int64_t> endsRef = *endValues;
+    std::optional<ArrayRef<int64_t>> axesRef;
+    if (axisValues)
+      axesRef = ArrayRef<int64_t>(*axisValues);
+    std::optional<ArrayRef<int64_t>> stepsRef;
+    if (stepValues)
+      stepsRef = ArrayRef<int64_t>(*stepValues);
+
+    Location loc = sliceOp.getLoc();
+    auto emitSlice = [&](Value source) {
+      return buildSlice(source, dataType, resultType, startsRef, endsRef, axesRef, stepsRef, rewriter, loc);
+    };
+
+    // Compile-time computable data: the extract_slice replaces the op directly.
+    if (isCompileTimeComputable(adaptor.getData())) {
+      auto staticSlice = emitSlice(adaptor.getData());
+      if (failed(staticSlice))
+        return rewriter.notifyMatchFailure(sliceOp, "failed to normalize static slice parameters");
+      rewriter.replaceOp(sliceOp, *staticSlice);
+      return success();
+    }
+
+    // Otherwise emit the slice inside a compute region and yield its result.
+    auto computeOp =
+        createSpatCompute<1>(rewriter, loc, TypeRange {resultType}, {}, adaptor.getData(), [&](Value data) {
+          auto regionSlice = emitSlice(data);
+          if (failed(regionSlice))
+            return failure();
+          spatial::SpatYieldOp::create(rewriter, loc, *regionSlice);
+          return success();
+        });
+    if (failed(computeOp))
+      return rewriter.notifyMatchFailure(sliceOp, "failed to build runtime tensor.extract_slice lowering");
+
+    rewriter.replaceOp(sliceOp, computeOp->getResults());
+    return success();
+  }
+};
+
+} // namespace
+
+/// Registers the ONNX Slice lowering pattern in `patterns`.
+void populateSlicePatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
+  patterns.add<Slice>(ctx);
+}
+
+} // namespace onnx_mlir
+6
View File
@@ -38,6 +38,12 @@ llvm::cl::opt<bool>
llvm::cl::init(false),
llvm::cl::cat(OnnxMlirOptions));
// Boolean command-line option "pim-disable-memory-coalescing", registered in the
// OnnxMlirOptions category and off by default. Intended as a developer
// diagnostic switch for skipping the PIM memory coalescing pass.
llvm::cl::opt<bool>
pimDisableMemoryCoalescing("pim-disable-memory-coalescing",
llvm::cl::desc("Skip the PIM memory coalescing pass (developer diagnostic option)"),
llvm::cl::init(false),
llvm::cl::cat(OnnxMlirOptions));
llvm::cl::opt<bool> useExperimentalConvImpl("use-experimental-conv-impl",
llvm::cl::desc("Use experimental implementation for convolution"),
llvm::cl::init(false),
+1
View File
@@ -36,6 +36,7 @@ extern llvm::cl::opt<PimMergeSchedulerType> pimMergeScheduler;
extern llvm::cl::opt<PimMemoryReportLevel> pimMemoryReport;
extern llvm::cl::opt<bool> pimOnlyCodegen;
extern llvm::cl::opt<bool> pimDisableMemoryCoalescing;
extern llvm::cl::opt<bool> useExperimentalConvImpl;
extern llvm::cl::opt<bool> pimEmitJson;
+2 -1
View File
@@ -46,7 +46,8 @@ void addPassesPim(OwningOpRef<ModuleOp>& module,
if (pimEmissionTarget >= EmitPimCodegen) {
pm.addPass(createPimHostConstantFoldingPass());
pm.addPass(createMessagePass("Pim host constants folded"));
pm.addPass(createPimMemoryCoalescingPass());
if (!pimDisableMemoryCoalescing)
pm.addPass(createPimMemoryCoalescingPass());
pm.addPass(createPimVerificationPass());
pm.addPass(createMessagePass("Pim verified"));
pm.addPass(createEmitPimCodePass());
BIN
View File
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+500
View File
@@ -1200,6 +1200,106 @@ def softmax_large_dimension_1024():
save_model(model, "softmax/large_dimension_1024", "softmax_large_dimension_1024.onnx")
# ---------------------------------------------------------------------------
# DFL tests
# ---------------------------------------------------------------------------
def _make_yolo_dfl_initializers(n, include_decode_tail):
    """Return the initializer list shared by the YOLO DFL test graphs.

    The list always holds the reshape targets ``Shape0``/``Shape1``/``Shape2``
    and the ``Proj`` row vector (values 0..15).  When ``include_decode_tail`` is
    true, the slice bounds, ``axes``, ``Anchor``, ``Half`` and ``Scale`` tensors
    consumed by the box decode tail are appended, in that order.
    """
    initializers = [
        make_int64_initializer("Shape0", [1, 4, 16, n]),
        make_int64_initializer("Shape1", [1, 16, 4 * n]),
        make_int64_initializer("Shape2", [1, 4, n]),
        numpy_helper.from_array(np.arange(16, dtype=np.float32).reshape(1, 16), name="Proj"),
    ]
    if not include_decode_tail:
        return initializers

    initializers += [
        make_int64_initializer("starts0", [0]),
        make_int64_initializer("ends0", [2]),
        make_int64_initializer("starts1", [2]),
        make_int64_initializer("ends1", [4]),
        make_int64_initializer("axes", [1]),
    ]

    # Fixed seed keeps the generated model reproducible.
    anchor_values = np.random.default_rng(301).uniform(-2.0, 2.0, (1, 2, n)).astype(np.float32)

    # Alternating scale: 8.0 at even positions, 16.0 at odd positions.
    alternating_scale = np.full((1, n), 8.0, dtype=np.float32)
    alternating_scale[:, 1::2] = 16.0

    initializers += [
        numpy_helper.from_array(anchor_values, name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(alternating_scale, name="Scale"),
    ]
    return initializers
def _build_yolo_dfl_projection_nodes(box_raw_name):
    """Build the DFL projection chain that turns ``box_raw_name`` into ``Box4``.

    The chain is Reshape -> Transpose -> Softmax(axis=1) -> Reshape -> MatMul
    with ``Proj`` -> Reshape, using the ``Shape0``/``Shape1``/``Shape2``
    initializers as reshape targets.
    """
    to_bins = helper.make_node("Reshape", [box_raw_name, "Shape0"], ["R0"])
    bins_first = helper.make_node("Transpose", ["R0"], ["T0"], perm=[0, 2, 1, 3])
    bin_softmax = helper.make_node("Softmax", ["T0"], ["S0"], axis=1)
    flatten = helper.make_node("Reshape", ["S0", "Shape1"], ["R1"])
    project = helper.make_node("MatMul", ["Proj", "R1"], ["M0"])
    to_box4 = helper.make_node("Reshape", ["M0", "Shape2"], ["Box4"])
    return [to_bins, bins_first, bin_softmax, flatten, project, to_box4]
def yolo_dfl_projection_small():
    """Generate the DFL projection test on a small [1,64,128] box tensor."""
    graph_inputs = [helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, 128])]
    graph_outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128])]

    # Reuse the shared chain, but let the last Reshape write the graph output "Y"
    # instead of the intermediate "Box4".
    projection = _build_yolo_dfl_projection_nodes("BoxRaw")
    projection[-1] = helper.make_node("Reshape", ["M0", "Shape2"], ["Y"])

    graph = helper.make_graph(
        projection,
        "yolo_dfl_projection_small",
        graph_inputs,
        graph_outputs,
        initializer=_make_yolo_dfl_initializers(128, include_decode_tail=False))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "dfl/yolo_dfl_projection_small", "yolo_dfl_projection_small.onnx")
def yolo_dfl_projection_large():
    """Generate the DFL projection test on a YOLO-scale [1,64,8400] box tensor."""
    graph_inputs = [helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, 8400])]
    graph_outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 8400])]

    # Reuse the shared chain, but let the last Reshape write the graph output "Y"
    # instead of the intermediate "Box4".
    projection = _build_yolo_dfl_projection_nodes("BoxRaw")
    projection[-1] = helper.make_node("Reshape", ["M0", "Shape2"], ["Y"])

    graph = helper.make_graph(
        projection,
        "yolo_dfl_projection_large",
        graph_inputs,
        graph_outputs,
        initializer=_make_yolo_dfl_initializers(8400, include_decode_tail=False))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "dfl/yolo_dfl_projection_large", "yolo_dfl_projection_large.onnx")
def yolo_dfl_decode_tail_large():
    """Generate the YOLO-scale DFL test with the full decode tail.

    The DFL projection is followed by box decode, stride scaling, a Sigmoid on
    the class scores and the final channel Concat producing ``Y`` [1,84,8400].
    """
    graph_inputs = [
        helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, 8400]),
        helper.make_tensor_value_info("ClassRaw", TensorProto.FLOAT, [1, 80, 8400]),
    ]
    graph_outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 84, 8400])]

    nodes = _build_yolo_dfl_projection_nodes("BoxRaw")
    # Box decode: split Box4 into two channel halves and combine them with Anchor.
    nodes += [
        helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["L0"]),
        helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["L1"]),
        helper.make_node("Sub", ["Anchor", "L0"], ["A"]),
        helper.make_node("Add", ["L1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]),
    ]
    # Class scores and final output.
    nodes += [
        helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]),
        helper.make_node("Concat", ["BoxesScaled", "Scores"], ["Y"], axis=1),
    ]

    graph = helper.make_graph(
        nodes,
        "yolo_dfl_decode_tail_large",
        graph_inputs,
        graph_outputs,
        initializer=_make_yolo_dfl_initializers(8400, include_decode_tail=True))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "dfl/yolo_dfl_decode_tail_large", "yolo_dfl_decode_tail_large.onnx")
# ---------------------------------------------------------------------------
# Resize tests
# ---------------------------------------------------------------------------
@@ -1452,6 +1552,387 @@ def slice_large_channel_1024():
save_model(model, "slice/large_channel_1024", "slice_large_channel_1024.onnx")
def slice_yolo_decode_tail():
    """Generate a YOLO-like decode tail on a [1,4,32] input.

    A non-zero-offset channel slice feeds arithmetic and a Concat; every
    intermediate except ``Sum`` is exposed as a graph output.
    """
    rng = np.random.default_rng(109)

    def float_info(name, dims):
        return helper.make_tensor_value_info(name, TensorProto.FLOAT, list(dims))

    full_dims = (1, 4, 32)
    half_dims = (1, 2, 32)
    graph_inputs = [float_info("X", full_dims)]
    graph_outputs = [
        float_info("Y", full_dims),
        float_info("S0", half_dims),
        float_info("S1", half_dims),
        float_info("A", half_dims),
        float_info("B", half_dims),
        float_info("Center", half_dims),
        float_info("Size", half_dims),
        float_info("Boxes", full_dims),
    ]

    initializers = [
        make_int64_initializer("starts0", [0]),
        make_int64_initializer("ends0", [2]),
        make_int64_initializer("starts1", [2]),
        make_int64_initializer("ends1", [4]),
        make_int64_initializer("axes", [1]),
        numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, 32)).astype(np.float32), name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale"),
    ]

    nodes = [
        helper.make_node("Slice", ["X", "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", ["X", "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["Y"]),
    ]

    graph = helper.make_graph(
        nodes,
        "slice_yolo_decode_tail",
        graph_inputs,
        graph_outputs,
        initializer=initializers)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "slice/yolo_decode_tail", "slice_yolo_decode_tail.onnx")
def slice_yolo_decode_tail_large_n():
    """Generate the [1,4,8400] variant of the YOLO-like decode tail.

    Same graph as the small decode-tail test, sized to stress non-zero-offset
    slice address handling; every intermediate except ``Sum`` is an output.
    """
    rng = np.random.default_rng(110)

    def float_info(name, dims):
        return helper.make_tensor_value_info(name, TensorProto.FLOAT, list(dims))

    full_dims = (1, 4, 8400)
    half_dims = (1, 2, 8400)
    graph_inputs = [float_info("X", full_dims)]
    graph_outputs = [
        float_info("Y", full_dims),
        float_info("S0", half_dims),
        float_info("S1", half_dims),
        float_info("A", half_dims),
        float_info("B", half_dims),
        float_info("Center", half_dims),
        float_info("Size", half_dims),
        float_info("Boxes", full_dims),
    ]

    initializers = [
        make_int64_initializer("starts0", [0]),
        make_int64_initializer("ends0", [2]),
        make_int64_initializer("starts1", [2]),
        make_int64_initializer("ends1", [4]),
        make_int64_initializer("axes", [1]),
        numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, 8400)).astype(np.float32), name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale"),
    ]

    nodes = [
        helper.make_node("Slice", ["X", "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", ["X", "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["Y"]),
    ]

    graph = helper.make_graph(
        nodes,
        "slice_yolo_decode_tail_large_n",
        graph_inputs,
        graph_outputs,
        initializer=initializers)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "slice/yolo_decode_tail_large_n", "slice_yolo_decode_tail_large_n.onnx")
def _make_yolo_decode_tail_constants(seed, n):
    """Create the initializers used by the YOLO decode-tail graphs.

    Returns the tuple ``(starts0, ends0, starts1, ends1, axes, anchor, half,
    scale)``.  ``anchor`` is a [1,2,n] float32 tensor drawn uniformly from
    [-2, 2) with a generator seeded by ``seed``.
    """
    generator = np.random.default_rng(seed)
    slice_bounds = (
        make_int64_initializer("starts0", [0]),
        make_int64_initializer("ends0", [2]),
        make_int64_initializer("starts1", [2]),
        make_int64_initializer("ends1", [4]),
        make_int64_initializer("axes", [1]),
    )
    anchor_values = generator.uniform(-2.0, 2.0, (1, 2, n)).astype(np.float32)
    float_constants = (
        numpy_helper.from_array(anchor_values, name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale"),
    )
    return slice_bounds + float_constants
def _build_yolo_decode_tail_graph(input_name, slice_source_name, output_name):
    """Return the node list of the YOLO decode tail.

    ``slice_source_name`` is the tensor both Slice nodes read from and
    ``output_name`` is the tensor written by the final Mul.  ``input_name`` is
    accepted but not used when building the nodes.
    """
    return [
        helper.make_node("Slice", [slice_source_name, "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", [slice_source_name, "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], [output_name]),
    ]
def slice_yolo_decode_tail_internal_small():
    """Generate the decode-tail test that slices an internal tensor ([1,4,128]).

    The input first passes through an Add with an all-zero constant so the
    slices read an internal tensor; only the final ``Y`` is a graph output.
    """
    graph_inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 128])]
    graph_outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128])]
    zero_addend = numpy_helper.from_array(np.zeros((1, 4, 128), dtype=np.float32), name="Z")
    tail_constants = _make_yolo_decode_tail_constants(111, 128)

    nodes = [helper.make_node("Add", ["X", "Z"], ["P"])]
    nodes += _build_yolo_decode_tail_graph("X", "P", "Y")

    graph = helper.make_graph(
        nodes,
        "slice_yolo_decode_tail_internal_small",
        graph_inputs,
        graph_outputs,
        initializer=[zero_addend, *tail_constants])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(
        model,
        "slice/yolo_decode_tail_internal_small",
        "slice_yolo_decode_tail_internal_small.onnx")
def slice_yolo_decode_tail_internal_large():
    """Generate the [1,4,8400] decode-tail test that slices an internal tensor.

    The input first passes through an Add with an all-zero constant; the size
    is chosen to stress large non-zero slice offsets.
    """
    graph_inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 8400])]
    graph_outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 8400])]
    zero_addend = numpy_helper.from_array(np.zeros((1, 4, 8400), dtype=np.float32), name="Z")
    tail_constants = _make_yolo_decode_tail_constants(112, 8400)

    nodes = [helper.make_node("Add", ["X", "Z"], ["P"])]
    nodes += _build_yolo_decode_tail_graph("X", "P", "Y")

    graph = helper.make_graph(
        nodes,
        "slice_yolo_decode_tail_internal_large",
        graph_inputs,
        graph_outputs,
        initializer=[zero_addend, *tail_constants])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(
        model,
        "slice/yolo_decode_tail_internal_large",
        "slice_yolo_decode_tail_internal_large.onnx")
def slice_yolo_decode_tail_after_transpose():
    """Generate the decode-tail test whose slices read a transposed tensor.

    The [1,128,4] input is transposed to [1,4,128] before the decode tail, to
    mirror the shape change of the final decode-tail producer.
    """
    graph_inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 128, 4])]
    graph_outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128])]
    tail_constants = _make_yolo_decode_tail_constants(113, 128)

    nodes = [helper.make_node("Transpose", ["X"], ["T"], perm=[0, 2, 1])]
    nodes += _build_yolo_decode_tail_graph("X", "T", "Y")

    graph = helper.make_graph(
        nodes,
        "slice_yolo_decode_tail_after_transpose",
        graph_inputs,
        graph_outputs,
        initializer=list(tail_constants))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(
        model,
        "slice/yolo_decode_tail_after_transpose",
        "slice_yolo_decode_tail_after_transpose.onnx")
def _save_yolo_decode_tail_localization_variant(directory, filename, output_name):
    """Save a decode-tail model that exposes exactly one tensor as its output.

    ``output_name`` must be one of ``"B"``, ``"Size"``, ``"Boxes"`` or ``"Y"``
    (any other name raises ``KeyError``).  The final Mul is only added when
    ``"Y"`` is requested.  The graph is named after ``directory`` with ``/``
    replaced by ``_``.
    """
    graph_inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 128])]
    shape_by_output = {
        "B": [1, 2, 128],
        "Size": [1, 2, 128],
        "Boxes": [1, 4, 128],
        "Y": [1, 4, 128],
    }
    graph_outputs = [
        helper.make_tensor_value_info(output_name, TensorProto.FLOAT, shape_by_output[output_name])
    ]
    zero_addend = numpy_helper.from_array(np.zeros((1, 4, 128), dtype=np.float32), name="Z")
    tail_constants = _make_yolo_decode_tail_constants(114, 128)

    nodes = [
        helper.make_node("Add", ["X", "Z"], ["P"]),
        helper.make_node("Slice", ["P", "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", ["P", "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
    ]
    if output_name == "Y":
        nodes.append(helper.make_node("Mul", ["Boxes", "Scale"], ["Y"]))

    graph = helper.make_graph(
        nodes,
        directory.replace("/", "_"),
        graph_inputs,
        graph_outputs,
        initializer=[zero_addend, *tail_constants])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, directory, filename)
def slice_yolo_decode_tail_output_b():
    """Save the decode-tail localization model whose only output is ``B``."""
    _save_yolo_decode_tail_localization_variant(
        directory="slice/yolo_decode_tail_output_b",
        filename="slice_yolo_decode_tail_output_b.onnx",
        output_name="B")
def slice_yolo_decode_tail_output_size():
    """Save the decode-tail localization model whose only output is ``Size``."""
    _save_yolo_decode_tail_localization_variant(
        directory="slice/yolo_decode_tail_output_size",
        filename="slice_yolo_decode_tail_output_size.onnx",
        output_name="Size")
def slice_yolo_decode_tail_output_boxes():
    """Save the decode-tail localization model whose only output is ``Boxes``."""
    _save_yolo_decode_tail_localization_variant(
        directory="slice/yolo_decode_tail_output_boxes",
        filename="slice_yolo_decode_tail_output_boxes.onnx",
        output_name="Boxes")
def slice_yolo_decode_tail_output_y():
    """Save the decode-tail localization model whose only output is ``Y``."""
    _save_yolo_decode_tail_localization_variant(
        directory="slice/yolo_decode_tail_output_y",
        filename="slice_yolo_decode_tail_output_y.onnx",
        output_name="Y")
def _build_yolo_head_final_concat_graph(lengths, output_name):
    """Build the YOLO head + decode + final-concat model.

    Three [1,144,lengths[i]] heads are concatenated along the last axis, split
    into 64 box and 80 class channels, the first four box channels are decoded
    and scaled, and the result is concatenated with the Sigmoid scores into
    ``output_name`` of shape [1,84,sum(lengths)].  Returns the ONNX model.
    """
    total_n = sum(lengths)
    head_inputs = [
        helper.make_tensor_value_info(f"H{index}", TensorProto.FLOAT, [1, 144, lengths[index]])
        for index in range(3)
    ]
    graph_outputs = [helper.make_tensor_value_info(output_name, TensorProto.FLOAT, [1, 84, total_n])]

    int_initializers = [
        make_int64_initializer("starts_box", [0]),
        make_int64_initializer("ends_box", [4]),
        make_int64_initializer("starts0", [0]),
        make_int64_initializer("ends0", [2]),
        make_int64_initializer("starts1", [2]),
        make_int64_initializer("ends1", [4]),
        make_int64_initializer("axes", [1]),
        make_int64_initializer("split", [64, 80]),
    ]

    # The seed depends on the total length, so small and large variants differ.
    generator = np.random.default_rng(115 + total_n)
    anchor_values = generator.uniform(-2.0, 2.0, (1, 2, total_n)).astype(np.float32)
    # Alternating scale: 8.0 at even positions, 16.0 at odd positions.
    alternating_scale = np.full((1, total_n), 8.0, dtype=np.float32)
    alternating_scale[:, 1::2] = 16.0
    float_initializers = [
        numpy_helper.from_array(anchor_values, name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(alternating_scale, name="Scale"),
    ]

    nodes = [
        helper.make_node("Concat", ["H0", "H1", "H2"], ["Head"], axis=2),
        helper.make_node("Split", ["Head", "split"], ["BoxRaw", "ClassRaw"], axis=1),
        helper.make_node("Slice", ["BoxRaw", "starts_box", "ends_box", "axes"], ["Box4"]),
        helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]),
        helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]),
        helper.make_node("Concat", ["BoxesScaled", "Scores"], [output_name], axis=1),
    ]

    graph = helper.make_graph(
        nodes,
        f"{output_name}_graph",
        head_inputs,
        graph_outputs,
        initializer=int_initializers + float_initializers)
    return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
def slice_yolo_head_final_concat_small():
    """Save the small YOLO head/final-concat model (head lengths 96, 24 and 8)."""
    head_lengths = [96, 24, 8]
    save_model(
        _build_yolo_head_final_concat_graph(head_lengths, "Y"),
        "slice/yolo_head_final_concat_small",
        "slice_yolo_head_final_concat_small.onnx")
def slice_yolo_head_final_concat_large():
    """Save the YOLO-scale head/final-concat reproducer (head lengths 6400, 1600 and 400)."""
    head_lengths = [6400, 1600, 400]
    save_model(
        _build_yolo_head_final_concat_graph(head_lengths, "Y"),
        "slice/yolo_head_final_concat_large",
        "slice_yolo_head_final_concat_large.onnx")
def _build_yolo_head_localization_graph(output_name):
    """Build the small YOLO head model exposing a single chosen tensor.

    ``output_name`` must be ``"BoxesScaled"``, ``"Scores"`` or ``"Y"`` (any
    other name raises ``KeyError``).  The final Concat producing ``Y`` is only
    added when ``"Y"`` is requested.  Returns the ONNX model.
    """
    lengths = [96, 24, 8]
    total_n = sum(lengths)
    head_inputs = [
        helper.make_tensor_value_info(f"H{index}", TensorProto.FLOAT, [1, 144, lengths[index]])
        for index in range(3)
    ]
    shape_by_output = {
        "BoxesScaled": [1, 4, total_n],
        "Scores": [1, 80, total_n],
        "Y": [1, 84, total_n],
    }
    graph_outputs = [
        helper.make_tensor_value_info(output_name, TensorProto.FLOAT, shape_by_output[output_name])
    ]

    int_initializers = [
        make_int64_initializer("starts_box", [0]),
        make_int64_initializer("ends_box", [4]),
        make_int64_initializer("starts0", [0]),
        make_int64_initializer("ends0", [2]),
        make_int64_initializer("starts1", [2]),
        make_int64_initializer("ends1", [4]),
        make_int64_initializer("axes", [1]),
        make_int64_initializer("split", [64, 80]),
    ]

    generator = np.random.default_rng(244)
    anchor_values = generator.uniform(-2.0, 2.0, (1, 2, total_n)).astype(np.float32)
    # Alternating scale: 8.0 at even positions, 16.0 at odd positions.
    alternating_scale = np.full((1, total_n), 8.0, dtype=np.float32)
    alternating_scale[:, 1::2] = 16.0
    float_initializers = [
        numpy_helper.from_array(anchor_values, name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(alternating_scale, name="Scale"),
    ]

    nodes = [
        helper.make_node("Concat", ["H0", "H1", "H2"], ["Head"], axis=2),
        helper.make_node("Split", ["Head", "split"], ["BoxRaw", "ClassRaw"], axis=1),
        helper.make_node("Slice", ["BoxRaw", "starts_box", "ends_box", "axes"], ["Box4"]),
        helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]),
        helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]),
    ]
    if output_name == "Y":
        nodes.append(helper.make_node("Concat", ["BoxesScaled", "Scores"], ["Y"], axis=1))

    graph = helper.make_graph(
        nodes,
        f"yolo_head_{output_name.lower()}_graph",
        head_inputs,
        graph_outputs,
        initializer=int_initializers + float_initializers)
    return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
def slice_yolo_head_output_boxes_scaled():
    """Save the YOLO head localization model whose only output is ``BoxesScaled``."""
    save_model(
        _build_yolo_head_localization_graph("BoxesScaled"),
        "slice/yolo_head_output_boxes_scaled",
        "slice_yolo_head_output_boxes_scaled.onnx")
def slice_yolo_head_output_scores():
    """Save the YOLO head localization model whose only output is ``Scores``."""
    save_model(
        _build_yolo_head_localization_graph("Scores"),
        "slice/yolo_head_output_scores",
        "slice_yolo_head_output_scores.onnx")
def slice_yolo_head_output_y():
    """Save the YOLO head localization model whose only output is the final ``Y``."""
    save_model(
        _build_yolo_head_localization_graph("Y"),
        "slice/yolo_head_output_y",
        "slice_yolo_head_output_y.onnx")
# ---------------------------------------------------------------------------
# Gather tests
# ---------------------------------------------------------------------------
@@ -2001,6 +2482,20 @@ if __name__ == "__main__":
slice_nchw_spatial_crop()
slice_after_conv()
slice_large_channel_1024()
slice_yolo_decode_tail()
slice_yolo_decode_tail_large_n()
slice_yolo_decode_tail_internal_small()
slice_yolo_decode_tail_internal_large()
slice_yolo_decode_tail_after_transpose()
slice_yolo_decode_tail_output_b()
slice_yolo_decode_tail_output_size()
slice_yolo_decode_tail_output_boxes()
slice_yolo_decode_tail_output_y()
slice_yolo_head_final_concat_small()
slice_yolo_head_final_concat_large()
slice_yolo_head_output_boxes_scaled()
slice_yolo_head_output_scores()
slice_yolo_head_output_y()
print("\nGenerating Softmax tests:")
softmax_basic()
@@ -2009,6 +2504,11 @@ if __name__ == "__main__":
softmax_negative_axis()
softmax_large_dimension_1024()
print("\nGenerating DFL tests:")
yolo_dfl_projection_small()
yolo_dfl_projection_large()
yolo_dfl_decode_tail_large()
print("\nGenerating Resize tests:")
resize_nearest_2x()
resize_nearest_non_uniform()
Binary file not shown.
Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More