3 Commits

Author SHA1 Message Date
ilgeco 852bef7605 ReduceMean + resnet
Validate Operations / validate-operations (push) Waiting to run
2026-06-10 14:30:10 +02:00
ilgeco 237654dadf Fix direct import
Validate Operations / validate-operations (push) Waiting to run
2026-06-10 12:14:20 +02:00
ilgeco 6d69600bc1 Yolo Image Validator + new accept rule
Validate Operations / validate-operations (push) Waiting to run
2026-06-10 11:59:43 +02:00
194 changed files with 880 additions and 1124 deletions
-1
View File
@@ -1 +0,0 @@
/home/ilgeco/Project/Raptor/build_debug/
-254
View File
@@ -1,254 +0,0 @@
diff --git a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt
index 0b7e8cc..32964aa 100644
--- a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt
+++ b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt
@@ -22,6 +22,7 @@ add_pim_library(OMONNXToSpatial
Patterns/Tensor/Gather.cpp
Patterns/Tensor/Resize.cpp
Patterns/Tensor/Reshape.cpp
+ Patterns/Tensor/Slice.cpp
Patterns/Tensor/Split.cpp
Patterns/Tensor/Transpose.cpp
ONNXToSpatialPass.cpp
diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp
index edf311e..c3d42f7 100644
--- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp
+++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp
@@ -138,6 +138,7 @@ void ONNXToSpatialPass::runOnOperation() {
target.addIllegalOp<ONNXGatherOp>();
target.addIllegalOp<ONNXReshapeOp>();
target.addIllegalOp<ONNXResizeOp>();
+ target.addIllegalOp<ONNXSliceOp>();
target.addIllegalOp<ONNXLRNOp>();
target.addIllegalOp<ONNXReduceMeanV13Op>();
target.addIllegalOp<ONNXSplitOp>();
diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp
index ffa0b1f..0a747e9 100644
--- a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp
+++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp
@@ -22,6 +22,7 @@ void populateConversionPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
populateGatherPatterns(patterns, ctx);
populateResizePatterns(patterns, ctx);
populateReshapePatterns(patterns, ctx);
+ populateSlicePatterns(patterns, ctx);
populateSplitPatterns(patterns, ctx);
populateTransposePatterns(patterns, ctx);
}
diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp
index e58729e..c040536 100644
--- a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp
+++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp
@@ -29,6 +29,7 @@ void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext
void populateGatherPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateResizePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
+void populateSlicePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateSplitPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateTransposePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp
new file mode 100644
index 0000000..3f8867f
--- /dev/null
+++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp
@@ -0,0 +1,200 @@
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+#include "llvm/ADT/SmallVector.h"
+
+#include <algorithm>
+#include <optional>
+
+#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
+#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp"
+#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
+#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
+#include "src/Dialect/ONNX/ONNXOps.hpp"
+
+using namespace mlir;
+
+namespace onnx_mlir {
+namespace {
+
+/// Looks through the op that produces `value` and hands back its dense
+/// elements payload when that op is an `arith.constant` or an ONNX constant.
+/// Any other producer (or a value with no defining op) yields a null attribute.
+static DenseElementsAttr getDenseConstantAttr(Value value) {
+  Operation* producer = value.getDefiningOp();
+  if (!producer)
+    return nullptr;
+
+  if (auto arithConstant = dyn_cast<arith::ConstantOp>(producer))
+    return dyn_cast<DenseElementsAttr>(arithConstant.getValue());
+
+  if (auto onnxConstant = dyn_cast<ONNXConstantOp>(producer)) {
+    // The ONNX constant may carry no value attribute, hence the null-tolerant cast.
+    return dyn_cast_or_null<DenseElementsAttr>(onnxConstant.getValueAttr());
+  }
+
+  return nullptr;
+}
+
+/// Extracts the elements of a compile-time constant integer tensor as signed
+/// 64-bit values.
+///
+/// Returns failure when `value` is not produced by a recognised constant op or
+/// when its payload is not a dense integer attribute.
+///
+/// The elements are read as `APInt` and sign-extended instead of going through
+/// `getValues<int64_t>()`: the typed accessor only matches 64-bit element
+/// storage, while ONNX Slice also permits int32 index tensors.
+/// NOTE(review): elements wider than 64 bits are not expected here; confirm
+/// before feeding this helper anything other than ONNX index tensors.
+static FailureOr<SmallVector<int64_t>> getConstantIntValues(Value value) {
+  auto denseAttr = dyn_cast_or_null<DenseIntElementsAttr>(getDenseConstantAttr(value));
+  if (!denseAttr)
+    return failure();
+
+  SmallVector<int64_t> values;
+  values.reserve(denseAttr.getNumElements());
+  for (const llvm::APInt& element : denseAttr.getValues<llvm::APInt>())
+    values.push_back(element.getSExtValue());
+  return values;
+}
+
+/// True when `value` is the result of an `ONNXNoneOp`, i.e. an omitted optional operand.
+static bool isNoneValueLike(Value value) {
+  Operation* producer = value.getDefiningOp();
+  return producer != nullptr && isa<ONNXNoneOp>(producer);
+}
+
+/// Lowers a fully static ONNX Slice to a single `tensor.extract_slice`.
+///
+/// `starts` and `ends` (and `axes` / `steps` when provided) must all have the
+/// same length. When `axes` is absent the slices apply to the leading
+/// dimensions in order; when `steps` is absent every step is 1. Negative
+/// starts/ends are offset by the dimension size, and both are then clamped to
+/// `[0, dimSize]`.
+///
+/// Returns failure when either type has a dynamic shape, when the ranks
+/// differ, when the parameter lengths disagree, when an axis is out of range
+/// or repeated, when a step is not strictly positive, or when the computed
+/// shape does not equal the shape of `resultType`.
+static FailureOr<Value> buildSlice(Value data,
+                                   RankedTensorType dataType,
+                                   RankedTensorType resultType,
+                                   ArrayRef<int64_t> starts,
+                                   ArrayRef<int64_t> ends,
+                                   std::optional<ArrayRef<int64_t>> axes,
+                                   std::optional<ArrayRef<int64_t>> steps,
+                                   ConversionPatternRewriter& rewriter,
+                                   Location loc) {
+  const int64_t rank = dataType.getRank();
+  if (!dataType.hasStaticShape() || !resultType.hasStaticShape() || resultType.getRank() != rank)
+    return failure();
+
+  const size_t sliceCount = starts.size();
+  if (ends.size() != sliceCount)
+    return failure();
+  if (axes && axes->size() != sliceCount)
+    return failure();
+  if (steps && steps->size() != sliceCount)
+    return failure();
+
+  // Resolve which dimension each (start, end, step) triple applies to.
+  SmallVector<int64_t> normalizedAxes;
+  normalizedAxes.reserve(sliceCount);
+  if (axes) {
+    SmallVector<bool> seenAxes(rank, false);
+    for (int64_t axis : *axes) {
+      auto normalizedAxis = normalizeAxisChecked(axis, rank);
+      if (failed(normalizedAxis))
+        return failure();
+      // Slicing the same dimension twice is rejected.
+      if (seenAxes[*normalizedAxis])
+        return failure();
+      seenAxes[*normalizedAxis] = true;
+      normalizedAxes.push_back(*normalizedAxis);
+    }
+  }
+  else {
+    if (sliceCount > static_cast<size_t>(rank))
+      return failure();
+    for (size_t i = 0; i < sliceCount; ++i)
+      normalizedAxes.push_back(static_cast<int64_t>(i));
+  }
+
+  // Start from the identity slice and override only the sliced dimensions.
+  ArrayRef<int64_t> dataShape = dataType.getShape();
+  SmallVector<int64_t> computedShape(dataShape.begin(), dataShape.end());
+  SmallVector<OpFoldResult> offsets = getZeroOffsets(rewriter, rank);
+  SmallVector<OpFoldResult> sizes = getStaticSizes(rewriter, dataShape);
+  SmallVector<OpFoldResult> strides = getUnitStrides(rewriter, rank);
+
+  for (auto [sliceIndex, axis] : llvm::enumerate(normalizedAxes)) {
+    const int64_t step = steps ? (*steps)[sliceIndex] : int64_t {1};
+    if (step <= 0)
+      return failure();
+
+    const int64_t dimSize = dataShape[axis];
+    int64_t start = starts[sliceIndex];
+    int64_t end = ends[sliceIndex];
+
+    if (start < 0)
+      start += dimSize;
+    if (end < 0)
+      end += dimSize;
+
+    start = std::clamp(start, int64_t {0}, dimSize);
+    end = std::clamp(end, int64_t {0}, dimSize);
+
+    const int64_t extent = std::max(end - start, int64_t {0});
+    // Ceiling division written so it cannot overflow: `extent + step - 1`
+    // wraps around for very large (but still valid) positive steps.
+    const int64_t size = extent == 0 ? int64_t {0} : (extent - 1) / step + 1;
+
+    offsets[axis] = rewriter.getIndexAttr(start);
+    sizes[axis] = rewriter.getIndexAttr(size);
+    strides[axis] = rewriter.getIndexAttr(step);
+    computedShape[axis] = size;
+  }
+
+  // The statically inferred result type must agree with what we computed.
+  if (llvm::ArrayRef(computedShape) != resultType.getShape())
+    return failure();
+
+  return tensor::ExtractSliceOp::create(rewriter, loc, resultType, data, offsets, sizes, strides).getResult();
+}
+
+/// Conversion pattern that lowers `ONNXSliceOp` with compile-time constant
+/// starts/ends/axes/steps to `tensor.extract_slice`.
+///
+/// When the sliced data is compile-time computable the extract_slice is
+/// emitted directly; otherwise it is wrapped in a spatial compute region that
+/// yields the sliced tensor. Every failure path reports a reason through
+/// `notifyMatchFailure`.
+struct Slice final : OpConversionPattern<ONNXSliceOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult matchAndRewrite(ONNXSliceOp sliceOp,
+                                ONNXSliceOpAdaptor adaptor,
+                                ConversionPatternRewriter& rewriter) const override {
+    auto dataType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
+    auto resultType = dyn_cast<RankedTensorType>(sliceOp.getResult().getType());
+    if (!dataType || !resultType || !dataType.hasStaticShape() || !resultType.hasStaticShape())
+      return rewriter.notifyMatchFailure(sliceOp, "requires statically shaped ranked tensor data and result");
+
+    auto starts = getConstantIntValues(adaptor.getStarts());
+    auto ends = getConstantIntValues(adaptor.getEnds());
+    if (failed(starts))
+      return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant starts");
+    if (failed(ends))
+      return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant ends");
+
+    // `axes` and `steps` are optional operands; a none value means "absent".
+    std::optional<SmallVector<int64_t>> axes;
+    if (!isNoneValueLike(adaptor.getAxes())) {
+      auto parsedAxes = getConstantIntValues(adaptor.getAxes());
+      if (failed(parsedAxes))
+        return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant axes when present");
+      axes = std::move(*parsedAxes);
+    }
+
+    std::optional<SmallVector<int64_t>> steps;
+    if (!isNoneValueLike(adaptor.getSteps())) {
+      auto parsedSteps = getConstantIntValues(adaptor.getSteps());
+      if (failed(parsedSteps))
+        return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant steps when present");
+      steps = std::move(*parsedSteps);
+      if (llvm::any_of(*steps, [](int64_t step) { return step <= 0; }))
+        return rewriter.notifyMatchFailure(sliceOp, "supports only positive constant steps");
+    }
+
+    // Non-owning views over the parsed vectors; they stay valid for the rest
+    // of this function because the owning locals above outlive them.
+    ArrayRef<int64_t> startsRef = *starts;
+    ArrayRef<int64_t> endsRef = *ends;
+    std::optional<ArrayRef<int64_t>> axesRef;
+    if (axes)
+      axesRef = ArrayRef<int64_t>(*axes);
+    std::optional<ArrayRef<int64_t>> stepsRef;
+    if (steps)
+      stepsRef = ArrayRef<int64_t>(*steps);
+
+    Location loc = sliceOp.getLoc();
+    auto tryBuildSlice = [&](Value data) {
+      return buildSlice(data, dataType, resultType, startsRef, endsRef, axesRef, stepsRef, rewriter, loc);
+    };
+
+    // Compile-time computable data: emit the extract_slice in place.
+    if (isCompileTimeComputable(adaptor.getData())) {
+      auto sliced = tryBuildSlice(adaptor.getData());
+      if (failed(sliced))
+        return rewriter.notifyMatchFailure(sliceOp, "failed to normalize static slice parameters");
+      rewriter.replaceOp(sliceOp, *sliced);
+      return success();
+    }
+
+    // Runtime data: build the slice inside a spatial compute region.
+    auto computeOp =
+        createSpatCompute<1>(rewriter, loc, TypeRange {resultType}, {}, adaptor.getData(), [&](Value data) {
+          auto sliced = tryBuildSlice(data);
+          if (failed(sliced))
+            return failure();
+          spatial::SpatYieldOp::create(rewriter, loc, *sliced);
+          return success();
+        });
+    if (failed(computeOp))
+      return rewriter.notifyMatchFailure(sliceOp, "failed to build runtime tensor.extract_slice lowering");
+
+    rewriter.replaceOp(sliceOp, computeOp->getResults());
+    return success();
+  }
+};
+
+} // namespace
+
+/// Registers the ONNX Slice lowering pattern into `patterns`.
+void populateSlicePatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
+  patterns.add<Slice>(ctx);
+}
+
+} // namespace onnx_mlir
@@ -140,6 +140,7 @@ void ONNXToSpatialPass::runOnOperation() {
target.addIllegalOp<ONNXResizeOp>();
target.addIllegalOp<ONNXSliceOp>();
target.addIllegalOp<ONNXLRNOp>();
target.addIllegalOp<ONNXReduceMeanOp>();
target.addIllegalOp<ONNXReduceMeanV13Op>();
target.addIllegalOp<ONNXSplitOp>();
@@ -6,6 +6,8 @@
#include <algorithm>
#include <numeric>
#include <optional>
#include <type_traits>
#include "src/Accelerators/PIM/Common/IR/AffineUtils.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
@@ -19,6 +21,85 @@ using namespace mlir;
namespace onnx_mlir {
namespace {
// Resolved reduction parameters shared by the ReduceMean lowerings; filled in
// by getReduceMeanSemantics.
struct ReduceMeanSemantics {
// Axes to reduce, as produced by the axis-normalization helpers.
SmallVector<int64_t> axes;
// Copy of the op's keepdims attribute; non-zero keeps the reduced axes.
int64_t keepdims = 1;
// Set when axes are absent or empty and noop_with_empty_axes is non-zero.
bool isIdentity = false;
};
/// True when `value` is produced by an `ONNXNoneOp`, i.e. the optional operand was omitted.
static bool isNoneValueLike(Value value) {
  Operation* producer = value.getDefiningOp();
  return producer != nullptr && isa<ONNXNoneOp>(producer);
}
/// Reads a host-constant integer tensor into a vector of 64-bit values.
/// Fails when `value` has no dense integer constant payload.
static FailureOr<SmallVector<int64_t>> getConstantIntValues(Value value) {
  auto intAttr = dyn_cast_or_null<DenseIntElementsAttr>(getHostConstDenseElementsAttr(value));
  if (!intAttr)
    return failure();

  auto elements = intAttr.getValues<int64_t>();
  return SmallVector<int64_t>(elements.begin(), elements.end());
}
/// Normalizes every entry of `axes` against `rank` and returns the result
/// sorted in ascending order with duplicates removed. Fails as soon as one
/// axis cannot be normalized.
static FailureOr<SmallVector<int64_t>> normalizeAxesChecked(ArrayRef<int64_t> axes, int64_t rank) {
  SmallVector<int64_t> result;
  result.reserve(axes.size());

  for (size_t index = 0, count = axes.size(); index < count; ++index) {
    auto checked = normalizeAxisChecked(axes[index], rank);
    if (failed(checked))
      return failure();
    result.push_back(*checked);
  }

  // Sort first so that equal axes become adjacent, then drop the repeats.
  llvm::sort(result);
  auto firstDuplicate = std::unique(result.begin(), result.end());
  result.erase(firstDuplicate, result.end());
  return result;
}
/// Resolves the reduction axes, keepdims flag and identity status of a
/// ReduceMean op.
///
/// For `ONNXReduceMeanV13Op` the axes come from the op's axes attribute. For
/// any other op type the axes come from the optional axes operand, which must
/// be a compile-time constant when present. An absent or empty axes operand
/// means "identity" when noop_with_empty_axes is non-zero, and "reduce every
/// axis" otherwise. Fails when axes cannot be read or normalized.
template <typename ReduceMeanOp, typename ReduceMeanOpAdaptor>
static FailureOr<ReduceMeanSemantics>
getReduceMeanSemantics(ReduceMeanOp reduceMeanOp, ReduceMeanOpAdaptor adaptor, int64_t inputRank) {
  ReduceMeanSemantics result;
  result.keepdims = reduceMeanOp.getKeepdims();

  if constexpr (std::is_same_v<ReduceMeanOp, ONNXReduceMeanV13Op>) {
    auto attrAxes = onnx_mlir::normalizeAxesChecked(std::optional<ArrayAttr>(reduceMeanOp.getAxesAttr()), inputRank);
    if (failed(attrAxes))
      return failure();
    result.axes = std::move(*attrAxes);
    return result;
  }
  else {
    // Gather the requested axes; an omitted operand is treated like an empty list.
    SmallVector<int64_t> requestedAxes;
    if (!isNoneValueLike(adaptor.getAxes())) {
      auto constantAxes = getConstantIntValues(adaptor.getAxes());
      if (failed(constantAxes))
        return failure();
      requestedAxes = std::move(*constantAxes);
    }

    if (requestedAxes.empty()) {
      if (reduceMeanOp.getNoopWithEmptyAxes() != 0) {
        result.isIdentity = true;
        return result;
      }
      // No axes given and no-op disabled: reduce over every dimension.
      result.axes.reserve(inputRank);
      for (int64_t dim = 0; dim < inputRank; ++dim)
        result.axes.push_back(dim);
      return result;
    }

    auto checkedAxes = normalizeAxesChecked(requestedAxes, inputRank);
    if (failed(checkedAxes))
      return failure();
    result.axes = std::move(*checkedAxes);
    return result;
  }
}
static SmallVector<bool> buildReducedAxesMask(ArrayRef<int64_t> axes, int64_t rank) {
SmallVector<bool> reducedAxes(rank, false);
for (int64_t axis : axes) {
@@ -251,11 +332,13 @@ static Value squeezeReducedAxes(Value keepdimsValue,
return squeezeCompute.getResult(0);
}
struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
using OpConversionPattern::OpConversionPattern;
template <typename ReduceMeanOp>
struct ReduceMeanToSpatialCompute : OpConversionPattern<ReduceMeanOp> {
using OpConversionPattern<ReduceMeanOp>::OpConversionPattern;
using Adaptor = typename ReduceMeanOp::Adaptor;
LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp,
ONNXReduceMeanV13OpAdaptor adaptor,
LogicalResult matchAndRewrite(ReduceMeanOp reduceMeanOp,
Adaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto inputType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
auto resultType = dyn_cast<RankedTensorType>(reduceMeanOp.getReduced().getType());
@@ -266,10 +349,18 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
return success();
}
auto axes = normalizeAxesChecked(std::optional<ArrayAttr>(reduceMeanOp.getAxesAttr()), inputType.getRank());
if (failed(axes))
return failure();
SmallVector<bool> reducedAxes = buildReducedAxesMask(*axes, inputType.getRank());
auto semantics = getReduceMeanSemantics(reduceMeanOp, adaptor, inputType.getRank());
if (failed(semantics))
return rewriter.notifyMatchFailure(reduceMeanOp, "requires compile-time constant, in-range ReduceMean axes");
if (semantics->isIdentity) {
if (inputType != resultType)
return rewriter.notifyMatchFailure(
reduceMeanOp, "noop_with_empty_axes identity requires the result type to match the input type");
rewriter.replaceOp(reduceMeanOp, adaptor.getData());
return success();
}
SmallVector<bool> reducedAxes = buildReducedAxesMask(semantics->axes, inputType.getRank());
if (reducedAxes.empty() && inputType.getRank() != 0)
return failure();
@@ -289,7 +380,7 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
Value reducedKeepdims =
buildKeepdimsFromLanePackedBatch(*lanePackedKeepdims, keepdimsType, compactKeptType, reducedAxes, rewriter, loc);
if (reduceMeanOp.getKeepdims() != 0) {
if (semantics->keepdims != 0) {
rewriter.replaceOp(reduceMeanOp, reducedKeepdims);
return success();
}
@@ -303,7 +394,7 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
} // namespace
void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
patterns.add<ReduceMeanToSpatialCompute>(ctx);
patterns.add<ReduceMeanToSpatialCompute<ONNXReduceMeanV13Op>, ReduceMeanToSpatialCompute<ONNXReduceMeanOp>>(ctx);
}
} // namespace onnx_mlir
BIN
View File
Binary file not shown.
+4
View File
@@ -8,3 +8,7 @@ networks/**/outputs
networks/**/raptor
networks/**/runner
networks/**/simulation
networks/**/real_image_val
networks/**/*.png
networks/**/*.jpg
networks/**/*.csv
+4 -1
View File
@@ -199,7 +199,10 @@ int main(int argc, char **argv) {{
// ---- Cleanup ----
omTensorListDestroy(in_list);
omTensorListDestroy(out_list);
// Some debug-heavy models return aliased outputs. This runner is a short-
// lived process, so destroy only the list wrapper and let process exit
// reclaim the output tensors safely.
omTensorListDestroyShallow(out_list);
return 0;
}}
"""
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+93 -500
View File
@@ -1053,6 +1053,92 @@ def reducemean_large_dimension_1024():
save_model(model, "reduce_mean/large_dimension_1024", "reduce_mean_large_dimension_1024.onnx")
def make_legacy_reducemean_model(name, shape, output_shape, directory, filename, *, axes, keepdims=1,
                                 noop_with_empty_axes=0):
    """Build and save a single-node opset-18 ReduceMean model (lowers to ONNXReduceMeanOp).

    `axes=None` omits the optional axes input; any other value is stored as an
    int64 initializer named "axes". The model maps float input "X" of `shape`
    to float output "Y" of `output_shape` and is saved via `save_model`.
    """
    has_axes = axes is not None
    # An empty input name marks the optional second input as not provided.
    axes_input = "axes" if has_axes else ""
    initializers = [make_int64_initializer("axes", axes)] if has_axes else []

    node = helper.make_node(
        "ReduceMean", ["X", axes_input], ["Y"],
        keepdims=keepdims, noop_with_empty_axes=noop_with_empty_axes)
    graph = helper.make_graph(
        [node],
        name,
        [helper.make_tensor_value_info("X", TensorProto.FLOAT, shape)],
        [helper.make_tensor_value_info("Y", TensorProto.FLOAT, output_shape)],
        initializer=initializers)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
    save_model(model, directory, filename)
def reducemean_legacy_axis1_keepdims_1():
    """Opset-18 ReduceMean over axis 1 of a [2, 3, 4] input, keeping the reduced axis."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_axis1_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[2, 1, 4],
        directory="reduce_mean/legacy_axis1_keepdims_1",
        filename="reduce_mean_legacy_axis1_keepdims_1.onnx",
        axes=[1],
        keepdims=1)
def reducemean_legacy_axis1_keepdims_0():
    """Opset-18 ReduceMean over axis 1 of a [2, 3, 4] input, dropping the reduced axis."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_axis1_keepdims_0",
        shape=[2, 3, 4],
        output_shape=[2, 4],
        directory="reduce_mean/legacy_axis1_keepdims_0",
        filename="reduce_mean_legacy_axis1_keepdims_0.onnx",
        axes=[1],
        keepdims=0)
def reducemean_legacy_axes_1_2_keepdims_1():
    """Opset-18 ReduceMean over axes 1 and 2 of a [2, 3, 4] input, keeping both reduced axes."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_axes_1_2_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[2, 1, 1],
        directory="reduce_mean/legacy_axes_1_2_keepdims_1",
        filename="reduce_mean_legacy_axes_1_2_keepdims_1.onnx",
        axes=[1, 2],
        keepdims=1)
def reducemean_legacy_negative_axis():
    """Opset-18 ReduceMean addressing the last axis of a [2, 3, 4] input as -1."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_negative_axis",
        shape=[2, 3, 4],
        output_shape=[2, 3, 1],
        directory="reduce_mean/legacy_negative_axis",
        filename="reduce_mean_legacy_negative_axis.onnx",
        axes=[-1],
        keepdims=1)
def reducemean_legacy_reduce_all_keepdims_1():
    """Opset-18 ReduceMean with the optional axes input omitted, reducing every axis."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_reduce_all_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[1, 1, 1],
        directory="reduce_mean/legacy_reduce_all_keepdims_1",
        filename="reduce_mean_legacy_reduce_all_keepdims_1.onnx",
        axes=None,
        keepdims=1)
def reducemean_legacy_empty_axes_noop():
    """Opset-18 ReduceMean with an empty axes tensor and noop_with_empty_axes=1, followed by Relu."""
    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 4])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 4])
    empty_axes = make_int64_initializer("axes", [])

    nodes = [
        helper.make_node("ReduceMean", ["X", "axes"], ["R"],
                         keepdims=1, noop_with_empty_axes=1),
        helper.make_node("Relu", ["R"], ["Y"]),
    ]
    graph = helper.make_graph(
        nodes,
        "reducemean_legacy_empty_axes_noop",
        [graph_input],
        [graph_output],
        initializer=[empty_axes])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
    save_model(model, "reduce_mean/legacy_empty_axes_noop", "reduce_mean_legacy_empty_axes_noop.onnx")
def reducemean_legacy_nchw_spatial():
    """Opset-18 ReduceMean over H and W (axes 2 and 3) of a [1, 3, 5, 5] NCHW tensor."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_nchw_spatial",
        shape=[1, 3, 5, 5],
        output_shape=[1, 3, 1, 1],
        directory="reduce_mean/legacy_nchw_spatial",
        filename="reduce_mean_legacy_nchw_spatial.onnx",
        axes=[2, 3],
        keepdims=1)
# ---------------------------------------------------------------------------
# Relu tests
# ---------------------------------------------------------------------------
@@ -1200,106 +1286,6 @@ def softmax_large_dimension_1024():
save_model(model, "softmax/large_dimension_1024", "softmax_large_dimension_1024.onnx")
# ---------------------------------------------------------------------------
# DFL tests
# ---------------------------------------------------------------------------
def _make_yolo_dfl_initializers(n, include_decode_tail):
    """Return the initializers used by the YOLO DFL test graphs for `n` anchors.

    The reshape targets and the 16-bin projection row are always included.
    With `include_decode_tail` the slice bounds, a seeded random "Anchor"
    tensor, the "Half" divisor and the alternating 8/16 "Scale" row are
    appended, in that order.
    """
    initializers = [
        make_int64_initializer("Shape0", [1, 4, 16, n]),
        make_int64_initializer("Shape1", [1, 16, 4 * n]),
        make_int64_initializer("Shape2", [1, 4, n]),
        numpy_helper.from_array(np.arange(16, dtype=np.float32).reshape(1, 16), name="Proj"),
    ]
    if not include_decode_tail:
        return initializers

    slice_bounds = [("starts0", [0]), ("ends0", [2]), ("starts1", [2]), ("ends1", [4]), ("axes", [1])]
    initializers += [make_int64_initializer(label, values) for label, values in slice_bounds]

    # Fixed seed keeps the generated anchor values reproducible.
    anchor_values = np.random.default_rng(301).uniform(-2.0, 2.0, (1, 2, n)).astype(np.float32)
    initializers.append(numpy_helper.from_array(anchor_values, name="Anchor"))
    initializers.append(numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"))

    # Even columns scale by 8, odd columns by 16.
    stride_scale = np.full((1, n), 8.0, dtype=np.float32)
    stride_scale[:, 1::2] = 16.0
    initializers.append(numpy_helper.from_array(stride_scale, name="Scale"))
    return initializers
def _build_yolo_dfl_projection_nodes(box_raw_name):
    """Return the six-node DFL projection chain reading `box_raw_name` and writing "Box4"."""
    split_bins = helper.make_node("Reshape", [box_raw_name, "Shape0"], ["R0"])
    bins_first = helper.make_node("Transpose", ["R0"], ["T0"], perm=[0, 2, 1, 3])
    bin_probs = helper.make_node("Softmax", ["T0"], ["S0"], axis=1)
    flatten_sides = helper.make_node("Reshape", ["S0", "Shape1"], ["R1"])
    project = helper.make_node("MatMul", ["Proj", "R1"], ["M0"])
    to_box4 = helper.make_node("Reshape", ["M0", "Shape2"], ["Box4"])
    return [split_bins, bins_first, bin_probs, flatten_sides, project, to_box4]
def yolo_dfl_projection_small():
    """YOLO DFL projection path on a small [1,64,128] box tensor."""
    anchors = 128
    graph_input = helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, anchors])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, anchors])

    projection = _build_yolo_dfl_projection_nodes("BoxRaw")
    # The helper's final Reshape writes "Box4"; replace it with one that writes the graph output "Y".
    projection[-1] = helper.make_node("Reshape", ["M0", "Shape2"], ["Y"])

    graph = helper.make_graph(
        projection,
        "yolo_dfl_projection_small",
        [graph_input],
        [graph_output],
        initializer=_make_yolo_dfl_initializers(anchors, include_decode_tail=False))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "dfl/yolo_dfl_projection_small", "yolo_dfl_projection_small.onnx")
def yolo_dfl_projection_large():
    """YOLO DFL projection path on a YOLO-scale [1,64,8400] box tensor."""
    anchors = 8400
    graph_input = helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, anchors])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, anchors])

    projection = _build_yolo_dfl_projection_nodes("BoxRaw")
    # The helper's final Reshape writes "Box4"; replace it with one that writes the graph output "Y".
    projection[-1] = helper.make_node("Reshape", ["M0", "Shape2"], ["Y"])

    graph = helper.make_graph(
        projection,
        "yolo_dfl_projection_large",
        [graph_input],
        [graph_output],
        initializer=_make_yolo_dfl_initializers(anchors, include_decode_tail=False))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "dfl/yolo_dfl_projection_large", "yolo_dfl_projection_large.onnx")
def yolo_dfl_decode_tail_large():
    """YOLO-scale DFL projection followed by box decode, stride scale, score sigmoid, and final concat."""
    anchors = 8400
    graph_inputs = [
        helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, anchors]),
        helper.make_tensor_value_info("ClassRaw", TensorProto.FLOAT, [1, 80, anchors]),
    ]
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 84, anchors])

    # Decode tail: split Box4 into two halves, combine each with Anchor, then
    # derive Center/Size, scale them and append the sigmoid class scores.
    decode_tail = [
        helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["L0"]),
        helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["L1"]),
        helper.make_node("Sub", ["Anchor", "L0"], ["A"]),
        helper.make_node("Add", ["L1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]),
        helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]),
        helper.make_node("Concat", ["BoxesScaled", "Scores"], ["Y"], axis=1),
    ]
    all_nodes = _build_yolo_dfl_projection_nodes("BoxRaw") + decode_tail

    graph = helper.make_graph(
        all_nodes,
        "yolo_dfl_decode_tail_large",
        graph_inputs,
        [graph_output],
        initializer=_make_yolo_dfl_initializers(anchors, include_decode_tail=True))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "dfl/yolo_dfl_decode_tail_large", "yolo_dfl_decode_tail_large.onnx")
# ---------------------------------------------------------------------------
# Resize tests
# ---------------------------------------------------------------------------
@@ -1552,387 +1538,6 @@ def slice_large_channel_1024():
save_model(model, "slice/large_channel_1024", "slice_large_channel_1024.onnx")
def slice_yolo_decode_tail():
    """YOLO-like decode tail where a non-zero-offset channel slice feeds arithmetic and Concat.

    The graph slices input "X" ([1, 4, 32]) directly and exposes the final
    output together with the intermediate tensors S0, S1, A, B, Center, Size
    and Boxes as graph outputs. Constants and nodes come from the shared
    decode-tail helpers instead of being repeated here; seed 109 reproduces
    the same "Anchor" values as before.
    """
    n = 32

    def float_info(tensor_name, channels):
        return helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, [1, channels, n])

    graph_input = float_info("X", 4)
    graph_outputs = [float_info("Y", 4)]
    graph_outputs += [float_info(tensor_name, 2) for tensor_name in ("S0", "S1", "A", "B", "Center", "Size")]
    graph_outputs.append(float_info("Boxes", 4))

    graph = helper.make_graph(
        _build_yolo_decode_tail_graph("X", "X", "Y"),
        "slice_yolo_decode_tail",
        [graph_input],
        graph_outputs,
        initializer=list(_make_yolo_decode_tail_constants(109, n)))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "slice/yolo_decode_tail", "slice_yolo_decode_tail.onnx")
def slice_yolo_decode_tail_large_n():
    """Larger YOLO-like decode tail variant to stress non-zero-offset slice address handling.

    Same graph as the small decode-tail test but on a [1, 4, 8400] input, with
    the intermediate tensors S0, S1, A, B, Center, Size and Boxes exposed as
    graph outputs. Constants and nodes come from the shared decode-tail
    helpers instead of being repeated here; seed 110 reproduces the same
    "Anchor" values as before.
    """
    n = 8400

    def float_info(tensor_name, channels):
        return helper.make_tensor_value_info(tensor_name, TensorProto.FLOAT, [1, channels, n])

    graph_input = float_info("X", 4)
    graph_outputs = [float_info("Y", 4)]
    graph_outputs += [float_info(tensor_name, 2) for tensor_name in ("S0", "S1", "A", "B", "Center", "Size")]
    graph_outputs.append(float_info("Boxes", 4))

    graph = helper.make_graph(
        _build_yolo_decode_tail_graph("X", "X", "Y"),
        "slice_yolo_decode_tail_large_n",
        [graph_input],
        graph_outputs,
        initializer=list(_make_yolo_decode_tail_constants(110, n)))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "slice/yolo_decode_tail_large_n", "slice_yolo_decode_tail_large_n.onnx")
def _make_yolo_decode_tail_constants(seed, n):
    """Return the eight decode-tail initializers for `n` anchors.

    Order: starts0, ends0, starts1, ends1, axes, Anchor, Half, Scale. The
    "Anchor" values are drawn from a generator seeded with `seed`.
    """
    generator = np.random.default_rng(seed)
    anchor_values = generator.uniform(-2.0, 2.0, (1, 2, n)).astype(np.float32)

    slice_bounds = (("starts0", [0]), ("ends0", [2]), ("starts1", [2]), ("ends1", [4]), ("axes", [1]))
    int_constants = tuple(make_int64_initializer(label, values) for label, values in slice_bounds)

    float_constants = (
        numpy_helper.from_array(anchor_values, name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale"),
    )
    return int_constants + float_constants
def _build_yolo_decode_tail_graph(input_name, slice_source_name, output_name):
    """Return the nine decode-tail nodes that slice `slice_source_name` and write `output_name`.

    `input_name` is accepted for call-site symmetry but is not used by the body.
    """
    slice_inputs = lambda start, end: [slice_source_name, start, end, "axes"]
    return [
        helper.make_node("Slice", slice_inputs("starts0", "ends0"), ["S0"]),
        helper.make_node("Slice", slice_inputs("starts1", "ends1"), ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], [output_name]),
    ]
def slice_yolo_decode_tail_internal_small():
    """Decode tail whose slices read the output of an Add node; only ``Y`` is a graph output.

    Input ``X`` and output ``Y`` are both float tensors of shape [1, 4, 128].
    """
    case_name = "yolo_decode_tail_internal_small"
    tensor_shape = (1, 4, 128)

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, list(tensor_shape))
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(tensor_shape))
    zeros = numpy_helper.from_array(np.zeros(tensor_shape, dtype=np.float32), name="Z")
    tail_constants = _make_yolo_decode_tail_constants(111, 128)

    # X + Z (all zeros) yields "P", the tensor the decode tail slices.
    nodes = [helper.make_node("Add", ["X", "Z"], ["P"])]
    nodes.extend(_build_yolo_decode_tail_graph("X", "P", "Y"))

    graph = helper.make_graph(
        nodes,
        f"slice_{case_name}",
        [graph_input],
        [graph_output],
        initializer=[zeros, *tail_constants])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, f"slice/{case_name}", f"slice_{case_name}.onnx")
def slice_yolo_decode_tail_internal_large():
    """Large variant of the internal-tensor decode tail, using shape [1, 4, 8400].

    Intended to stress large non-zero slice offsets; only ``Y`` is a graph output.
    """
    case_name = "yolo_decode_tail_internal_large"
    tensor_shape = (1, 4, 8400)

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, list(tensor_shape))
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(tensor_shape))
    zeros = numpy_helper.from_array(np.zeros(tensor_shape, dtype=np.float32), name="Z")
    tail_constants = _make_yolo_decode_tail_constants(112, 8400)

    # X + Z (all zeros) yields "P", the tensor the decode tail slices.
    nodes = [helper.make_node("Add", ["X", "Z"], ["P"])]
    nodes.extend(_build_yolo_decode_tail_graph("X", "P", "Y"))

    graph = helper.make_graph(
        nodes,
        f"slice_{case_name}",
        [graph_input],
        [graph_output],
        initializer=[zeros, *tail_constants])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, f"slice/{case_name}", f"slice_{case_name}.onnx")
def slice_yolo_decode_tail_after_transpose():
    """Decode tail whose slices read the output of a Transpose with perm [0, 2, 1].

    Input ``X`` is declared as [1, 128, 4] and output ``Y`` as [1, 4, 128].
    """
    case_name = "yolo_decode_tail_after_transpose"

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 128, 4])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128])
    tail_constants = _make_yolo_decode_tail_constants(113, 128)

    # The transposed tensor "T" is what the decode tail slices.
    nodes = [helper.make_node("Transpose", ["X"], ["T"], perm=[0, 2, 1])]
    nodes.extend(_build_yolo_decode_tail_graph("X", "T", "Y"))

    graph = helper.make_graph(
        nodes,
        f"slice_{case_name}",
        [graph_input],
        [graph_output],
        initializer=list(tail_constants))
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, f"slice/{case_name}", f"slice_{case_name}.onnx")
def _save_yolo_decode_tail_localization_variant(directory, filename, output_name):
    """Save a decode-tail model that exposes exactly one tensor as its graph output.

    Args:
        directory: Forwarded to ``save_model``; with "/" replaced by "_" it is also the graph name.
        filename: Forwarded to ``save_model``.
        output_name: One of "B", "Size", "Boxes" or "Y". Any other value raises
            ``KeyError`` from the shape lookup.
    """
    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 128])
    shape_by_output = {
        "B": [1, 2, 128],
        "Size": [1, 2, 128],
        "Boxes": [1, 4, 128],
        "Y": [1, 4, 128],
    }
    exposed = helper.make_tensor_value_info(
        output_name, TensorProto.FLOAT, shape_by_output[output_name])
    zeros = numpy_helper.from_array(np.zeros((1, 4, 128), dtype=np.float32), name="Z")
    tail_constants = _make_yolo_decode_tail_constants(114, 128)

    # Each entry: (op type, input names, single output name, node attributes).
    node_specs = [
        ("Add", ["X", "Z"], "P", {}),
        ("Slice", ["P", "starts0", "ends0", "axes"], "S0", {}),
        ("Slice", ["P", "starts1", "ends1", "axes"], "S1", {}),
        ("Sub", ["Anchor", "S0"], "A", {}),
        ("Add", ["S1", "Anchor"], "B", {}),
        ("Add", ["A", "B"], "Sum", {}),
        ("Div", ["Sum", "Half"], "Center", {}),
        ("Sub", ["B", "A"], "Size", {}),
        ("Concat", ["Center", "Size"], "Boxes", {"axis": 1}),
    ]
    # The scaling Mul is only part of the graph when Y itself is the exposed output.
    if output_name == "Y":
        node_specs.append(("Mul", ["Boxes", "Scale"], "Y", {}))
    nodes = [
        helper.make_node(op_type, inputs, [output], **attributes)
        for op_type, inputs, output, attributes in node_specs
    ]

    graph = helper.make_graph(
        nodes,
        directory.replace("/", "_"),
        [graph_input],
        [exposed],
        initializer=[zeros, *tail_constants])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, directory, filename)
def slice_yolo_decode_tail_output_b():
    """Decode-tail localization case whose only graph output is ``B``."""
    case_name = "yolo_decode_tail_output_b"
    _save_yolo_decode_tail_localization_variant(
        f"slice/{case_name}", f"slice_{case_name}.onnx", "B")
def slice_yolo_decode_tail_output_size():
    """Decode-tail localization case whose only graph output is ``Size``."""
    case_name = "yolo_decode_tail_output_size"
    _save_yolo_decode_tail_localization_variant(
        f"slice/{case_name}", f"slice_{case_name}.onnx", "Size")
def slice_yolo_decode_tail_output_boxes():
    """Decode-tail localization case whose only graph output is ``Boxes``."""
    case_name = "yolo_decode_tail_output_boxes"
    _save_yolo_decode_tail_localization_variant(
        f"slice/{case_name}", f"slice_{case_name}.onnx", "Boxes")
def slice_yolo_decode_tail_output_y():
    """Decode-tail localization case whose only graph output is ``Y``."""
    case_name = "yolo_decode_tail_output_y"
    _save_yolo_decode_tail_localization_variant(
        f"slice/{case_name}", f"slice_{case_name}.onnx", "Y")
def _build_yolo_head_final_concat_graph(lengths, output_name):
    """Build a YOLO-like head model: concat heads, split, decode boxes, concat with scores.

    The graph takes one input ``H<i>`` of shape ``[1, 144, lengths[i]]`` per entry
    of ``lengths``, concatenates them on axis 2, splits the result on axis 1 with
    sizes [64, 80], decodes boxes from the first four channels of the 64-channel
    part, applies Sigmoid to the 80-channel part, and concatenates both on axis 1
    into a single output declared as ``[1, 84, sum(lengths)]``.

    Previously the head count was hard-coded to three: fewer entries raised
    ``IndexError`` and extra entries were counted in the total size without
    getting a graph input. Any non-empty ``lengths`` is now supported; a
    three-entry ``lengths`` produces the same graph as before.

    Args:
        lengths: Non-empty sequence of per-head sizes along the last axis.
        output_name: Name of the final output tensor; also prefixes the graph name.

    Returns:
        The model produced by ``helper.make_model`` with opset import ("", 13).

    Raises:
        ValueError: If ``lengths`` is empty.
    """
    lengths = list(lengths)
    if not lengths:
        raise ValueError("lengths must contain at least one head length")
    total_n = sum(lengths)

    # One graph input per head, named H0, H1, ... in the order given.
    head_names = [f"H{index}" for index in range(len(lengths))]
    head_inputs = [
        helper.make_tensor_value_info(name, TensorProto.FLOAT, [1, 144, length])
        for name, length in zip(head_names, lengths)
    ]
    y = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, [1, 84, total_n])

    starts_box = make_int64_initializer("starts_box", [0])
    ends_box = make_int64_initializer("ends_box", [4])
    starts0 = make_int64_initializer("starts0", [0])
    ends0 = make_int64_initializer("ends0", [2])
    starts1 = make_int64_initializer("starts1", [2])
    ends1 = make_int64_initializer("ends1", [4])
    axes = make_int64_initializer("axes", [1])
    split = make_int64_initializer("split", [64, 80])

    # The seed depends on the total size, so different head layouts get different anchors.
    rng = np.random.default_rng(115 + total_n)
    anchor = numpy_helper.from_array(
        rng.uniform(-2.0, 2.0, (1, 2, total_n)).astype(np.float32), name="Anchor")
    half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half")
    # Per-position scale: 8.0 at even indices, 16.0 at odd indices.
    scale_values = np.full((1, total_n), 8.0, dtype=np.float32)
    scale_values[:, 1::2] = 16.0
    scale = numpy_helper.from_array(scale_values, name="Scale")

    nodes = [
        helper.make_node("Concat", head_names, ["Head"], axis=2),
        helper.make_node("Split", ["Head", "split"], ["BoxRaw", "ClassRaw"], axis=1),
        helper.make_node("Slice", ["BoxRaw", "starts_box", "ends_box", "axes"], ["Box4"]),
        helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["S0"]),
        helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["S1"]),
        helper.make_node("Sub", ["Anchor", "S0"], ["A"]),
        helper.make_node("Add", ["S1", "Anchor"], ["B"]),
        helper.make_node("Add", ["A", "B"], ["Sum"]),
        helper.make_node("Div", ["Sum", "Half"], ["Center"]),
        helper.make_node("Sub", ["B", "A"], ["Size"]),
        helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1),
        helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]),
        helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]),
        helper.make_node("Concat", ["BoxesScaled", "Scores"], [output_name], axis=1),
    ]
    graph = helper.make_graph(
        nodes,
        f"{output_name}_graph",
        head_inputs,
        [y],
        initializer=[starts_box, ends_box, starts0, ends0, starts1, ends1, axes, split, anchor, half, scale])
    return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
def slice_yolo_head_final_concat_small():
    """Small head/final-concat case with head lengths 96, 24 and 8 and output ``Y``."""
    case_name = "yolo_head_final_concat_small"
    head_lengths = [96, 24, 8]
    save_model(
        _build_yolo_head_final_concat_graph(head_lengths, "Y"),
        f"slice/{case_name}",
        f"slice_{case_name}.onnx")
def slice_yolo_head_final_concat_large():
    """Large head/final-concat case with head lengths 6400, 1600 and 400 and output ``Y``."""
    case_name = "yolo_head_final_concat_large"
    head_lengths = [6400, 1600, 400]
    save_model(
        _build_yolo_head_final_concat_graph(head_lengths, "Y"),
        f"slice/{case_name}",
        f"slice_{case_name}.onnx")
def _build_yolo_head_localization_graph(output_name):
    """Build the small YOLO-like head model with a single selectable graph output.

    The head lengths are fixed at 96, 24 and 8. The final Concat that produces
    ``Y`` is only added when ``Y`` is the requested output.

    Args:
        output_name: "BoxesScaled", "Scores" or "Y". Any other value raises
            ``KeyError`` from the shape lookup.

    Returns:
        The model produced by ``helper.make_model`` with opset import ("", 13).
    """
    head_lengths = (96, 24, 8)
    total_n = sum(head_lengths)

    head_inputs = [
        helper.make_tensor_value_info(f"H{index}", TensorProto.FLOAT, [1, 144, length])
        for index, length in enumerate(head_lengths)
    ]
    shape_by_output = {
        "BoxesScaled": [1, 4, total_n],
        "Scores": [1, 80, total_n],
        "Y": [1, 84, total_n],
    }
    exposed = helper.make_tensor_value_info(
        output_name, TensorProto.FLOAT, shape_by_output[output_name])

    # Initializers created through the file's int64 helper, in graph-initializer order.
    int64_specs = (
        ("starts_box", [0]),
        ("ends_box", [4]),
        ("starts0", [0]),
        ("ends0", [2]),
        ("starts1", [2]),
        ("ends1", [4]),
        ("axes", [1]),
        ("split", [64, 80]),
    )
    int64_tensors = [make_int64_initializer(name, values) for name, values in int64_specs]

    generator = np.random.default_rng(244)
    anchor_values = generator.uniform(-2.0, 2.0, (1, 2, total_n)).astype(np.float32)
    # Per-position scale: 8.0 at even indices, 16.0 at odd indices.
    scale_values = np.full((1, total_n), 8.0, dtype=np.float32)
    scale_values[:, 1::2] = 16.0
    float_tensors = [
        numpy_helper.from_array(anchor_values, name="Anchor"),
        numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half"),
        numpy_helper.from_array(scale_values, name="Scale"),
    ]

    # Each entry: (op type, input names, output names, node attributes).
    node_specs = [
        ("Concat", ["H0", "H1", "H2"], ["Head"], {"axis": 2}),
        ("Split", ["Head", "split"], ["BoxRaw", "ClassRaw"], {"axis": 1}),
        ("Slice", ["BoxRaw", "starts_box", "ends_box", "axes"], ["Box4"], {}),
        ("Slice", ["Box4", "starts0", "ends0", "axes"], ["S0"], {}),
        ("Slice", ["Box4", "starts1", "ends1", "axes"], ["S1"], {}),
        ("Sub", ["Anchor", "S0"], ["A"], {}),
        ("Add", ["S1", "Anchor"], ["B"], {}),
        ("Add", ["A", "B"], ["Sum"], {}),
        ("Div", ["Sum", "Half"], ["Center"], {}),
        ("Sub", ["B", "A"], ["Size"], {}),
        ("Concat", ["Center", "Size"], ["Boxes"], {"axis": 1}),
        ("Mul", ["Boxes", "Scale"], ["BoxesScaled"], {}),
        ("Sigmoid", ["ClassRaw"], ["Scores"], {}),
    ]
    if output_name == "Y":
        node_specs.append(("Concat", ["BoxesScaled", "Scores"], ["Y"], {"axis": 1}))
    nodes = [
        helper.make_node(op_type, inputs, outputs, **attributes)
        for op_type, inputs, outputs, attributes in node_specs
    ]

    graph = helper.make_graph(
        nodes,
        f"yolo_head_{output_name.lower()}_graph",
        head_inputs,
        [exposed],
        initializer=int64_tensors + float_tensors)
    return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
def slice_yolo_head_output_boxes_scaled():
    """Head localization case whose only graph output is ``BoxesScaled``."""
    case_name = "yolo_head_output_boxes_scaled"
    save_model(
        _build_yolo_head_localization_graph("BoxesScaled"),
        f"slice/{case_name}",
        f"slice_{case_name}.onnx")
def slice_yolo_head_output_scores():
    """Head localization case whose only graph output is ``Scores``."""
    case_name = "yolo_head_output_scores"
    save_model(
        _build_yolo_head_localization_graph("Scores"),
        f"slice/{case_name}",
        f"slice_{case_name}.onnx")
def slice_yolo_head_output_y():
    """Head localization case whose only graph output is the final ``Y``."""
    case_name = "yolo_head_output_y"
    save_model(
        _build_yolo_head_localization_graph("Y"),
        f"slice/{case_name}",
        f"slice_{case_name}.onnx")
# ---------------------------------------------------------------------------
# Gather tests
# ---------------------------------------------------------------------------
@@ -2455,6 +2060,13 @@ if __name__ == "__main__":
reducemean_4d_spatial_keepdims_0()
reducemean_channel_axis_nchw()
reducemean_large_dimension_1024()
reducemean_legacy_axis1_keepdims_1()
reducemean_legacy_axis1_keepdims_0()
reducemean_legacy_axes_1_2_keepdims_1()
reducemean_legacy_negative_axis()
reducemean_legacy_reduce_all_keepdims_1()
reducemean_legacy_empty_axes_noop()
reducemean_legacy_nchw_spatial()
print("\nGenerating Relu tests:")
relu_basic()
@@ -2482,20 +2094,6 @@ if __name__ == "__main__":
slice_nchw_spatial_crop()
slice_after_conv()
slice_large_channel_1024()
slice_yolo_decode_tail()
slice_yolo_decode_tail_large_n()
slice_yolo_decode_tail_internal_small()
slice_yolo_decode_tail_internal_large()
slice_yolo_decode_tail_after_transpose()
slice_yolo_decode_tail_output_b()
slice_yolo_decode_tail_output_size()
slice_yolo_decode_tail_output_boxes()
slice_yolo_decode_tail_output_y()
slice_yolo_head_final_concat_small()
slice_yolo_head_final_concat_large()
slice_yolo_head_output_boxes_scaled()
slice_yolo_head_output_scores()
slice_yolo_head_output_y()
print("\nGenerating Softmax tests:")
softmax_basic()
@@ -2504,11 +2102,6 @@ if __name__ == "__main__":
softmax_negative_axis()
softmax_large_dimension_1024()
print("\nGenerating DFL tests:")
yolo_dfl_projection_small()
yolo_dfl_projection_large()
yolo_dfl_decode_tail_large()
print("\nGenerating Resize tests:")
resize_nearest_2x()
resize_nearest_non_uniform()
Binary file not shown.
Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More