diff --git a/backend-simulators/pim/pimsim-nn b/backend-simulators/pim/pimsim-nn index 6d3b898..3e3442b 160000 --- a/backend-simulators/pim/pimsim-nn +++ b/backend-simulators/pim/pimsim-nn @@ -1 +1 @@ -Subproject commit 6d3b898e6b191c4446dfcc8c085ba1e50125e942 +Subproject commit 3e3442b66354282e600c5c45990af0e92aecf0f9 diff --git a/build b/build new file mode 120000 index 0000000..226a5f0 --- /dev/null +++ b/build @@ -0,0 +1 @@ +/home/ilgeco/Project/Raptor/build_debug/ \ No newline at end of file diff --git a/diff.txt b/diff.txt new file mode 100644 index 0000000..3e2a594 --- /dev/null +++ b/diff.txt @@ -0,0 +1,254 @@ +diff --git a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt +index 0b7e8cc..32964aa 100644 +--- a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt ++++ b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt +@@ -22,6 +22,7 @@ add_pim_library(OMONNXToSpatial + Patterns/Tensor/Gather.cpp + Patterns/Tensor/Resize.cpp + Patterns/Tensor/Reshape.cpp ++ Patterns/Tensor/Slice.cpp + Patterns/Tensor/Split.cpp + Patterns/Tensor/Transpose.cpp + ONNXToSpatialPass.cpp +diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp +index edf311e..c3d42f7 100644 +--- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp ++++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp +@@ -138,6 +138,7 @@ void ONNXToSpatialPass::runOnOperation() { + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); ++ target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); +diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp +index ffa0b1f..0a747e9 100644 +--- a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp ++++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp +@@ -22,6 +22,7 @@ void populateConversionPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + populateGatherPatterns(patterns, ctx); + populateResizePatterns(patterns, ctx); + populateReshapePatterns(patterns, ctx); ++ populateSlicePatterns(patterns, ctx); + populateSplitPatterns(patterns, ctx); + populateTransposePatterns(patterns, ctx); + } +diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp +index e58729e..c040536 100644 +--- a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp ++++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp +@@ -29,6 +29,7 @@ void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext + void populateGatherPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateResizePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); ++void populateSlicePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateSplitPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateTransposePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + +diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp +new file mode 100644 +index 0000000..3f8867f +--- /dev/null ++++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Slice.cpp +@@ -0,0 +1,200 @@ ++#include "mlir/Dialect/Arith/IR/Arith.h" ++#include "mlir/Dialect/Tensor/IR/Tensor.h" ++#include "mlir/IR/BuiltinAttributes.h" ++#include "mlir/Transforms/DialectConversion.h" ++ ++#include "llvm/ADT/SmallVector.h" ++ ++#include ++#include ++ ++#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp" ++#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" ++#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" ++#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" ++#include "src/Dialect/ONNX/ONNXOps.hpp" ++ ++using namespace mlir; ++ ++namespace onnx_mlir { ++namespace { ++ ++static DenseElementsAttr getDenseConstantAttr(Value value) { ++ if (auto constantOp = value.getDefiningOp()) ++ return dyn_cast(constantOp.getValue()); ++ if (auto constantOp = value.getDefiningOp()) ++ return dyn_cast_or_null(constantOp.getValueAttr()); ++ return nullptr; ++} ++ ++static FailureOr> getConstantIntValues(Value value) { ++ auto denseAttr = dyn_cast_or_null(getDenseConstantAttr(value)); ++ if (!denseAttr) ++ return failure(); ++ return SmallVector(denseAttr.getValues().begin(), denseAttr.getValues().end()); ++} ++ ++static bool isNoneValueLike(Value value) { return isa_and_nonnull(value.getDefiningOp()); } ++ ++static FailureOr buildSlice(Value data, ++ RankedTensorType dataType, ++ RankedTensorType resultType, ++ ArrayRef starts, ++ ArrayRef ends, ++ std::optional> axes, ++ std::optional> steps, ++ ConversionPatternRewriter& rewriter, ++ Location loc) { ++ int64_t rank = dataType.getRank(); ++ if (!dataType.hasStaticShape() || !resultType.hasStaticShape() || resultType.getRank() != rank) ++ return failure(); ++ ++ if (starts.size() != ends.size()) ++ return failure(); ++ if (axes && axes->size() != starts.size()) ++ return failure(); ++ if (steps && steps->size() != starts.size()) ++ return failure(); ++ ++ SmallVector normalizedAxes; ++ if (axes) { ++ SmallVector seenAxes(rank, false); ++ normalizedAxes.reserve(axes->size()); ++ for (int64_t axis : *axes) { ++ auto normalizedAxis = normalizeAxisChecked(axis, rank); ++ if (failed(normalizedAxis)) ++ return failure(); ++ if (seenAxes[*normalizedAxis]) ++ return failure(); ++ seenAxes[*normalizedAxis] = true; ++ normalizedAxes.push_back(*normalizedAxis); ++ } ++ } ++ else { ++ if (starts.size() > static_cast(rank)) ++ return failure(); ++ normalizedAxes.reserve(starts.size()); ++ for (size_t i = 0; i < starts.size(); ++i) ++ normalizedAxes.push_back(static_cast(i)); ++ } ++ ++ SmallVector normalizedSteps; ++ if (steps) ++ normalizedSteps.assign(steps->begin(), steps->end()); ++ else ++ normalizedSteps.assign(starts.size(), 1); ++ ++ SmallVector computedShape(dataType.getShape().begin(), dataType.getShape().end()); ++ SmallVector offsets = getZeroOffsets(rewriter, rank); ++ SmallVector sizes = getStaticSizes(rewriter, dataType.getShape()); ++ SmallVector strides = getUnitStrides(rewriter, rank); ++ ++ for (auto [sliceIndex, axis] : llvm::enumerate(normalizedAxes)) { ++ int64_t step = normalizedSteps[sliceIndex]; ++ if (step <= 0) ++ return failure(); ++ ++ int64_t dimSize = dataType.getShape()[axis]; ++ int64_t start = starts[sliceIndex]; ++ int64_t end = ends[sliceIndex]; ++ ++ if (start < 0) ++ start += dimSize; ++ if (end < 0) ++ end += dimSize; ++ ++ start = std::clamp(start, int64_t {0}, dimSize); ++ end = std::clamp(end, int64_t {0}, dimSize); ++ ++ int64_t extent = std::max(end - start, int64_t {0}); ++ int64_t size = (extent + step - 1) / step; ++ ++ offsets[axis] = rewriter.getIndexAttr(start); ++ sizes[axis] = rewriter.getIndexAttr(size); ++ strides[axis] = rewriter.getIndexAttr(step); ++ computedShape[axis] = size; ++ } ++ ++ if (llvm::ArrayRef(computedShape) != resultType.getShape()) ++ return failure(); ++ ++ return tensor::ExtractSliceOp::create(rewriter, loc, resultType, data, offsets, sizes, strides).getResult(); ++} ++ ++struct Slice final : OpConversionPattern { ++ using OpConversionPattern::OpConversionPattern; ++ ++ LogicalResult matchAndRewrite(ONNXSliceOp sliceOp, ++ ONNXSliceOpAdaptor adaptor, ++ ConversionPatternRewriter& rewriter) const override { ++ auto dataType = dyn_cast(adaptor.getData().getType()); ++ auto resultType = dyn_cast(sliceOp.getResult().getType()); ++ if (!dataType || !resultType || !dataType.hasStaticShape() || !resultType.hasStaticShape()) ++ return failure(); ++ ++ auto starts = getConstantIntValues(adaptor.getStarts()); ++ auto ends = getConstantIntValues(adaptor.getEnds()); ++ if (failed(starts)) ++ return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant starts"); ++ if (failed(ends)) ++ return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant ends"); ++ ++ std::optional> axes; ++ if (!isNoneValueLike(adaptor.getAxes())) { ++ auto parsedAxes = getConstantIntValues(adaptor.getAxes()); ++ if (failed(parsedAxes)) ++ return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant axes when present"); ++ axes = std::move(*parsedAxes); ++ } ++ ++ std::optional> steps; ++ if (!isNoneValueLike(adaptor.getSteps())) { ++ auto parsedSteps = getConstantIntValues(adaptor.getSteps()); ++ if (failed(parsedSteps)) ++ return rewriter.notifyMatchFailure(sliceOp, "requires compile-time constant steps when present"); ++ steps = std::move(*parsedSteps); ++ if (llvm::any_of(*steps, [](int64_t step) { return step <= 0; })) ++ return rewriter.notifyMatchFailure(sliceOp, "supports only positive constant steps"); ++ } ++ ++ ArrayRef startsRef = *starts; ++ ArrayRef endsRef = *ends; ++ std::optional> axesRef = axes ? std::optional>(ArrayRef(*axes)) ++ : std::nullopt; ++ std::optional> stepsRef = steps ? std::optional>(ArrayRef(*steps)) ++ : std::nullopt; ++ ++ Location loc = sliceOp.getLoc(); ++ auto tryBuildSlice = [&](Value data) { ++ return buildSlice(data, dataType, resultType, startsRef, endsRef, axesRef, stepsRef, rewriter, loc); ++ }; ++ ++ if (isCompileTimeComputable(adaptor.getData())) { ++ auto sliced = tryBuildSlice(adaptor.getData()); ++ if (failed(sliced)) ++ return rewriter.notifyMatchFailure(sliceOp, "failed to normalize static slice parameters"); ++ rewriter.replaceOp(sliceOp, *sliced); ++ return success(); ++ } ++ ++ auto computeOp = ++ createSpatCompute<1>(rewriter, loc, TypeRange {resultType}, {}, adaptor.getData(), [&](Value data) { ++ auto sliced = tryBuildSlice(data); ++ if (failed(sliced)) ++ return failure(); ++ spatial::SpatYieldOp::create(rewriter, loc, *sliced); ++ return success(); ++ }); ++ if (failed(computeOp)) ++ return rewriter.notifyMatchFailure(sliceOp, "failed to build runtime tensor.extract_slice lowering"); ++ ++ rewriter.replaceOp(sliceOp, computeOp->getResults()); ++ return success(); ++ } ++}; ++ ++} // namespace ++ ++void populateSlicePatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add(ctx); } ++ ++} // namespace onnx_mlir diff --git a/onnx-mlir b/onnx-mlir index eb54c2a..82018d7 160000 --- a/onnx-mlir +++ b/onnx-mlir @@ -1 +1 @@ -Subproject commit eb54c2afc46d00c6b196d1f275b6bfee17e12f69 +Subproject commit 82018d7ce59c94bfbe9479b16538224969fa45a0 diff --git a/src/PIM/Compiler/PimCompilerOptions.cpp b/src/PIM/Compiler/PimCompilerOptions.cpp index 0578e4e..7d1cb01 100644 --- a/src/PIM/Compiler/PimCompilerOptions.cpp +++ b/src/PIM/Compiler/PimCompilerOptions.cpp @@ -38,6 +38,12 @@ llvm::cl::opt llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); +llvm::cl::opt + pimDisableMemoryCoalescing("pim-disable-memory-coalescing", + llvm::cl::desc("Skip the PIM memory coalescing pass (developer diagnostic option)"), + llvm::cl::init(false), + llvm::cl::cat(OnnxMlirOptions)); + llvm::cl::opt useExperimentalConvImpl("use-experimental-conv-impl", llvm::cl::desc("Use experimental implementation for convolution"), llvm::cl::init(false), diff --git a/src/PIM/Compiler/PimCompilerOptions.hpp b/src/PIM/Compiler/PimCompilerOptions.hpp index b486070..3d90409 100644 --- a/src/PIM/Compiler/PimCompilerOptions.hpp +++ b/src/PIM/Compiler/PimCompilerOptions.hpp @@ -36,6 +36,7 @@ extern llvm::cl::opt pimMergeScheduler; extern llvm::cl::opt pimMemoryReport; extern llvm::cl::opt pimOnlyCodegen; +extern llvm::cl::opt pimDisableMemoryCoalescing; extern llvm::cl::opt useExperimentalConvImpl; extern llvm::cl::opt pimEmitJson; diff --git a/src/PIM/Compiler/PimCompilerUtils.cpp b/src/PIM/Compiler/PimCompilerUtils.cpp index 5035379..e9bc397 100644 --- a/src/PIM/Compiler/PimCompilerUtils.cpp +++ b/src/PIM/Compiler/PimCompilerUtils.cpp @@ -46,7 +46,8 @@ void addPassesPim(OwningOpRef& module, if (pimEmissionTarget >= EmitPimCodegen) { pm.addPass(createPimHostConstantFoldingPass()); pm.addPass(createMessagePass("Pim host constants folded")); - pm.addPass(createPimMemoryCoalescingPass()); + if (!pimDisableMemoryCoalescing) + pm.addPass(createPimMemoryCoalescingPass()); pm.addPass(createPimVerificationPass()); pm.addPass(createMessagePass("Pim verified")); pm.addPass(createEmitPimCodePass()); diff --git a/src/src.zip b/src/src.zip new file mode 100644 index 0000000..424557a Binary files /dev/null and b/src/src.zip differ diff --git a/validation/networks/yolo11n/depth_35_variants/output0_duplicated/output0_duplicated.onnx b/validation/networks/yolo11n/depth_35_variants/output0_duplicated/output0_duplicated.onnx new file mode 100644 index 0000000..1757456 Binary files /dev/null and b/validation/networks/yolo11n/depth_35_variants/output0_duplicated/output0_duplicated.onnx differ diff --git a/validation/networks/yolo11n/depth_35_variants/output0_first_with_original_debug_outputs/output0_first_with_original_debug_outputs.onnx b/validation/networks/yolo11n/depth_35_variants/output0_first_with_original_debug_outputs/output0_first_with_original_debug_outputs.onnx new file mode 100644 index 0000000..ce92487 Binary files /dev/null and b/validation/networks/yolo11n/depth_35_variants/output0_first_with_original_debug_outputs/output0_first_with_original_debug_outputs.onnx differ diff --git a/validation/networks/yolo11n/depth_35_variants/output0_last_with_original_debug_outputs/output0_last_with_original_debug_outputs.onnx b/validation/networks/yolo11n/depth_35_variants/output0_last_with_original_debug_outputs/output0_last_with_original_debug_outputs.onnx new file mode 100644 index 0000000..834df16 Binary files /dev/null and b/validation/networks/yolo11n/depth_35_variants/output0_last_with_original_debug_outputs/output0_last_with_original_debug_outputs.onnx differ diff --git a/validation/networks/yolo11n/depth_35_variants/output0_only/output0_only.onnx b/validation/networks/yolo11n/depth_35_variants/output0_only/output0_only.onnx new file mode 100644 index 0000000..4c9eb04 Binary files /dev/null and b/validation/networks/yolo11n/depth_35_variants/output0_only/output0_only.onnx differ diff --git a/validation/networks/yolo11n/depth_35_variants/yolo_tail_localization_outputs/yolo_tail_localization_outputs.onnx b/validation/networks/yolo11n/depth_35_variants/yolo_tail_localization_outputs/yolo_tail_localization_outputs.onnx new file mode 100644 index 0000000..0b7ab11 Binary files /dev/null and b/validation/networks/yolo11n/depth_35_variants/yolo_tail_localization_outputs/yolo_tail_localization_outputs.onnx differ diff --git a/validation/operations/add/after_gemm/add_after_gemm.onnx b/validation/operations/add/after_gemm/add_after_gemm.onnx index f88f43d..8202d11 100644 Binary files a/validation/operations/add/after_gemm/add_after_gemm.onnx and b/validation/operations/add/after_gemm/add_after_gemm.onnx differ diff --git a/validation/operations/add/basic/add_basic.onnx b/validation/operations/add/basic/add_basic.onnx index c7699c1..4b5c107 100644 Binary files a/validation/operations/add/basic/add_basic.onnx and b/validation/operations/add/basic/add_basic.onnx differ diff --git a/validation/operations/add/broadcast_row/add_broadcast_row.onnx b/validation/operations/add/broadcast_row/add_broadcast_row.onnx index abdfccb..853016a 100644 Binary files a/validation/operations/add/broadcast_row/add_broadcast_row.onnx and b/validation/operations/add/broadcast_row/add_broadcast_row.onnx differ diff --git a/validation/operations/add/channel_broadcast_1024/add_channel_broadcast_1024.onnx b/validation/operations/add/channel_broadcast_1024/add_channel_broadcast_1024.onnx index 8b563ec..1a6ed21 100644 Binary files a/validation/operations/add/channel_broadcast_1024/add_channel_broadcast_1024.onnx and b/validation/operations/add/channel_broadcast_1024/add_channel_broadcast_1024.onnx differ diff --git a/validation/operations/add/leading_dimension_broadcast/add_leading_dimension_broadcast.onnx b/validation/operations/add/leading_dimension_broadcast/add_leading_dimension_broadcast.onnx index 46c7b83..58a40ce 100644 Binary files a/validation/operations/add/leading_dimension_broadcast/add_leading_dimension_broadcast.onnx and b/validation/operations/add/leading_dimension_broadcast/add_leading_dimension_broadcast.onnx differ diff --git a/validation/operations/concat/channel_axis/concat_channel_axis.onnx b/validation/operations/concat/channel_axis/concat_channel_axis.onnx index 134da55..340e6f7 100644 Binary files a/validation/operations/concat/channel_axis/concat_channel_axis.onnx and b/validation/operations/concat/channel_axis/concat_channel_axis.onnx differ diff --git a/validation/operations/concat/negative_axis/concat_negative_axis.onnx b/validation/operations/concat/negative_axis/concat_negative_axis.onnx index f2ceaa0..724ae14 100644 Binary files a/validation/operations/concat/negative_axis/concat_negative_axis.onnx and b/validation/operations/concat/negative_axis/concat_negative_axis.onnx differ diff --git a/validation/operations/concat/three_inputs_channel_axis/concat_three_inputs_channel_axis.onnx b/validation/operations/concat/three_inputs_channel_axis/concat_three_inputs_channel_axis.onnx index 5178101..2ac1bcd 100644 Binary files a/validation/operations/concat/three_inputs_channel_axis/concat_three_inputs_channel_axis.onnx and b/validation/operations/concat/three_inputs_channel_axis/concat_three_inputs_channel_axis.onnx differ diff --git a/validation/operations/conv/batch_2/conv_batch_2.onnx b/validation/operations/conv/batch_2/conv_batch_2.onnx index 8fa369c..fc9fb02 100644 Binary files a/validation/operations/conv/batch_2/conv_batch_2.onnx and b/validation/operations/conv/batch_2/conv_batch_2.onnx differ diff --git a/validation/operations/conv/batch_4_pointwise/conv_batch_4_pointwise.onnx b/validation/operations/conv/batch_4_pointwise/conv_batch_4_pointwise.onnx index 52feeec..78cad13 100644 Binary files a/validation/operations/conv/batch_4_pointwise/conv_batch_4_pointwise.onnx and b/validation/operations/conv/batch_4_pointwise/conv_batch_4_pointwise.onnx differ diff --git a/validation/operations/conv/depthwise_1024_channels/conv_depthwise_1024_channels.onnx b/validation/operations/conv/depthwise_1024_channels/conv_depthwise_1024_channels.onnx index fe2b99e..ef55228 100644 Binary files a/validation/operations/conv/depthwise_1024_channels/conv_depthwise_1024_channels.onnx and b/validation/operations/conv/depthwise_1024_channels/conv_depthwise_1024_channels.onnx differ diff --git a/validation/operations/conv/depthwise_grouped/conv_depthwise_grouped.onnx b/validation/operations/conv/depthwise_grouped/conv_depthwise_grouped.onnx index 84da811..f92d7d5 100644 Binary files a/validation/operations/conv/depthwise_grouped/conv_depthwise_grouped.onnx and b/validation/operations/conv/depthwise_grouped/conv_depthwise_grouped.onnx differ diff --git a/validation/operations/conv/dilated_3x3/conv_dilated_3x3.onnx b/validation/operations/conv/dilated_3x3/conv_dilated_3x3.onnx index 7602a5a..3b8b953 100644 Binary files a/validation/operations/conv/dilated_3x3/conv_dilated_3x3.onnx and b/validation/operations/conv/dilated_3x3/conv_dilated_3x3.onnx differ diff --git a/validation/operations/conv/dynamic/conv_dynamic.onnx b/validation/operations/conv/dynamic/conv_dynamic.onnx index c21dcf3..2f63495 100644 Binary files a/validation/operations/conv/dynamic/conv_dynamic.onnx and b/validation/operations/conv/dynamic/conv_dynamic.onnx differ diff --git a/validation/operations/conv/explicit_padding/conv_explicit_padding.onnx b/validation/operations/conv/explicit_padding/conv_explicit_padding.onnx index 15813f7..fc62886 100644 Binary files a/validation/operations/conv/explicit_padding/conv_explicit_padding.onnx and b/validation/operations/conv/explicit_padding/conv_explicit_padding.onnx differ diff --git a/validation/operations/conv/grouped_many_groups/conv_grouped_many_groups.onnx b/validation/operations/conv/grouped_many_groups/conv_grouped_many_groups.onnx index 53e138f..d4b26e8 100644 Binary files a/validation/operations/conv/grouped_many_groups/conv_grouped_many_groups.onnx and b/validation/operations/conv/grouped_many_groups/conv_grouped_many_groups.onnx differ diff --git a/validation/operations/conv/grouped_two_groups/conv_grouped_two_groups.onnx b/validation/operations/conv/grouped_two_groups/conv_grouped_two_groups.onnx index 162214e..1f55cc8 100644 Binary files a/validation/operations/conv/grouped_two_groups/conv_grouped_two_groups.onnx and b/validation/operations/conv/grouped_two_groups/conv_grouped_two_groups.onnx differ diff --git a/validation/operations/conv/huge_pointwise_1024/conv_huge_pointwise_1024.onnx b/validation/operations/conv/huge_pointwise_1024/conv_huge_pointwise_1024.onnx index 517d3c8..867e74c 100644 Binary files a/validation/operations/conv/huge_pointwise_1024/conv_huge_pointwise_1024.onnx and b/validation/operations/conv/huge_pointwise_1024/conv_huge_pointwise_1024.onnx differ diff --git a/validation/operations/conv/huge_pointwise_1024_dynamic/conv_huge_pointwise_1024_dynamic.onnx b/validation/operations/conv/huge_pointwise_1024_dynamic/conv_huge_pointwise_1024_dynamic.onnx index f5ce95d..7f4163c 100644 Binary files a/validation/operations/conv/huge_pointwise_1024_dynamic/conv_huge_pointwise_1024_dynamic.onnx and b/validation/operations/conv/huge_pointwise_1024_dynamic/conv_huge_pointwise_1024_dynamic.onnx differ diff --git a/validation/operations/conv/kernel_3x3/conv_kernel_3x3.onnx b/validation/operations/conv/kernel_3x3/conv_kernel_3x3.onnx index 701fc94..c76ed00 100644 Binary files a/validation/operations/conv/kernel_3x3/conv_kernel_3x3.onnx and b/validation/operations/conv/kernel_3x3/conv_kernel_3x3.onnx differ diff --git a/validation/operations/conv/kernel_equals_input_spatial/conv_kernel_equals_input_spatial.onnx b/validation/operations/conv/kernel_equals_input_spatial/conv_kernel_equals_input_spatial.onnx index ecad81a..113d0cd 100644 Binary files a/validation/operations/conv/kernel_equals_input_spatial/conv_kernel_equals_input_spatial.onnx and b/validation/operations/conv/kernel_equals_input_spatial/conv_kernel_equals_input_spatial.onnx differ diff --git a/validation/operations/conv/large_input_channels_1x1/conv_large_input_channels_1x1.onnx b/validation/operations/conv/large_input_channels_1x1/conv_large_input_channels_1x1.onnx index 858b451..4cca526 100644 Binary files a/validation/operations/conv/large_input_channels_1x1/conv_large_input_channels_1x1.onnx and b/validation/operations/conv/large_input_channels_1x1/conv_large_input_channels_1x1.onnx differ diff --git a/validation/operations/conv/large_output_channels_1x1/conv_large_output_channels_1x1.onnx b/validation/operations/conv/large_output_channels_1x1/conv_large_output_channels_1x1.onnx index 2c3b410..19083d0 100644 Binary files a/validation/operations/conv/large_output_channels_1x1/conv_large_output_channels_1x1.onnx and b/validation/operations/conv/large_output_channels_1x1/conv_large_output_channels_1x1.onnx differ diff --git a/validation/operations/conv/large_spatial/conv_large_spatial.onnx b/validation/operations/conv/large_spatial/conv_large_spatial.onnx index 3e3d862..9380e03 100644 Binary files a/validation/operations/conv/large_spatial/conv_large_spatial.onnx and b/validation/operations/conv/large_spatial/conv_large_spatial.onnx differ diff --git a/validation/operations/conv/multi_channel/conv_multi_channel.onnx b/validation/operations/conv/multi_channel/conv_multi_channel.onnx index bfec7b2..0f4c529 100644 Binary files a/validation/operations/conv/multi_channel/conv_multi_channel.onnx and b/validation/operations/conv/multi_channel/conv_multi_channel.onnx differ diff --git a/validation/operations/conv/non_square_kernel_1x3/conv_non_square_kernel_1x3.onnx b/validation/operations/conv/non_square_kernel_1x3/conv_non_square_kernel_1x3.onnx index 118ed6b..691aaa2 100644 Binary files a/validation/operations/conv/non_square_kernel_1x3/conv_non_square_kernel_1x3.onnx and b/validation/operations/conv/non_square_kernel_1x3/conv_non_square_kernel_1x3.onnx differ diff --git a/validation/operations/conv/non_square_kernel_3x1/conv_non_square_kernel_3x1.onnx b/validation/operations/conv/non_square_kernel_3x1/conv_non_square_kernel_3x1.onnx index b60dfa2..ac78dc5 100644 Binary files a/validation/operations/conv/non_square_kernel_3x1/conv_non_square_kernel_3x1.onnx and b/validation/operations/conv/non_square_kernel_3x1/conv_non_square_kernel_3x1.onnx differ diff --git a/validation/operations/conv/non_uniform_stride/conv_non_uniform_stride.onnx b/validation/operations/conv/non_uniform_stride/conv_non_uniform_stride.onnx index 9f2a692..c41ac2f 100644 Binary files a/validation/operations/conv/non_uniform_stride/conv_non_uniform_stride.onnx and b/validation/operations/conv/non_uniform_stride/conv_non_uniform_stride.onnx differ diff --git a/validation/operations/conv/pointwise_1x1/conv_1x1.onnx b/validation/operations/conv/pointwise_1x1/conv_1x1.onnx index 50cf143..5946fdd 100644 Binary files a/validation/operations/conv/pointwise_1x1/conv_1x1.onnx and b/validation/operations/conv/pointwise_1x1/conv_1x1.onnx differ diff --git a/validation/operations/conv/real_asymmetric_padding/conv_real_asymmetric_padding.onnx b/validation/operations/conv/real_asymmetric_padding/conv_real_asymmetric_padding.onnx index 481f0ec..48c62e6 100644 Binary files a/validation/operations/conv/real_asymmetric_padding/conv_real_asymmetric_padding.onnx and b/validation/operations/conv/real_asymmetric_padding/conv_real_asymmetric_padding.onnx differ diff --git a/validation/operations/conv/same_lower_3x3/conv_same_lower_3x3.onnx b/validation/operations/conv/same_lower_3x3/conv_same_lower_3x3.onnx index 9044d8d..5af31a0 100644 Binary files a/validation/operations/conv/same_lower_3x3/conv_same_lower_3x3.onnx and b/validation/operations/conv/same_lower_3x3/conv_same_lower_3x3.onnx differ diff --git a/validation/operations/conv/same_padding_3x3/conv_same_padding_3x3.onnx b/validation/operations/conv/same_padding_3x3/conv_same_padding_3x3.onnx index 3a017fc..0797ab7 100644 Binary files a/validation/operations/conv/same_padding_3x3/conv_same_padding_3x3.onnx and b/validation/operations/conv/same_padding_3x3/conv_same_padding_3x3.onnx differ diff --git a/validation/operations/conv/stride_2/conv_stride_2.onnx b/validation/operations/conv/stride_2/conv_stride_2.onnx index 9135966..56c7e62 100644 Binary files a/validation/operations/conv/stride_2/conv_stride_2.onnx and b/validation/operations/conv/stride_2/conv_stride_2.onnx differ diff --git a/validation/operations/conv/with_bias_3x3/conv_with_bias_3x3.onnx b/validation/operations/conv/with_bias_3x3/conv_with_bias_3x3.onnx index d4f81e4..9709538 100644 Binary files a/validation/operations/conv/with_bias_3x3/conv_with_bias_3x3.onnx and b/validation/operations/conv/with_bias_3x3/conv_with_bias_3x3.onnx differ diff --git a/validation/operations/conv/without_kernel_shape_attr/conv_without_kernel_shape_attr.onnx b/validation/operations/conv/without_kernel_shape_attr/conv_without_kernel_shape_attr.onnx index b784f99..b72f977 100644 Binary files a/validation/operations/conv/without_kernel_shape_attr/conv_without_kernel_shape_attr.onnx and b/validation/operations/conv/without_kernel_shape_attr/conv_without_kernel_shape_attr.onnx differ diff --git a/validation/operations/dfl/yolo_dfl_decode_tail_large/yolo_dfl_decode_tail_large.onnx b/validation/operations/dfl/yolo_dfl_decode_tail_large/yolo_dfl_decode_tail_large.onnx new file mode 100644 index 0000000..7b02cb3 Binary files /dev/null and b/validation/operations/dfl/yolo_dfl_decode_tail_large/yolo_dfl_decode_tail_large.onnx differ diff --git a/validation/operations/dfl/yolo_dfl_projection_large/yolo_dfl_projection_large.onnx b/validation/operations/dfl/yolo_dfl_projection_large/yolo_dfl_projection_large.onnx new file mode 100644 index 0000000..10868c0 Binary files /dev/null and b/validation/operations/dfl/yolo_dfl_projection_large/yolo_dfl_projection_large.onnx differ diff --git a/validation/operations/dfl/yolo_dfl_projection_small/yolo_dfl_projection_small.onnx b/validation/operations/dfl/yolo_dfl_projection_small/yolo_dfl_projection_small.onnx new file mode 100644 index 0000000..f49e090 Binary files /dev/null and b/validation/operations/dfl/yolo_dfl_projection_small/yolo_dfl_projection_small.onnx differ diff --git a/validation/operations/div/after_gemm/div_after_gemm.onnx b/validation/operations/div/after_gemm/div_after_gemm.onnx index ae8770d..aea8f35 100644 Binary files a/validation/operations/div/after_gemm/div_after_gemm.onnx and b/validation/operations/div/after_gemm/div_after_gemm.onnx differ diff --git a/validation/operations/div/basic/div_basic.onnx b/validation/operations/div/basic/div_basic.onnx index d2dd4f5..6f3954f 100644 Binary files a/validation/operations/div/basic/div_basic.onnx and b/validation/operations/div/basic/div_basic.onnx differ diff --git a/validation/operations/div/channel_broadcast_1024/div_channel_broadcast_1024.onnx b/validation/operations/div/channel_broadcast_1024/div_channel_broadcast_1024.onnx index 0664097..6fc75ed 100644 Binary files a/validation/operations/div/channel_broadcast_1024/div_channel_broadcast_1024.onnx and b/validation/operations/div/channel_broadcast_1024/div_channel_broadcast_1024.onnx differ diff --git a/validation/operations/div/leading_dimension_broadcast/div_leading_dimension_broadcast.onnx b/validation/operations/div/leading_dimension_broadcast/div_leading_dimension_broadcast.onnx index 68ce49b..50a36f4 100644 Binary files a/validation/operations/div/leading_dimension_broadcast/div_leading_dimension_broadcast.onnx and b/validation/operations/div/leading_dimension_broadcast/div_leading_dimension_broadcast.onnx differ diff --git a/validation/operations/div/runtime_scalar_rhs/div_runtime_scalar_rhs.onnx b/validation/operations/div/runtime_scalar_rhs/div_runtime_scalar_rhs.onnx index 5885133..e18f399 100644 Binary files a/validation/operations/div/runtime_scalar_rhs/div_runtime_scalar_rhs.onnx and b/validation/operations/div/runtime_scalar_rhs/div_runtime_scalar_rhs.onnx differ diff --git a/validation/operations/div/scalar_constant/div_scalar_constant.onnx b/validation/operations/div/scalar_constant/div_scalar_constant.onnx index b61f587..01190f0 100644 Binary files a/validation/operations/div/scalar_constant/div_scalar_constant.onnx and b/validation/operations/div/scalar_constant/div_scalar_constant.onnx differ diff --git a/validation/operations/gather/3d_input_axis1/gather_3d_input_axis1.onnx b/validation/operations/gather/3d_input_axis1/gather_3d_input_axis1.onnx index d46b548..6935f61 100644 Binary files a/validation/operations/gather/3d_input_axis1/gather_3d_input_axis1.onnx and b/validation/operations/gather/3d_input_axis1/gather_3d_input_axis1.onnx differ diff --git a/validation/operations/gather/axis0_matrix_indices/gather_axis0_matrix_indices.onnx b/validation/operations/gather/axis0_matrix_indices/gather_axis0_matrix_indices.onnx index 4119bad..2ff2843 100644 Binary files a/validation/operations/gather/axis0_matrix_indices/gather_axis0_matrix_indices.onnx and b/validation/operations/gather/axis0_matrix_indices/gather_axis0_matrix_indices.onnx differ diff --git a/validation/operations/gather/axis1/gather_axis1.onnx b/validation/operations/gather/axis1/gather_axis1.onnx index c0d6ed4..9cbd37d 100644 Binary files a/validation/operations/gather/axis1/gather_axis1.onnx and b/validation/operations/gather/axis1/gather_axis1.onnx differ diff --git a/validation/operations/gather/negative_axis/gather_negative_axis.onnx b/validation/operations/gather/negative_axis/gather_negative_axis.onnx index b9a3cc8..b759257 100644 Binary files a/validation/operations/gather/negative_axis/gather_negative_axis.onnx and b/validation/operations/gather/negative_axis/gather_negative_axis.onnx differ diff --git a/validation/operations/gather/negative_indices/gather_negative_indices.onnx b/validation/operations/gather/negative_indices/gather_negative_indices.onnx index ea7403c..f0e07ca 100644 Binary files a/validation/operations/gather/negative_indices/gather_negative_indices.onnx and b/validation/operations/gather/negative_indices/gather_negative_indices.onnx differ diff --git a/validation/operations/gemm/alpha_beta/gemm_alpha_beta.onnx b/validation/operations/gemm/alpha_beta/gemm_alpha_beta.onnx index 1248276..fdb1910 100644 Binary files a/validation/operations/gemm/alpha_beta/gemm_alpha_beta.onnx and b/validation/operations/gemm/alpha_beta/gemm_alpha_beta.onnx differ diff --git a/validation/operations/gemm/bias_rank2_broadcast/gemm_bias_rank2_broadcast.onnx b/validation/operations/gemm/bias_rank2_broadcast/gemm_bias_rank2_broadcast.onnx index 596be43..637323b 100644 Binary files a/validation/operations/gemm/bias_rank2_broadcast/gemm_bias_rank2_broadcast.onnx and b/validation/operations/gemm/bias_rank2_broadcast/gemm_bias_rank2_broadcast.onnx differ diff --git a/validation/operations/gemm/dynamic/gemm_dynamic.onnx b/validation/operations/gemm/dynamic/gemm_dynamic.onnx index f23e103..917113d 100644 Binary files a/validation/operations/gemm/dynamic/gemm_dynamic.onnx and b/validation/operations/gemm/dynamic/gemm_dynamic.onnx differ diff --git a/validation/operations/gemm/dynamic_alpha/gemm_dynamic_alpha.onnx b/validation/operations/gemm/dynamic_alpha/gemm_dynamic_alpha.onnx index 2fdccb3..4decf30 100644 Binary files a/validation/operations/gemm/dynamic_alpha/gemm_dynamic_alpha.onnx and b/validation/operations/gemm/dynamic_alpha/gemm_dynamic_alpha.onnx differ diff --git a/validation/operations/gemm/dynamic_beta/gemm_dynamic_beta.onnx b/validation/operations/gemm/dynamic_beta/gemm_dynamic_beta.onnx index 716d64d..1ea67b7 100644 Binary files a/validation/operations/gemm/dynamic_beta/gemm_dynamic_beta.onnx and b/validation/operations/gemm/dynamic_beta/gemm_dynamic_beta.onnx differ diff --git a/validation/operations/gemm/dynamic_bias/gemm_dynamic_bias.onnx b/validation/operations/gemm/dynamic_bias/gemm_dynamic_bias.onnx index 5ffd977..69d3a3a 100644 Binary files a/validation/operations/gemm/dynamic_bias/gemm_dynamic_bias.onnx and b/validation/operations/gemm/dynamic_bias/gemm_dynamic_bias.onnx differ diff --git a/validation/operations/gemm/dynamic_bias_alpha_beta/gemm_dynamic_bias_alpha_beta.onnx b/validation/operations/gemm/dynamic_bias_alpha_beta/gemm_dynamic_bias_alpha_beta.onnx index f5f03f3..ca50913 100644 Binary files a/validation/operations/gemm/dynamic_bias_alpha_beta/gemm_dynamic_bias_alpha_beta.onnx and b/validation/operations/gemm/dynamic_bias_alpha_beta/gemm_dynamic_bias_alpha_beta.onnx differ diff --git a/validation/operations/gemm/dynamic_transB/gemm_dynamic_transB.onnx b/validation/operations/gemm/dynamic_transB/gemm_dynamic_transB.onnx index 8e9eb94..82109e1 100644 Binary files a/validation/operations/gemm/dynamic_transB/gemm_dynamic_transB.onnx and b/validation/operations/gemm/dynamic_transB/gemm_dynamic_transB.onnx differ diff --git a/validation/operations/gemm/huge_1024/gemm_huge_1024.onnx b/validation/operations/gemm/huge_1024/gemm_huge_1024.onnx index 6e6031d..d46409c 100644 Binary files a/validation/operations/gemm/huge_1024/gemm_huge_1024.onnx and b/validation/operations/gemm/huge_1024/gemm_huge_1024.onnx differ diff --git a/validation/operations/gemm/large/gemm_large.onnx b/validation/operations/gemm/large/gemm_large.onnx index d44a8db..1b11e87 100644 Binary files a/validation/operations/gemm/large/gemm_large.onnx and b/validation/operations/gemm/large/gemm_large.onnx differ diff --git a/validation/operations/gemm/large_k_small_n/gemm_large_k_small_n.onnx b/validation/operations/gemm/large_k_small_n/gemm_large_k_small_n.onnx index da27754..05f395b 100644 Binary files a/validation/operations/gemm/large_k_small_n/gemm_large_k_small_n.onnx and b/validation/operations/gemm/large_k_small_n/gemm_large_k_small_n.onnx differ diff --git a/validation/operations/gemm/non_square/gemm_non_square.onnx b/validation/operations/gemm/non_square/gemm_non_square.onnx index d26ecf4..bef0692 100644 Binary files a/validation/operations/gemm/non_square/gemm_non_square.onnx and b/validation/operations/gemm/non_square/gemm_non_square.onnx differ diff --git a/validation/operations/gemm/scalar_bias/gemm_scalar_bias.onnx b/validation/operations/gemm/scalar_bias/gemm_scalar_bias.onnx index 8114542..9c30453 100644 Binary files a/validation/operations/gemm/scalar_bias/gemm_scalar_bias.onnx and b/validation/operations/gemm/scalar_bias/gemm_scalar_bias.onnx differ diff --git a/validation/operations/gemm/simple/gemm_simple.onnx b/validation/operations/gemm/simple/gemm_simple.onnx index 237810f..7eb9d78 100644 Binary files a/validation/operations/gemm/simple/gemm_simple.onnx and b/validation/operations/gemm/simple/gemm_simple.onnx differ diff --git a/validation/operations/gemm/small/gemm_small.onnx b/validation/operations/gemm/small/gemm_small.onnx index 49a7f91..8ea5da3 100644 Binary files a/validation/operations/gemm/small/gemm_small.onnx and b/validation/operations/gemm/small/gemm_small.onnx differ diff --git a/validation/operations/gemm/small_k_large_n/gemm_small_k_large_n.onnx b/validation/operations/gemm/small_k_large_n/gemm_small_k_large_n.onnx index d101ff5..8baaa0d 100644 Binary files a/validation/operations/gemm/small_k_large_n/gemm_small_k_large_n.onnx and b/validation/operations/gemm/small_k_large_n/gemm_small_k_large_n.onnx differ diff --git a/validation/operations/gemm/transA/gemm_transA.onnx b/validation/operations/gemm/transA/gemm_transA.onnx index fc36591..b20089b 100644 Binary files a/validation/operations/gemm/transA/gemm_transA.onnx and b/validation/operations/gemm/transA/gemm_transA.onnx differ diff --git a/validation/operations/gemm/transA_transB/gemm_transA_transB.onnx b/validation/operations/gemm/transA_transB/gemm_transA_transB.onnx index 9ee8835..cb091cc 100644 Binary files a/validation/operations/gemm/transA_transB/gemm_transA_transB.onnx and b/validation/operations/gemm/transA_transB/gemm_transA_transB.onnx differ diff --git a/validation/operations/gemm/transB/gemm_transB.onnx b/validation/operations/gemm/transB/gemm_transB.onnx index 2df2d83..2a5ea81 100644 Binary files a/validation/operations/gemm/transB/gemm_transB.onnx and b/validation/operations/gemm/transB/gemm_transB.onnx differ diff --git a/validation/operations/gemm/transB_with_bias/gemm_transB_with_bias.onnx b/validation/operations/gemm/transB_with_bias/gemm_transB_with_bias.onnx index 696ef58..a4602fb 100644 Binary files a/validation/operations/gemm/transB_with_bias/gemm_transB_with_bias.onnx and b/validation/operations/gemm/transB_with_bias/gemm_transB_with_bias.onnx differ diff --git a/validation/operations/gemm/with_bias/gemm_with_bias.onnx b/validation/operations/gemm/with_bias/gemm_with_bias.onnx index b1b4bb2..f3ee094 100644 Binary files a/validation/operations/gemm/with_bias/gemm_with_bias.onnx and b/validation/operations/gemm/with_bias/gemm_with_bias.onnx differ diff --git a/validation/operations/gen_tests.py b/validation/operations/gen_tests.py index ad107b3..39a704a 100644 --- a/validation/operations/gen_tests.py +++ b/validation/operations/gen_tests.py @@ -1200,6 +1200,106 @@ def softmax_large_dimension_1024(): save_model(model, "softmax/large_dimension_1024", "softmax_large_dimension_1024.onnx") +# --------------------------------------------------------------------------- +# DFL tests +# --------------------------------------------------------------------------- + +def _make_yolo_dfl_initializers(n, include_decode_tail): + shape0 = make_int64_initializer("Shape0", [1, 4, 16, n]) + shape1 = make_int64_initializer("Shape1", [1, 16, 4 * n]) + shape2 = make_int64_initializer("Shape2", [1, 4, n]) + proj = numpy_helper.from_array(np.arange(16, dtype=np.float32).reshape(1, 16), name="Proj") + initializers = [shape0, shape1, shape2, proj] + + if include_decode_tail: + starts0 = make_int64_initializer("starts0", [0]) + ends0 = make_int64_initializer("ends0", [2]) + starts1 = make_int64_initializer("starts1", [2]) + ends1 = make_int64_initializer("ends1", [4]) + axes = make_int64_initializer("axes", [1]) + rng = np.random.default_rng(301) + anchor = numpy_helper.from_array( + rng.uniform(-2.0, 2.0, (1, 2, n)).astype(np.float32), name="Anchor") + half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half") + scale_values = np.full((1, n), 8.0, dtype=np.float32) + scale_values[:, 1::2] = 16.0 + scale = numpy_helper.from_array(scale_values, name="Scale") + initializers.extend([starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + + return initializers + + +def _build_yolo_dfl_projection_nodes(box_raw_name): + return [ + helper.make_node("Reshape", [box_raw_name, "Shape0"], ["R0"]), + helper.make_node("Transpose", ["R0"], ["T0"], perm=[0, 2, 1, 3]), + helper.make_node("Softmax", ["T0"], ["S0"], axis=1), + helper.make_node("Reshape", ["S0", "Shape1"], ["R1"]), + helper.make_node("MatMul", ["Proj", "R1"], ["M0"]), + helper.make_node("Reshape", ["M0", "Shape2"], ["Box4"]), + ] + + +def yolo_dfl_projection_small(): + """YOLO DFL projection path on a small [1,64,128] box tensor.""" + box_raw = helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, 128]) + y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128]) + nodes = _build_yolo_dfl_projection_nodes("BoxRaw") + graph = helper.make_graph( + nodes[:-1] + [helper.make_node("Reshape", ["M0", "Shape2"], ["Y"])], + "yolo_dfl_projection_small", + [box_raw], + [y], + initializer=_make_yolo_dfl_initializers(128, include_decode_tail=False)) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "dfl/yolo_dfl_projection_small", "yolo_dfl_projection_small.onnx") + + +def yolo_dfl_projection_large(): + """YOLO DFL projection path on a YOLO-scale [1,64,8400] box tensor.""" + box_raw = helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, 8400]) + y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 8400]) + nodes = _build_yolo_dfl_projection_nodes("BoxRaw") + graph = helper.make_graph( + nodes[:-1] + [helper.make_node("Reshape", ["M0", "Shape2"], ["Y"])], + "yolo_dfl_projection_large", + [box_raw], + [y], + initializer=_make_yolo_dfl_initializers(8400, include_decode_tail=False)) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "dfl/yolo_dfl_projection_large", "yolo_dfl_projection_large.onnx") + + +def yolo_dfl_decode_tail_large(): + """YOLO-scale DFL projection followed by box decode, stride scale, score sigmoid, and final concat.""" + box_raw = helper.make_tensor_value_info("BoxRaw", TensorProto.FLOAT, [1, 64, 8400]) + class_raw = helper.make_tensor_value_info("ClassRaw", TensorProto.FLOAT, [1, 80, 8400]) + y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 84, 8400]) + + nodes = _build_yolo_dfl_projection_nodes("BoxRaw") + nodes.extend([ + helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["L0"]), + helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["L1"]), + helper.make_node("Sub", ["Anchor", "L0"], ["A"]), + helper.make_node("Add", ["L1", "Anchor"], ["B"]), + helper.make_node("Add", ["A", "B"], ["Sum"]), + helper.make_node("Div", ["Sum", "Half"], ["Center"]), + helper.make_node("Sub", ["B", "A"], ["Size"]), + helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1), + helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]), + helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]), + helper.make_node("Concat", ["BoxesScaled", "Scores"], ["Y"], axis=1), + ]) + graph = helper.make_graph( + nodes, + "yolo_dfl_decode_tail_large", + [box_raw, class_raw], + [y], + initializer=_make_yolo_dfl_initializers(8400, include_decode_tail=True)) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "dfl/yolo_dfl_decode_tail_large", "yolo_dfl_decode_tail_large.onnx") + + # --------------------------------------------------------------------------- # Resize tests # --------------------------------------------------------------------------- @@ -1452,6 +1552,387 @@ def slice_large_channel_1024(): save_model(model, "slice/large_channel_1024", "slice_large_channel_1024.onnx") +def slice_yolo_decode_tail(): + """YOLO-like decode tail where a non-zero-offset channel slice feeds arithmetic and Concat.""" + rng = np.random.default_rng(109) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 32]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 32]) + S0 = helper.make_tensor_value_info("S0", TensorProto.FLOAT, [1, 2, 32]) + S1 = helper.make_tensor_value_info("S1", TensorProto.FLOAT, [1, 2, 32]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 2, 32]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 2, 32]) + Center = helper.make_tensor_value_info("Center", TensorProto.FLOAT, [1, 2, 32]) + Size = helper.make_tensor_value_info("Size", TensorProto.FLOAT, [1, 2, 32]) + Boxes = helper.make_tensor_value_info("Boxes", TensorProto.FLOAT, [1, 4, 32]) + + starts0 = make_int64_initializer("starts0", [0]) + ends0 = make_int64_initializer("ends0", [2]) + starts1 = make_int64_initializer("starts1", [2]) + ends1 = make_int64_initializer("ends1", [4]) + axes = make_int64_initializer("axes", [1]) + anchor = numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, 32)).astype(np.float32), name="Anchor") + half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half") + scale = numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale") + + slice0 = helper.make_node("Slice", ["X", "starts0", "ends0", "axes"], ["S0"]) + slice1 = helper.make_node("Slice", ["X", "starts1", "ends1", "axes"], ["S1"]) + sub_a = helper.make_node("Sub", ["Anchor", "S0"], ["A"]) + add_b = helper.make_node("Add", ["S1", "Anchor"], ["B"]) + add_sum = helper.make_node("Add", ["A", "B"], ["Sum"]) + div_center = helper.make_node("Div", ["Sum", "Half"], ["Center"]) + sub_size = helper.make_node("Sub", ["B", "A"], ["Size"]) + concat_boxes = helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1) + mul_y = helper.make_node("Mul", ["Boxes", "Scale"], ["Y"]) + graph = helper.make_graph( + [slice0, slice1, sub_a, add_b, add_sum, div_center, sub_size, concat_boxes, mul_y], + "slice_yolo_decode_tail", + [X], + [Y, S0, S1, A, B, Center, Size, Boxes], + initializer=[starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "slice/yolo_decode_tail", "slice_yolo_decode_tail.onnx") + + +def slice_yolo_decode_tail_large_n(): + """Larger YOLO-like decode tail variant to stress non-zero-offset slice address handling.""" + rng = np.random.default_rng(110) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 8400]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 8400]) + S0 = helper.make_tensor_value_info("S0", TensorProto.FLOAT, [1, 2, 8400]) + S1 = helper.make_tensor_value_info("S1", TensorProto.FLOAT, [1, 2, 8400]) + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 2, 8400]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 2, 8400]) + Center = helper.make_tensor_value_info("Center", TensorProto.FLOAT, [1, 2, 8400]) + Size = helper.make_tensor_value_info("Size", TensorProto.FLOAT, [1, 2, 8400]) + Boxes = helper.make_tensor_value_info("Boxes", TensorProto.FLOAT, [1, 4, 8400]) + + starts0 = make_int64_initializer("starts0", [0]) + ends0 = make_int64_initializer("ends0", [2]) + starts1 = make_int64_initializer("starts1", [2]) + ends1 = make_int64_initializer("ends1", [4]) + axes = make_int64_initializer("axes", [1]) + anchor = numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, 8400)).astype(np.float32), name="Anchor") + half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half") + scale = numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale") + + slice0 = helper.make_node("Slice", ["X", "starts0", "ends0", "axes"], ["S0"]) + slice1 = helper.make_node("Slice", ["X", "starts1", "ends1", "axes"], ["S1"]) + sub_a = helper.make_node("Sub", ["Anchor", "S0"], ["A"]) + add_b = helper.make_node("Add", ["S1", "Anchor"], ["B"]) + add_sum = helper.make_node("Add", ["A", "B"], ["Sum"]) + div_center = helper.make_node("Div", ["Sum", "Half"], ["Center"]) + sub_size = helper.make_node("Sub", ["B", "A"], ["Size"]) + concat_boxes = helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1) + mul_y = helper.make_node("Mul", ["Boxes", "Scale"], ["Y"]) + graph = helper.make_graph( + [slice0, slice1, sub_a, add_b, add_sum, div_center, sub_size, concat_boxes, mul_y], + "slice_yolo_decode_tail_large_n", + [X], + [Y, S0, S1, A, B, Center, Size, Boxes], + initializer=[starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "slice/yolo_decode_tail_large_n", "slice_yolo_decode_tail_large_n.onnx") + + +def _make_yolo_decode_tail_constants(seed, n): + rng = np.random.default_rng(seed) + starts0 = make_int64_initializer("starts0", [0]) + ends0 = make_int64_initializer("ends0", [2]) + starts1 = make_int64_initializer("starts1", [2]) + ends1 = make_int64_initializer("ends1", [4]) + axes = make_int64_initializer("axes", [1]) + anchor = numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, n)).astype(np.float32), name="Anchor") + half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half") + scale = numpy_helper.from_array(np.asarray([8.0], dtype=np.float32), name="Scale") + return starts0, ends0, starts1, ends1, axes, anchor, half, scale + + +def _build_yolo_decode_tail_graph(input_name, slice_source_name, output_name): + slice0 = helper.make_node("Slice", [slice_source_name, "starts0", "ends0", "axes"], ["S0"]) + slice1 = helper.make_node("Slice", [slice_source_name, "starts1", "ends1", "axes"], ["S1"]) + sub_a = helper.make_node("Sub", ["Anchor", "S0"], ["A"]) + add_b = helper.make_node("Add", ["S1", "Anchor"], ["B"]) + add_sum = helper.make_node("Add", ["A", "B"], ["Sum"]) + div_center = helper.make_node("Div", ["Sum", "Half"], ["Center"]) + sub_size = helper.make_node("Sub", ["B", "A"], ["Size"]) + concat_boxes = helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1) + mul_y = helper.make_node("Mul", ["Boxes", "Scale"], [output_name]) + return [slice0, slice1, sub_a, add_b, add_sum, div_center, sub_size, concat_boxes, mul_y] + + +def slice_yolo_decode_tail_internal_small(): + """YOLO-like decode tail from an internal tensor with only the final output exposed.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 128]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128]) + zero = numpy_helper.from_array(np.zeros((1, 4, 128), dtype=np.float32), name="Z") + starts0, ends0, starts1, ends1, axes, anchor, half, scale = _make_yolo_decode_tail_constants(111, 128) + + preadd = helper.make_node("Add", ["X", "Z"], ["P"]) + graph = helper.make_graph( + [preadd] + _build_yolo_decode_tail_graph("X", "P", "Y"), + "slice_yolo_decode_tail_internal_small", + [X], + [Y], + initializer=[zero, starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model( + model, + "slice/yolo_decode_tail_internal_small", + "slice_yolo_decode_tail_internal_small.onnx") + + +def slice_yolo_decode_tail_internal_large(): + """Large YOLO-like decode tail from an internal tensor to stress large non-zero slice offsets.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 8400]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 8400]) + zero = numpy_helper.from_array(np.zeros((1, 4, 8400), dtype=np.float32), name="Z") + starts0, ends0, starts1, ends1, axes, anchor, half, scale = _make_yolo_decode_tail_constants(112, 8400) + + preadd = helper.make_node("Add", ["X", "Z"], ["P"]) + graph = helper.make_graph( + [preadd] + _build_yolo_decode_tail_graph("X", "P", "Y"), + "slice_yolo_decode_tail_internal_large", + [X], + [Y], + initializer=[zero, starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model( + model, + "slice/yolo_decode_tail_internal_large", + "slice_yolo_decode_tail_internal_large.onnx") + + +def slice_yolo_decode_tail_after_transpose(): + """YOLO-like decode tail after a transpose to mirror the final decode-tail producer shape change.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 128, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 128]) + starts0, ends0, starts1, ends1, axes, anchor, half, scale = _make_yolo_decode_tail_constants(113, 128) + + transpose = helper.make_node("Transpose", ["X"], ["T"], perm=[0, 2, 1]) + graph = helper.make_graph( + [transpose] + _build_yolo_decode_tail_graph("X", "T", "Y"), + "slice_yolo_decode_tail_after_transpose", + [X], + [Y], + initializer=[starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model( + model, + "slice/yolo_decode_tail_after_transpose", + "slice_yolo_decode_tail_after_transpose.onnx") + + +def _save_yolo_decode_tail_localization_variant(directory, filename, output_name): + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 128]) + output_shapes = { + "B": [1, 2, 128], + "Size": [1, 2, 128], + "Boxes": [1, 4, 128], + "Y": [1, 4, 128], + } + output = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, output_shapes[output_name]) + zero = numpy_helper.from_array(np.zeros((1, 4, 128), dtype=np.float32), name="Z") + starts0, ends0, starts1, ends1, axes, anchor, half, scale = _make_yolo_decode_tail_constants(114, 128) + + preadd = helper.make_node("Add", ["X", "Z"], ["P"]) + slice0 = helper.make_node("Slice", ["P", "starts0", "ends0", "axes"], ["S0"]) + slice1 = helper.make_node("Slice", ["P", "starts1", "ends1", "axes"], ["S1"]) + sub_a = helper.make_node("Sub", ["Anchor", "S0"], ["A"]) + add_b = helper.make_node("Add", ["S1", "Anchor"], ["B"]) + add_sum = helper.make_node("Add", ["A", "B"], ["Sum"]) + div_center = helper.make_node("Div", ["Sum", "Half"], ["Center"]) + sub_size = helper.make_node("Sub", ["B", "A"], ["Size"]) + concat_boxes = helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1) + nodes = [preadd, slice0, slice1, sub_a, add_b, add_sum, div_center, sub_size, concat_boxes] + if output_name == "Y": + nodes.append(helper.make_node("Mul", ["Boxes", "Scale"], ["Y"])) + graph = helper.make_graph( + nodes, + directory.replace("/", "_"), + [X], + [output], + initializer=[zero, starts0, ends0, starts1, ends1, axes, anchor, half, scale]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, directory, filename) + + +def slice_yolo_decode_tail_output_b(): + """Localization variant exposing B only.""" + _save_yolo_decode_tail_localization_variant( + "slice/yolo_decode_tail_output_b", + "slice_yolo_decode_tail_output_b.onnx", + "B") + + +def slice_yolo_decode_tail_output_size(): + """Localization variant exposing Size only.""" + _save_yolo_decode_tail_localization_variant( + "slice/yolo_decode_tail_output_size", + "slice_yolo_decode_tail_output_size.onnx", + "Size") + + +def slice_yolo_decode_tail_output_boxes(): + """Localization variant exposing Boxes only.""" + _save_yolo_decode_tail_localization_variant( + "slice/yolo_decode_tail_output_boxes", + "slice_yolo_decode_tail_output_boxes.onnx", + "Boxes") + + +def slice_yolo_decode_tail_output_y(): + """Localization variant exposing Y only.""" + _save_yolo_decode_tail_localization_variant( + "slice/yolo_decode_tail_output_y", + "slice_yolo_decode_tail_output_y.onnx", + "Y") + + +def _build_yolo_head_final_concat_graph(lengths, output_name): + total_n = sum(lengths) + h0 = helper.make_tensor_value_info("H0", TensorProto.FLOAT, [1, 144, lengths[0]]) + h1 = helper.make_tensor_value_info("H1", TensorProto.FLOAT, [1, 144, lengths[1]]) + h2 = helper.make_tensor_value_info("H2", TensorProto.FLOAT, [1, 144, lengths[2]]) + y = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, [1, 84, total_n]) + + starts_box = make_int64_initializer("starts_box", [0]) + ends_box = make_int64_initializer("ends_box", [4]) + starts0 = make_int64_initializer("starts0", [0]) + ends0 = make_int64_initializer("ends0", [2]) + starts1 = make_int64_initializer("starts1", [2]) + ends1 = make_int64_initializer("ends1", [4]) + axes = make_int64_initializer("axes", [1]) + split = make_int64_initializer("split", [64, 80]) + rng = np.random.default_rng(115 + total_n) + anchor = numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, total_n)).astype(np.float32), name="Anchor") + half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half") + scale_values = np.full((1, total_n), 8.0, dtype=np.float32) + scale_values[:, 1::2] = 16.0 + scale = numpy_helper.from_array(scale_values, name="Scale") + + head = helper.make_node("Concat", ["H0", "H1", "H2"], ["Head"], axis=2) + split_node = helper.make_node("Split", ["Head", "split"], ["BoxRaw", "ClassRaw"], axis=1) + box4 = helper.make_node("Slice", ["BoxRaw", "starts_box", "ends_box", "axes"], ["Box4"]) + slice0 = helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["S0"]) + slice1 = helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["S1"]) + sub_a = helper.make_node("Sub", ["Anchor", "S0"], ["A"]) + add_b = helper.make_node("Add", ["S1", "Anchor"], ["B"]) + add_sum = helper.make_node("Add", ["A", "B"], ["Sum"]) + div_center = helper.make_node("Div", ["Sum", "Half"], ["Center"]) + sub_size = helper.make_node("Sub", ["B", "A"], ["Size"]) + concat_boxes = helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1) + mul_boxes = helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]) + scores = helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]) + final = helper.make_node("Concat", ["BoxesScaled", "Scores"], [output_name], axis=1) + graph = helper.make_graph( + [head, split_node, box4, slice0, slice1, sub_a, add_b, add_sum, div_center, sub_size, concat_boxes, + mul_boxes, scores, final], + f"{output_name}_graph", + [h0, h1, h2], + [y], + initializer=[starts_box, ends_box, starts0, ends0, starts1, ends1, axes, split, anchor, half, scale]) + return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + + +def slice_yolo_head_final_concat_small(): + """YOLO head/final-output structure with [1,144,N] head, split, decode, and final [1,84,N] concat.""" + model = _build_yolo_head_final_concat_graph([96, 24, 8], "Y") + save_model( + model, + "slice/yolo_head_final_concat_small", + "slice_yolo_head_final_concat_small.onnx") + + +def slice_yolo_head_final_concat_large(): + """YOLO-scale final-head concat reproducer with head lengths 6400, 1600, and 400.""" + model = _build_yolo_head_final_concat_graph([6400, 1600, 400], "Y") + save_model( + model, + "slice/yolo_head_final_concat_large", + "slice_yolo_head_final_concat_large.onnx") + + +def _build_yolo_head_localization_graph(output_name): + lengths = [96, 24, 8] + total_n = sum(lengths) + h0 = helper.make_tensor_value_info("H0", TensorProto.FLOAT, [1, 144, lengths[0]]) + h1 = helper.make_tensor_value_info("H1", TensorProto.FLOAT, [1, 144, lengths[1]]) + h2 = helper.make_tensor_value_info("H2", TensorProto.FLOAT, [1, 144, lengths[2]]) + output_shapes = { + "BoxesScaled": [1, 4, total_n], + "Scores": [1, 80, total_n], + "Y": [1, 84, total_n], + } + output = helper.make_tensor_value_info(output_name, TensorProto.FLOAT, output_shapes[output_name]) + + starts_box = make_int64_initializer("starts_box", [0]) + ends_box = make_int64_initializer("ends_box", [4]) + starts0 = make_int64_initializer("starts0", [0]) + ends0 = make_int64_initializer("ends0", [2]) + starts1 = make_int64_initializer("starts1", [2]) + ends1 = make_int64_initializer("ends1", [4]) + axes = make_int64_initializer("axes", [1]) + split = make_int64_initializer("split", [64, 80]) + rng = np.random.default_rng(244) + anchor = numpy_helper.from_array(rng.uniform(-2.0, 2.0, (1, 2, total_n)).astype(np.float32), name="Anchor") + half = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="Half") + scale_values = np.full((1, total_n), 8.0, dtype=np.float32) + scale_values[:, 1::2] = 16.0 + scale = numpy_helper.from_array(scale_values, name="Scale") + + nodes = [ + helper.make_node("Concat", ["H0", "H1", "H2"], ["Head"], axis=2), + helper.make_node("Split", ["Head", "split"], ["BoxRaw", "ClassRaw"], axis=1), + helper.make_node("Slice", ["BoxRaw", "starts_box", "ends_box", "axes"], ["Box4"]), + helper.make_node("Slice", ["Box4", "starts0", "ends0", "axes"], ["S0"]), + helper.make_node("Slice", ["Box4", "starts1", "ends1", "axes"], ["S1"]), + helper.make_node("Sub", ["Anchor", "S0"], ["A"]), + helper.make_node("Add", ["S1", "Anchor"], ["B"]), + helper.make_node("Add", ["A", "B"], ["Sum"]), + helper.make_node("Div", ["Sum", "Half"], ["Center"]), + helper.make_node("Sub", ["B", "A"], ["Size"]), + helper.make_node("Concat", ["Center", "Size"], ["Boxes"], axis=1), + helper.make_node("Mul", ["Boxes", "Scale"], ["BoxesScaled"]), + helper.make_node("Sigmoid", ["ClassRaw"], ["Scores"]), + ] + if output_name == "Y": + nodes.append(helper.make_node("Concat", ["BoxesScaled", "Scores"], ["Y"], axis=1)) + graph = helper.make_graph( + nodes, + f"yolo_head_{output_name.lower()}_graph", + [h0, h1, h2], + [output], + initializer=[starts_box, ends_box, starts0, ends0, starts1, ends1, axes, split, anchor, half, scale]) + return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + + +def slice_yolo_head_output_boxes_scaled(): + """Localization variant exposing only BoxesScaled.""" + model = _build_yolo_head_localization_graph("BoxesScaled") + save_model( + model, + "slice/yolo_head_output_boxes_scaled", + "slice_yolo_head_output_boxes_scaled.onnx") + + +def slice_yolo_head_output_scores(): + """Localization variant exposing only Scores.""" + model = _build_yolo_head_localization_graph("Scores") + save_model( + model, + "slice/yolo_head_output_scores", + "slice_yolo_head_output_scores.onnx") + + +def slice_yolo_head_output_y(): + """Localization variant exposing only final Y.""" + model = _build_yolo_head_localization_graph("Y") + save_model( + model, + "slice/yolo_head_output_y", + "slice_yolo_head_output_y.onnx") + + # --------------------------------------------------------------------------- # Gather tests # --------------------------------------------------------------------------- @@ -2001,6 +2482,20 @@ if __name__ == "__main__": slice_nchw_spatial_crop() slice_after_conv() slice_large_channel_1024() + slice_yolo_decode_tail() + slice_yolo_decode_tail_large_n() + slice_yolo_decode_tail_internal_small() + slice_yolo_decode_tail_internal_large() + slice_yolo_decode_tail_after_transpose() + slice_yolo_decode_tail_output_b() + slice_yolo_decode_tail_output_size() + slice_yolo_decode_tail_output_boxes() + slice_yolo_decode_tail_output_y() + slice_yolo_head_final_concat_small() + slice_yolo_head_final_concat_large() + slice_yolo_head_output_boxes_scaled() + slice_yolo_head_output_scores() + slice_yolo_head_output_y() print("\nGenerating Softmax tests:") softmax_basic() @@ -2009,6 +2504,11 @@ if __name__ == "__main__": softmax_negative_axis() softmax_large_dimension_1024() + print("\nGenerating DFL tests:") + yolo_dfl_projection_small() + yolo_dfl_projection_large() + yolo_dfl_decode_tail_large() + print("\nGenerating Resize tests:") resize_nearest_2x() resize_nearest_non_uniform() diff --git a/validation/operations/matmul/basic/matmul_basic.onnx b/validation/operations/matmul/basic/matmul_basic.onnx index 4d0b920..c131c17 100644 Binary files a/validation/operations/matmul/basic/matmul_basic.onnx and b/validation/operations/matmul/basic/matmul_basic.onnx differ diff --git a/validation/operations/matmul/batched_3d/matmul_batched_3d.onnx b/validation/operations/matmul/batched_3d/matmul_batched_3d.onnx index d58cd3b..13f3dc6 100644 Binary files a/validation/operations/matmul/batched_3d/matmul_batched_3d.onnx and b/validation/operations/matmul/batched_3d/matmul_batched_3d.onnx differ diff --git a/validation/operations/matmul/batched_3d_dynamic/matmul_batched_3d_dynamic.onnx b/validation/operations/matmul/batched_3d_dynamic/matmul_batched_3d_dynamic.onnx index c8037ed..61040cd 100644 Binary files a/validation/operations/matmul/batched_3d_dynamic/matmul_batched_3d_dynamic.onnx and b/validation/operations/matmul/batched_3d_dynamic/matmul_batched_3d_dynamic.onnx differ diff --git a/validation/operations/matmul/batched_left_constant/matmul_batched_left_constant.onnx b/validation/operations/matmul/batched_left_constant/matmul_batched_left_constant.onnx index 91921f7..37bb886 100644 Binary files a/validation/operations/matmul/batched_left_constant/matmul_batched_left_constant.onnx and b/validation/operations/matmul/batched_left_constant/matmul_batched_left_constant.onnx differ diff --git a/validation/operations/matmul/batched_lhs_broadcast/matmul_batched_lhs_broadcast.onnx b/validation/operations/matmul/batched_lhs_broadcast/matmul_batched_lhs_broadcast.onnx index b1d7810..771a4a1 100644 Binary files a/validation/operations/matmul/batched_lhs_broadcast/matmul_batched_lhs_broadcast.onnx and b/validation/operations/matmul/batched_lhs_broadcast/matmul_batched_lhs_broadcast.onnx differ diff --git a/validation/operations/matmul/batched_rhs_broadcast/matmul_batched_rhs_broadcast.onnx b/validation/operations/matmul/batched_rhs_broadcast/matmul_batched_rhs_broadcast.onnx index 7981e76..447368f 100644 Binary files a/validation/operations/matmul/batched_rhs_broadcast/matmul_batched_rhs_broadcast.onnx and b/validation/operations/matmul/batched_rhs_broadcast/matmul_batched_rhs_broadcast.onnx differ diff --git a/validation/operations/matmul/dynamic/matmul_dynamic.onnx b/validation/operations/matmul/dynamic/matmul_dynamic.onnx index 30947ff..f7fe1fd 100644 Binary files a/validation/operations/matmul/dynamic/matmul_dynamic.onnx and b/validation/operations/matmul/dynamic/matmul_dynamic.onnx differ diff --git a/validation/operations/matmul/huge_1024/matmul_huge_1024.onnx b/validation/operations/matmul/huge_1024/matmul_huge_1024.onnx index bfc613b..bfa35e1 100644 Binary files a/validation/operations/matmul/huge_1024/matmul_huge_1024.onnx and b/validation/operations/matmul/huge_1024/matmul_huge_1024.onnx differ diff --git a/validation/operations/matmul/left_constant/matmul_left_constant.onnx b/validation/operations/matmul/left_constant/matmul_left_constant.onnx index 7f727e5..0cf483a 100644 Binary files a/validation/operations/matmul/left_constant/matmul_left_constant.onnx and b/validation/operations/matmul/left_constant/matmul_left_constant.onnx differ diff --git a/validation/operations/matmul/matrix_vector/matmul_matrix_vector.onnx b/validation/operations/matmul/matrix_vector/matmul_matrix_vector.onnx index d6a08a9..ea3b5e8 100644 Binary files a/validation/operations/matmul/matrix_vector/matmul_matrix_vector.onnx and b/validation/operations/matmul/matrix_vector/matmul_matrix_vector.onnx differ diff --git a/validation/operations/matmul/vector_matrix/matmul_vector_matrix.onnx b/validation/operations/matmul/vector_matrix/matmul_vector_matrix.onnx index 2b4b88e..eb4f8f3 100644 Binary files a/validation/operations/matmul/vector_matrix/matmul_vector_matrix.onnx and b/validation/operations/matmul/vector_matrix/matmul_vector_matrix.onnx differ diff --git a/validation/operations/mul/after_conv/mul_after_conv.onnx b/validation/operations/mul/after_conv/mul_after_conv.onnx index 8c88597..57cde8e 100644 Binary files a/validation/operations/mul/after_conv/mul_after_conv.onnx and b/validation/operations/mul/after_conv/mul_after_conv.onnx differ diff --git a/validation/operations/mul/basic/mul_basic.onnx b/validation/operations/mul/basic/mul_basic.onnx index 589c035..cb2f167 100644 Binary files a/validation/operations/mul/basic/mul_basic.onnx and b/validation/operations/mul/basic/mul_basic.onnx differ diff --git a/validation/operations/mul/channel_broadcast_1024/mul_channel_broadcast_1024.onnx b/validation/operations/mul/channel_broadcast_1024/mul_channel_broadcast_1024.onnx index e971205..8454cf0 100644 Binary files a/validation/operations/mul/channel_broadcast_1024/mul_channel_broadcast_1024.onnx and b/validation/operations/mul/channel_broadcast_1024/mul_channel_broadcast_1024.onnx differ diff --git a/validation/operations/mul/leading_dimension_broadcast/mul_leading_dimension_broadcast.onnx b/validation/operations/mul/leading_dimension_broadcast/mul_leading_dimension_broadcast.onnx index 55addb9..53942d9 100644 Binary files a/validation/operations/mul/leading_dimension_broadcast/mul_leading_dimension_broadcast.onnx and b/validation/operations/mul/leading_dimension_broadcast/mul_leading_dimension_broadcast.onnx differ diff --git a/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx b/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx index 600cf8b..f19ba6b 100644 Binary files a/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx and b/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx differ diff --git a/validation/operations/pool/avg_basic/avgpool_basic.onnx b/validation/operations/pool/avg_basic/avgpool_basic.onnx index 69a1b8a..2f03305 100644 Binary files a/validation/operations/pool/avg_basic/avgpool_basic.onnx and b/validation/operations/pool/avg_basic/avgpool_basic.onnx differ diff --git a/validation/operations/pool/avg_ceil_mode/avgpool_ceil_mode.onnx b/validation/operations/pool/avg_ceil_mode/avgpool_ceil_mode.onnx index 723e8ec..1219ea5 100644 Binary files a/validation/operations/pool/avg_ceil_mode/avgpool_ceil_mode.onnx and b/validation/operations/pool/avg_ceil_mode/avgpool_ceil_mode.onnx differ diff --git a/validation/operations/pool/avg_explicit_padding/avgpool_explicit_padding.onnx b/validation/operations/pool/avg_explicit_padding/avgpool_explicit_padding.onnx index edc1f1f..4cd2f83 100644 Binary files a/validation/operations/pool/avg_explicit_padding/avgpool_explicit_padding.onnx and b/validation/operations/pool/avg_explicit_padding/avgpool_explicit_padding.onnx differ diff --git a/validation/operations/pool/avg_include_pad/avgpool_include_pad.onnx b/validation/operations/pool/avg_include_pad/avgpool_include_pad.onnx index 1a1a726..7a6ec6d 100644 Binary files a/validation/operations/pool/avg_include_pad/avgpool_include_pad.onnx and b/validation/operations/pool/avg_include_pad/avgpool_include_pad.onnx differ diff --git a/validation/operations/pool/avg_large_channels/avgpool_large_channels.onnx b/validation/operations/pool/avg_large_channels/avgpool_large_channels.onnx index 254c3c6..acf9bb6 100644 Binary files a/validation/operations/pool/avg_large_channels/avgpool_large_channels.onnx and b/validation/operations/pool/avg_large_channels/avgpool_large_channels.onnx differ diff --git a/validation/operations/pool/avg_non_uniform_stride/avgpool_non_uniform_stride.onnx b/validation/operations/pool/avg_non_uniform_stride/avgpool_non_uniform_stride.onnx index e3c0116..3eabba6 100644 Binary files a/validation/operations/pool/avg_non_uniform_stride/avgpool_non_uniform_stride.onnx and b/validation/operations/pool/avg_non_uniform_stride/avgpool_non_uniform_stride.onnx differ diff --git a/validation/operations/pool/avg_real_asymmetric_padding/avgpool_real_asymmetric_padding.onnx b/validation/operations/pool/avg_real_asymmetric_padding/avgpool_real_asymmetric_padding.onnx index 5933547..6276535 100644 Binary files a/validation/operations/pool/avg_real_asymmetric_padding/avgpool_real_asymmetric_padding.onnx and b/validation/operations/pool/avg_real_asymmetric_padding/avgpool_real_asymmetric_padding.onnx differ diff --git a/validation/operations/pool/max_after_conv/maxpool_after_conv.onnx b/validation/operations/pool/max_after_conv/maxpool_after_conv.onnx index da54062..34a796f 100644 Binary files a/validation/operations/pool/max_after_conv/maxpool_after_conv.onnx and b/validation/operations/pool/max_after_conv/maxpool_after_conv.onnx differ diff --git a/validation/operations/pool/max_basic/maxpool_basic.onnx b/validation/operations/pool/max_basic/maxpool_basic.onnx index cd248ba..8c891b0 100644 Binary files a/validation/operations/pool/max_basic/maxpool_basic.onnx and b/validation/operations/pool/max_basic/maxpool_basic.onnx differ diff --git a/validation/operations/pool/max_ceil_mode/maxpool_ceil_mode.onnx b/validation/operations/pool/max_ceil_mode/maxpool_ceil_mode.onnx index fc4b590..e477d69 100644 Binary files a/validation/operations/pool/max_ceil_mode/maxpool_ceil_mode.onnx and b/validation/operations/pool/max_ceil_mode/maxpool_ceil_mode.onnx differ diff --git a/validation/operations/pool/max_global_style_kernel_equals_input/maxpool_global_style_kernel_equals_input.onnx b/validation/operations/pool/max_global_style_kernel_equals_input/maxpool_global_style_kernel_equals_input.onnx index 1d83961..77be0f6 100644 Binary files a/validation/operations/pool/max_global_style_kernel_equals_input/maxpool_global_style_kernel_equals_input.onnx and b/validation/operations/pool/max_global_style_kernel_equals_input/maxpool_global_style_kernel_equals_input.onnx differ diff --git a/validation/operations/pool/max_non_square_kernel/maxpool_non_square_kernel.onnx b/validation/operations/pool/max_non_square_kernel/maxpool_non_square_kernel.onnx index 4feb046..c82f31b 100644 Binary files a/validation/operations/pool/max_non_square_kernel/maxpool_non_square_kernel.onnx and b/validation/operations/pool/max_non_square_kernel/maxpool_non_square_kernel.onnx differ diff --git a/validation/operations/pool/max_real_asymmetric_padding/maxpool_real_asymmetric_padding.onnx b/validation/operations/pool/max_real_asymmetric_padding/maxpool_real_asymmetric_padding.onnx index 10255ae..3eb3f65 100644 Binary files a/validation/operations/pool/max_real_asymmetric_padding/maxpool_real_asymmetric_padding.onnx and b/validation/operations/pool/max_real_asymmetric_padding/maxpool_real_asymmetric_padding.onnx differ diff --git a/validation/operations/pool/max_same_upper/maxpool_same_upper.onnx b/validation/operations/pool/max_same_upper/maxpool_same_upper.onnx index 37c0819..e350bee 100644 Binary files a/validation/operations/pool/max_same_upper/maxpool_same_upper.onnx and b/validation/operations/pool/max_same_upper/maxpool_same_upper.onnx differ diff --git a/validation/operations/pool/max_stride2_multichannel/maxpool_stride2_multichannel.onnx b/validation/operations/pool/max_stride2_multichannel/maxpool_stride2_multichannel.onnx index 77745c3..b627a3a 100644 Binary files a/validation/operations/pool/max_stride2_multichannel/maxpool_stride2_multichannel.onnx and b/validation/operations/pool/max_stride2_multichannel/maxpool_stride2_multichannel.onnx differ diff --git a/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx b/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx index 4658251..e496fef 100644 Binary files a/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx and b/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx differ diff --git a/validation/operations/reduce_mean/4d_spatial_keepdims_0/reduce_mean_4d_spatial_keepdims_0.onnx b/validation/operations/reduce_mean/4d_spatial_keepdims_0/reduce_mean_4d_spatial_keepdims_0.onnx index c642f07..8a8ffee 100644 Binary files a/validation/operations/reduce_mean/4d_spatial_keepdims_0/reduce_mean_4d_spatial_keepdims_0.onnx and b/validation/operations/reduce_mean/4d_spatial_keepdims_0/reduce_mean_4d_spatial_keepdims_0.onnx differ diff --git a/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx b/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx index deac445..c20be96 100644 Binary files a/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx and b/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx differ diff --git a/validation/operations/reduce_mean/all_axes_keepdims_0/reduce_mean_all_axes_keepdims_0.onnx b/validation/operations/reduce_mean/all_axes_keepdims_0/reduce_mean_all_axes_keepdims_0.onnx index 2bfada4..223a52d 100644 Binary files a/validation/operations/reduce_mean/all_axes_keepdims_0/reduce_mean_all_axes_keepdims_0.onnx and b/validation/operations/reduce_mean/all_axes_keepdims_0/reduce_mean_all_axes_keepdims_0.onnx differ diff --git a/validation/operations/reduce_mean/all_axes_keepdims_1/reduce_mean_all_axes_keepdims_1.onnx b/validation/operations/reduce_mean/all_axes_keepdims_1/reduce_mean_all_axes_keepdims_1.onnx index 9e5a3c7..59d31d7 100644 Binary files a/validation/operations/reduce_mean/all_axes_keepdims_1/reduce_mean_all_axes_keepdims_1.onnx and b/validation/operations/reduce_mean/all_axes_keepdims_1/reduce_mean_all_axes_keepdims_1.onnx differ diff --git a/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx b/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx index 5760eb6..43b28f2 100644 Binary files a/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx and b/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx differ diff --git a/validation/operations/reduce_mean/channel_axis_nchw/reduce_mean_channel_axis_nchw.onnx b/validation/operations/reduce_mean/channel_axis_nchw/reduce_mean_channel_axis_nchw.onnx index 7cb7a98..d68c96f 100644 Binary files a/validation/operations/reduce_mean/channel_axis_nchw/reduce_mean_channel_axis_nchw.onnx and b/validation/operations/reduce_mean/channel_axis_nchw/reduce_mean_channel_axis_nchw.onnx differ diff --git a/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx b/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx index 53626bc..b5258cc 100644 Binary files a/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx and b/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx differ diff --git a/validation/operations/reduce_mean/large_dimension_1024/reduce_mean_large_dimension_1024.onnx b/validation/operations/reduce_mean/large_dimension_1024/reduce_mean_large_dimension_1024.onnx index 77b3b09..a87c0a6 100644 Binary files a/validation/operations/reduce_mean/large_dimension_1024/reduce_mean_large_dimension_1024.onnx and b/validation/operations/reduce_mean/large_dimension_1024/reduce_mean_large_dimension_1024.onnx differ diff --git a/validation/operations/reduce_mean/negative_axis/reduce_mean_negative_axis.onnx b/validation/operations/reduce_mean/negative_axis/reduce_mean_negative_axis.onnx index 839e320..ac237a4 100644 Binary files a/validation/operations/reduce_mean/negative_axis/reduce_mean_negative_axis.onnx and b/validation/operations/reduce_mean/negative_axis/reduce_mean_negative_axis.onnx differ diff --git a/validation/operations/relu/4d/relu_4d.onnx b/validation/operations/relu/4d/relu_4d.onnx index 3d62ef7..207f02c 100644 Binary files a/validation/operations/relu/4d/relu_4d.onnx and b/validation/operations/relu/4d/relu_4d.onnx differ diff --git a/validation/operations/relu/after_conv/relu_after_conv.onnx b/validation/operations/relu/after_conv/relu_after_conv.onnx index d2cb581..5d4d481 100644 Binary files a/validation/operations/relu/after_conv/relu_after_conv.onnx and b/validation/operations/relu/after_conv/relu_after_conv.onnx differ diff --git a/validation/operations/relu/after_gemm/relu_after_gemm.onnx b/validation/operations/relu/after_gemm/relu_after_gemm.onnx index 0c80915..187e679 100644 Binary files a/validation/operations/relu/after_gemm/relu_after_gemm.onnx and b/validation/operations/relu/after_gemm/relu_after_gemm.onnx differ diff --git a/validation/operations/relu/basic/relu_basic.onnx b/validation/operations/relu/basic/relu_basic.onnx index 119f330..6c28335 100644 Binary files a/validation/operations/relu/basic/relu_basic.onnx and b/validation/operations/relu/basic/relu_basic.onnx differ diff --git a/validation/operations/reshape/4d_to_2d_flatten/reshape_4d_to_2d_flatten.onnx b/validation/operations/reshape/4d_to_2d_flatten/reshape_4d_to_2d_flatten.onnx index 41b9c7a..25a997f 100644 Binary files a/validation/operations/reshape/4d_to_2d_flatten/reshape_4d_to_2d_flatten.onnx and b/validation/operations/reshape/4d_to_2d_flatten/reshape_4d_to_2d_flatten.onnx differ diff --git a/validation/operations/reshape/infer_dim_minus_one/reshape_infer_dim_minus_one.onnx b/validation/operations/reshape/infer_dim_minus_one/reshape_infer_dim_minus_one.onnx index 3afcedb..c4106cd 100644 Binary files a/validation/operations/reshape/infer_dim_minus_one/reshape_infer_dim_minus_one.onnx and b/validation/operations/reshape/infer_dim_minus_one/reshape_infer_dim_minus_one.onnx differ diff --git a/validation/operations/reshape/same_rank/reshape_same_rank.onnx b/validation/operations/reshape/same_rank/reshape_same_rank.onnx index 7942128..c21243c 100644 Binary files a/validation/operations/reshape/same_rank/reshape_same_rank.onnx and b/validation/operations/reshape/same_rank/reshape_same_rank.onnx differ diff --git a/validation/operations/reshape/zero_copies_input_dim/reshape_zero_copies_input_dim.onnx b/validation/operations/reshape/zero_copies_input_dim/reshape_zero_copies_input_dim.onnx index 45e09dd..3066125 100644 Binary files a/validation/operations/reshape/zero_copies_input_dim/reshape_zero_copies_input_dim.onnx and b/validation/operations/reshape/zero_copies_input_dim/reshape_zero_copies_input_dim.onnx differ diff --git a/validation/operations/resize/height_only/resize_height_only.onnx b/validation/operations/resize/height_only/resize_height_only.onnx index 53bad98..aeff0ee 100644 Binary files a/validation/operations/resize/height_only/resize_height_only.onnx and b/validation/operations/resize/height_only/resize_height_only.onnx differ diff --git a/validation/operations/resize/nearest_2x/resize_nearest_2x.onnx b/validation/operations/resize/nearest_2x/resize_nearest_2x.onnx index b4b8f5f..aa9be45 100644 Binary files a/validation/operations/resize/nearest_2x/resize_nearest_2x.onnx and b/validation/operations/resize/nearest_2x/resize_nearest_2x.onnx differ diff --git a/validation/operations/resize/nearest_downsample/resize_nearest_downsample.onnx b/validation/operations/resize/nearest_downsample/resize_nearest_downsample.onnx index 6c2a952..3612722 100644 Binary files a/validation/operations/resize/nearest_downsample/resize_nearest_downsample.onnx and b/validation/operations/resize/nearest_downsample/resize_nearest_downsample.onnx differ diff --git a/validation/operations/resize/non_uniform/resize_non_uniform.onnx b/validation/operations/resize/non_uniform/resize_non_uniform.onnx index 9003cee..3cf7aaa 100644 Binary files a/validation/operations/resize/non_uniform/resize_non_uniform.onnx and b/validation/operations/resize/non_uniform/resize_non_uniform.onnx differ diff --git a/validation/operations/resize/width_only/resize_width_only.onnx b/validation/operations/resize/width_only/resize_width_only.onnx index cf6844d..d5baab2 100644 Binary files a/validation/operations/resize/width_only/resize_width_only.onnx and b/validation/operations/resize/width_only/resize_width_only.onnx differ diff --git a/validation/operations/resize/with_sizes/resize_with_sizes.onnx b/validation/operations/resize/with_sizes/resize_with_sizes.onnx index 227b586..cde4e37 100644 Binary files a/validation/operations/resize/with_sizes/resize_with_sizes.onnx and b/validation/operations/resize/with_sizes/resize_with_sizes.onnx differ diff --git a/validation/operations/sigmoid/4d/sigmoid_4d.onnx b/validation/operations/sigmoid/4d/sigmoid_4d.onnx index b0fb523..12fa3d5 100644 Binary files a/validation/operations/sigmoid/4d/sigmoid_4d.onnx and b/validation/operations/sigmoid/4d/sigmoid_4d.onnx differ diff --git a/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx b/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx index 58245f1..7226e54 100644 Binary files a/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx and b/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx differ diff --git a/validation/operations/sigmoid/basic/sigmoid_basic.onnx b/validation/operations/sigmoid/basic/sigmoid_basic.onnx index 977731e..408ff60 100644 Binary files a/validation/operations/sigmoid/basic/sigmoid_basic.onnx and b/validation/operations/sigmoid/basic/sigmoid_basic.onnx differ diff --git a/validation/operations/slice/2d_basic/slice_2d_basic.onnx b/validation/operations/slice/2d_basic/slice_2d_basic.onnx index 6409387..1e1de62 100644 Binary files a/validation/operations/slice/2d_basic/slice_2d_basic.onnx and b/validation/operations/slice/2d_basic/slice_2d_basic.onnx differ diff --git a/validation/operations/slice/after_conv/slice_after_conv.onnx b/validation/operations/slice/after_conv/slice_after_conv.onnx index dbcbed7..abea383 100644 Binary files a/validation/operations/slice/after_conv/slice_after_conv.onnx and b/validation/operations/slice/after_conv/slice_after_conv.onnx differ diff --git a/validation/operations/slice/default_axes/slice_default_axes.onnx b/validation/operations/slice/default_axes/slice_default_axes.onnx index 0dcf32f..f21067b 100644 Binary files a/validation/operations/slice/default_axes/slice_default_axes.onnx and b/validation/operations/slice/default_axes/slice_default_axes.onnx differ diff --git a/validation/operations/slice/large_channel_1024/slice_large_channel_1024.onnx b/validation/operations/slice/large_channel_1024/slice_large_channel_1024.onnx index 9cc9201..faa7f42 100644 Binary files a/validation/operations/slice/large_channel_1024/slice_large_channel_1024.onnx and b/validation/operations/slice/large_channel_1024/slice_large_channel_1024.onnx differ diff --git a/validation/operations/slice/nchw_spatial_crop/slice_nchw_spatial_crop.onnx b/validation/operations/slice/nchw_spatial_crop/slice_nchw_spatial_crop.onnx index 3edfb7a..2f1544d 100644 Binary files a/validation/operations/slice/nchw_spatial_crop/slice_nchw_spatial_crop.onnx and b/validation/operations/slice/nchw_spatial_crop/slice_nchw_spatial_crop.onnx differ diff --git a/validation/operations/slice/negative_axis/slice_negative_axis.onnx b/validation/operations/slice/negative_axis/slice_negative_axis.onnx index 2fc969b..d431190 100644 Binary files a/validation/operations/slice/negative_axis/slice_negative_axis.onnx and b/validation/operations/slice/negative_axis/slice_negative_axis.onnx differ diff --git a/validation/operations/slice/negative_indices/slice_negative_indices.onnx b/validation/operations/slice/negative_indices/slice_negative_indices.onnx index 2a8d3d8..eb40762 100644 Binary files a/validation/operations/slice/negative_indices/slice_negative_indices.onnx and b/validation/operations/slice/negative_indices/slice_negative_indices.onnx differ diff --git a/validation/operations/slice/step2/slice_step2.onnx b/validation/operations/slice/step2/slice_step2.onnx index be0a876..77ab653 100644 Binary files a/validation/operations/slice/step2/slice_step2.onnx and b/validation/operations/slice/step2/slice_step2.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail/slice_yolo_decode_tail.onnx b/validation/operations/slice/yolo_decode_tail/slice_yolo_decode_tail.onnx new file mode 100644 index 0000000..d35dae8 Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail/slice_yolo_decode_tail.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_after_transpose/slice_yolo_decode_tail_after_transpose.onnx b/validation/operations/slice/yolo_decode_tail_after_transpose/slice_yolo_decode_tail_after_transpose.onnx new file mode 100644 index 0000000..ef7014b Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_after_transpose/slice_yolo_decode_tail_after_transpose.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_internal_large/slice_yolo_decode_tail_internal_large.onnx b/validation/operations/slice/yolo_decode_tail_internal_large/slice_yolo_decode_tail_internal_large.onnx new file mode 100644 index 0000000..f8d59d7 Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_internal_large/slice_yolo_decode_tail_internal_large.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_internal_small/slice_yolo_decode_tail_internal_small.onnx b/validation/operations/slice/yolo_decode_tail_internal_small/slice_yolo_decode_tail_internal_small.onnx new file mode 100644 index 0000000..184594a Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_internal_small/slice_yolo_decode_tail_internal_small.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_large_n/slice_yolo_decode_tail_large_n.onnx b/validation/operations/slice/yolo_decode_tail_large_n/slice_yolo_decode_tail_large_n.onnx new file mode 100644 index 0000000..ac92a30 Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_large_n/slice_yolo_decode_tail_large_n.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_output_b/slice_yolo_decode_tail_output_b.onnx b/validation/operations/slice/yolo_decode_tail_output_b/slice_yolo_decode_tail_output_b.onnx new file mode 100644 index 0000000..4427fcd Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_output_b/slice_yolo_decode_tail_output_b.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_output_boxes/slice_yolo_decode_tail_output_boxes.onnx b/validation/operations/slice/yolo_decode_tail_output_boxes/slice_yolo_decode_tail_output_boxes.onnx new file mode 100644 index 0000000..d20a3b3 Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_output_boxes/slice_yolo_decode_tail_output_boxes.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_output_size/slice_yolo_decode_tail_output_size.onnx b/validation/operations/slice/yolo_decode_tail_output_size/slice_yolo_decode_tail_output_size.onnx new file mode 100644 index 0000000..f7d96d9 Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_output_size/slice_yolo_decode_tail_output_size.onnx differ diff --git a/validation/operations/slice/yolo_decode_tail_output_y/slice_yolo_decode_tail_output_y.onnx b/validation/operations/slice/yolo_decode_tail_output_y/slice_yolo_decode_tail_output_y.onnx new file mode 100644 index 0000000..4c20c8d Binary files /dev/null and b/validation/operations/slice/yolo_decode_tail_output_y/slice_yolo_decode_tail_output_y.onnx differ diff --git a/validation/operations/slice/yolo_head_final_concat_large/slice_yolo_head_final_concat_large.onnx b/validation/operations/slice/yolo_head_final_concat_large/slice_yolo_head_final_concat_large.onnx new file mode 100644 index 0000000..8d4db99 Binary files /dev/null and b/validation/operations/slice/yolo_head_final_concat_large/slice_yolo_head_final_concat_large.onnx differ diff --git a/validation/operations/slice/yolo_head_final_concat_small/slice_yolo_head_final_concat_small.onnx b/validation/operations/slice/yolo_head_final_concat_small/slice_yolo_head_final_concat_small.onnx new file mode 100644 index 0000000..bd40669 Binary files /dev/null and b/validation/operations/slice/yolo_head_final_concat_small/slice_yolo_head_final_concat_small.onnx differ diff --git a/validation/operations/slice/yolo_head_output_boxes_scaled/slice_yolo_head_output_boxes_scaled.onnx b/validation/operations/slice/yolo_head_output_boxes_scaled/slice_yolo_head_output_boxes_scaled.onnx new file mode 100644 index 0000000..0efad75 Binary files /dev/null and b/validation/operations/slice/yolo_head_output_boxes_scaled/slice_yolo_head_output_boxes_scaled.onnx differ diff --git a/validation/operations/slice/yolo_head_output_scores/slice_yolo_head_output_scores.onnx b/validation/operations/slice/yolo_head_output_scores/slice_yolo_head_output_scores.onnx new file mode 100644 index 0000000..340007a Binary files /dev/null and b/validation/operations/slice/yolo_head_output_scores/slice_yolo_head_output_scores.onnx differ diff --git a/validation/operations/slice/yolo_head_output_y/slice_yolo_head_output_y.onnx b/validation/operations/slice/yolo_head_output_y/slice_yolo_head_output_y.onnx new file mode 100644 index 0000000..f606b40 Binary files /dev/null and b/validation/operations/slice/yolo_head_output_y/slice_yolo_head_output_y.onnx differ diff --git a/validation/operations/softmax/3d_last_axis/softmax_3d_last_axis.onnx b/validation/operations/softmax/3d_last_axis/softmax_3d_last_axis.onnx index 08ffdb8..eb942e4 100644 Binary files a/validation/operations/softmax/3d_last_axis/softmax_3d_last_axis.onnx and b/validation/operations/softmax/3d_last_axis/softmax_3d_last_axis.onnx differ diff --git a/validation/operations/softmax/basic/softmax_basic.onnx b/validation/operations/softmax/basic/softmax_basic.onnx index 689313b..89c9197 100644 Binary files a/validation/operations/softmax/basic/softmax_basic.onnx and b/validation/operations/softmax/basic/softmax_basic.onnx differ diff --git a/validation/operations/softmax/channel_axis/softmax_channel_axis.onnx b/validation/operations/softmax/channel_axis/softmax_channel_axis.onnx index 7bb2f90..7561af8 100644 Binary files a/validation/operations/softmax/channel_axis/softmax_channel_axis.onnx and b/validation/operations/softmax/channel_axis/softmax_channel_axis.onnx differ diff --git a/validation/operations/softmax/large_dimension_1024/softmax_large_dimension_1024.onnx b/validation/operations/softmax/large_dimension_1024/softmax_large_dimension_1024.onnx index 1e0be18..8c1a188 100644 Binary files a/validation/operations/softmax/large_dimension_1024/softmax_large_dimension_1024.onnx and b/validation/operations/softmax/large_dimension_1024/softmax_large_dimension_1024.onnx differ diff --git a/validation/operations/softmax/negative_axis/softmax_negative_axis.onnx b/validation/operations/softmax/negative_axis/softmax_negative_axis.onnx index 77a07d8..121f846 100644 Binary files a/validation/operations/softmax/negative_axis/softmax_negative_axis.onnx and b/validation/operations/softmax/negative_axis/softmax_negative_axis.onnx differ diff --git a/validation/operations/split/basic/split_basic.onnx b/validation/operations/split/basic/split_basic.onnx index 22ddd19..fd0a29f 100644 Binary files a/validation/operations/split/basic/split_basic.onnx and b/validation/operations/split/basic/split_basic.onnx differ diff --git a/validation/operations/split/equal_three_way/split_equal_three_way.onnx b/validation/operations/split/equal_three_way/split_equal_three_way.onnx index 03d4216..0842621 100644 Binary files a/validation/operations/split/equal_three_way/split_equal_three_way.onnx and b/validation/operations/split/equal_three_way/split_equal_three_way.onnx differ diff --git a/validation/operations/split/negative_axis/split_negative_axis.onnx b/validation/operations/split/negative_axis/split_negative_axis.onnx index 5770ada..3aa87b9 100644 Binary files a/validation/operations/split/negative_axis/split_negative_axis.onnx and b/validation/operations/split/negative_axis/split_negative_axis.onnx differ diff --git a/validation/operations/split/uneven_channel_axis_4d/split_uneven_channel_axis_4d.onnx b/validation/operations/split/uneven_channel_axis_4d/split_uneven_channel_axis_4d.onnx index 9a6ac8c..7846e3c 100644 Binary files a/validation/operations/split/uneven_channel_axis_4d/split_uneven_channel_axis_4d.onnx and b/validation/operations/split/uneven_channel_axis_4d/split_uneven_channel_axis_4d.onnx differ diff --git a/validation/operations/sub/after_gemm/sub_after_gemm.onnx b/validation/operations/sub/after_gemm/sub_after_gemm.onnx index 0f79fa9..3bbedb8 100644 Binary files a/validation/operations/sub/after_gemm/sub_after_gemm.onnx and b/validation/operations/sub/after_gemm/sub_after_gemm.onnx differ diff --git a/validation/operations/sub/basic/sub_basic.onnx b/validation/operations/sub/basic/sub_basic.onnx index eda1d67..6a66177 100644 Binary files a/validation/operations/sub/basic/sub_basic.onnx and b/validation/operations/sub/basic/sub_basic.onnx differ diff --git a/validation/operations/sub/broadcast_row/sub_broadcast_row.onnx b/validation/operations/sub/broadcast_row/sub_broadcast_row.onnx index 39e80a6..f75c9dd 100644 Binary files a/validation/operations/sub/broadcast_row/sub_broadcast_row.onnx and b/validation/operations/sub/broadcast_row/sub_broadcast_row.onnx differ diff --git a/validation/operations/sub/channel_broadcast_1024/sub_channel_broadcast_1024.onnx b/validation/operations/sub/channel_broadcast_1024/sub_channel_broadcast_1024.onnx index f37e916..76a0995 100644 Binary files a/validation/operations/sub/channel_broadcast_1024/sub_channel_broadcast_1024.onnx and b/validation/operations/sub/channel_broadcast_1024/sub_channel_broadcast_1024.onnx differ diff --git a/validation/operations/sub/constant_lhs_broadcast/sub_constant_lhs_broadcast.onnx b/validation/operations/sub/constant_lhs_broadcast/sub_constant_lhs_broadcast.onnx index ec9995f..f1e762e 100644 Binary files a/validation/operations/sub/constant_lhs_broadcast/sub_constant_lhs_broadcast.onnx and b/validation/operations/sub/constant_lhs_broadcast/sub_constant_lhs_broadcast.onnx differ diff --git a/validation/operations/sub/leading_dimension_broadcast/sub_leading_dimension_broadcast.onnx b/validation/operations/sub/leading_dimension_broadcast/sub_leading_dimension_broadcast.onnx index b757a01..4f07e4a 100644 Binary files a/validation/operations/sub/leading_dimension_broadcast/sub_leading_dimension_broadcast.onnx and b/validation/operations/sub/leading_dimension_broadcast/sub_leading_dimension_broadcast.onnx differ diff --git a/validation/raptor.py b/validation/raptor.py index 371c30c..e9feac7 100644 --- a/validation/raptor.py +++ b/validation/raptor.py @@ -41,7 +41,8 @@ def _format_command(cmd): def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path, crossbar_size, crossbar_count, core_count=None, pim_merge_scheduler="peft", - pim_memory_report="none", cwd=None, verbose=False, reporter=None, timeout_sec=None): + pim_memory_report="none", raptor_extra_args=None, cwd=None, verbose=False, + reporter=None, timeout_sec=None): # Define the arguments, with the possibility to set crossbar size and count args = [ network_path, @@ -57,6 +58,8 @@ def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path, args.append(f"--core-count={core_count}") if pim_memory_report != "none": args.append(f"--pim-memory-report={pim_memory_report}") + if raptor_extra_args: + args.extend(str(arg) for arg in raptor_extra_args) if verbose: args.append("--enable-timing") diff --git a/validation/tools/check_pim_tail_signature.py b/validation/tools/check_pim_tail_signature.py new file mode 100644 index 0000000..ad0ac32 --- /dev/null +++ b/validation/tools/check_pim_tail_signature.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +"""Check whether a PIM bufferized IR dump matches the YOLO depth_35 tail signature.""" + +from __future__ import annotations + +import argparse +import re +from pathlib import Path + + +NONZERO_BOX_SUBVIEW_RE = re.compile( + r"memref\.subview .*?: .* to " + r"memref<1x2x8400xf32, strided<\[[0-9]+, 8400, 1\], offset: 16800>>" +) + +FINAL_CONCAT_RE = re.compile( + r"pim\.concat axis 1 .*?: " + r"\(memref<1x4x8400xf32(?:, strided<[^>]+>)?>, " + r"memref<1x80x8400xf32(?:, strided<[^>]+>)?>\) -> memref<1x84x8400xf32>" +) + +FINAL_OUTPUT_RE = re.compile(r"memref<1x84x8400xf32>") + + +def resolve_pim1_buff(path_str: str) -> Path: + path = Path(path_str) + if path.is_dir(): + candidate = path / "pim1_buff.mlir" + if candidate.is_file(): + return candidate + candidate = path / "dialects" / "pim1_buff.mlir" + if candidate.is_file(): + return candidate + if path.is_file(): + return path + raise FileNotFoundError(f"could not find pim1_buff.mlir under {path}") + + +def find_line_offsets(text: str) -> list[int]: + offsets = [0] + for match in re.finditer(r"\n", text): + offsets.append(match.end()) + return offsets + + +def line_number(offsets: list[int], position: int) -> int: + lo = 0 + hi = len(offsets) + while lo + 1 < hi: + mid = (lo + hi) // 2 + if offsets[mid] <= position: + lo = mid + else: + hi = mid + return lo + 1 + + +def extract_block(lines: list[str], center_line: int, radius: int = 1) -> str: + start = max(center_line - radius - 1, 0) + end = min(center_line + radius, len(lines)) + return "\n".join(lines[start:end]) + + +def check_signature(path: Path) -> dict[str, object]: + text = path.read_text() + lines = text.splitlines() + offsets = find_line_offsets(text) + + subview_match = NONZERO_BOX_SUBVIEW_RE.search(text) + subview_var = None + subview_line = None + subview_snippet = None + if subview_match: + line_no = line_number(offsets, subview_match.start()) + subview_line = line_no + subview_snippet = extract_block(lines, line_no, radius=0) + line_text = lines[line_no - 1] + lhs = line_text.split("=", 1)[0].strip() + subview_var = lhs + + direct_feed_matches: list[tuple[str, int, str]] = [] + if subview_var: + for idx, line in enumerate(lines, start=1): + if subview_var not in line: + continue + if not re.search(r"pim\.vv(add|sub|mul)|pim\.concat", line): + continue + direct_feed_matches.append((line.strip(), idx, extract_block(lines, idx, radius=0))) + + subview_used_as_dest = any(re.search(rf"pim\.vv(add|sub|mul)\([^)]*, [^)]*, {re.escape(subview_var)}\)", line) + for line, _, _ in direct_feed_matches) if subview_var else False + final_concat_match = FINAL_CONCAT_RE.search(text) + final_output_shape = bool(FINAL_OUTPUT_RE.search(text)) + + return { + "path": path, + "has_nonzero_box_subview": bool(subview_match), + "nonzero_box_subview_line": subview_line, + "nonzero_box_subview_snippet": subview_snippet, + "subview_var": subview_var, + "direct_pim_uses": direct_feed_matches, + "subview_used_as_dest": subview_used_as_dest, + "has_final_output_concat": bool(final_concat_match), + "has_final_output_shape": final_output_shape, + } + + +def print_report(result: dict[str, object]) -> None: + path = result["path"] + print(f"== {path} ==") + print(f"nonzero_box_subview: {result['has_nonzero_box_subview']}") + if result["nonzero_box_subview_snippet"]: + print(result["nonzero_box_subview_snippet"]) + direct_uses = result["direct_pim_uses"] + print(f"direct_pim_use_count: {len(direct_uses)}") + for line, line_no, snippet in direct_uses: + print(f"line {line_no}: {line}") + print(snippet) + print(f"subview_used_as_destination: {result['subview_used_as_dest']}") + print(f"final_output_concat_4_80_to_84: {result['has_final_output_concat']}") + print(f"contains_output_shape_1x84x8400: {result['has_final_output_shape']}") + structurally_equivalent = ( + result["has_nonzero_box_subview"] + and bool(direct_uses) + and result["has_final_output_concat"] + and result["has_final_output_shape"] + ) + print(f"structurally_equivalent_to_yolo_tail: {structurally_equivalent}") + print() + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("paths", nargs="+", help="Workspace, dialect dir, or pim1_buff.mlir to inspect") + args = parser.parse_args() + + for path_str in args.paths: + result = check_signature(resolve_pim1_buff(path_str)) + print_report(result) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/validation/tools/make_yolo_depth35_output_variants.py b/validation/tools/make_yolo_depth35_output_variants.py new file mode 100644 index 0000000..f57415b --- /dev/null +++ b/validation/tools/make_yolo_depth35_output_variants.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +"""Generate output-only ONNX variants for the real yolo11n depth_35 model.""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import onnx +from onnx import TensorProto, helper, shape_inference + + +ORIGINAL_DEBUG_OUTPUTS = [ + "/model.20/act/Mul_output_0", + "/model.9/cv1/act/Mul_output_0", + "/model.8/m.0/m/m.0/cv2/conv/Conv_output_0", + "/model.23/cv3.1/cv3.1.0/cv3.1.0.0/act/Mul_output_0", + "/model.8/m.0/m/m.1/cv1/act/Mul_output_0", + "/model.23/dfl/Transpose_output_0", + "output0", + "/model.23/cv2.1/cv2.1.0/act/Mul_output_0", +] + +LOCALIZATION_NODE_NAMES = [ + "/model.23/dfl/Transpose", + "/model.23/dfl/Softmax", + "/model.23/dfl/conv/Conv", + "/model.23/dfl/Reshape_1", + "/model.23/Slice", + "/model.23/Slice_1", + "/model.23/Sub", + "/model.23/Add_1", + "/model.23/Sub_1", + "/model.23/Concat_4", + "/model.23/Mul_2", + "/model.23/Sigmoid", + "/model.23/Concat_5", +] + + +def collect_value_infos(model: onnx.ModelProto) -> dict[str, onnx.ValueInfoProto]: + infos: dict[str, onnx.ValueInfoProto] = {} + for value in list(model.graph.input) + list(model.graph.output) + list(model.graph.value_info): + infos[value.name] = value + return infos + + +def clone_value_info(value: onnx.ValueInfoProto, name: str | None = None) -> onnx.ValueInfoProto: + cloned = onnx.ValueInfoProto() + cloned.CopyFrom(value) + if name is not None: + cloned.name = name + return cloned + + +def make_tensor_value_info_from_type(name: str, tensor_type: onnx.TypeProto.Tensor) -> onnx.ValueInfoProto: + shape = [] + for dim in tensor_type.shape.dim: + if dim.HasField("dim_value"): + shape.append(dim.dim_value) + elif dim.HasField("dim_param"): + shape.append(dim.dim_param) + else: + shape.append(None) + return helper.make_tensor_value_info(name, tensor_type.elem_type, shape) + + +def lookup_output_value_info(model: onnx.ModelProto, value_infos: dict[str, onnx.ValueInfoProto], output_name: str) -> onnx.ValueInfoProto: + value = value_infos.get(output_name) + if value is not None: + return clone_value_info(value) + + for initializer in model.graph.initializer: + if initializer.name != output_name: + continue + dims = list(initializer.dims) + return helper.make_tensor_value_info(output_name, initializer.data_type, dims) + + raise KeyError(f"missing value info for output {output_name}") + + +def build_model_with_outputs( + base_model: onnx.ModelProto, + inferred_model: onnx.ModelProto, + output_names: list[str], + extra_nodes: list[onnx.NodeProto] | None = None, + extra_value_infos: list[onnx.ValueInfoProto] | None = None, +) -> onnx.ModelProto: + value_infos = collect_value_infos(inferred_model) + if extra_value_infos: + for value in extra_value_infos: + value_infos[value.name] = value + model = onnx.ModelProto() + model.CopyFrom(base_model) + + del model.graph.output[:] + for output_name in output_names: + model.graph.output.append(lookup_output_value_info(inferred_model, value_infos, output_name)) + + if extra_nodes: + model.graph.node.extend(extra_nodes) + if extra_value_infos: + model.graph.value_info.extend(extra_value_infos) + + onnx.checker.check_model(model) + return model + + +def find_node_output(model: onnx.ModelProto, node_name: str) -> str: + for node in model.graph.node: + if node.name == node_name: + return node.output[0] + + matching_names = sorted(node.name for node in model.graph.node if "model.23" in node.name and "dfl" in node.name) + suffix = "" + if matching_names: + suffix = "\nmatching /model.23 dfl nodes:\n " + "\n ".join(matching_names) + raise KeyError(f"could not find node named {node_name}{suffix}") + + +def save_variant(model: onnx.ModelProto, out_dir: Path, variant_name: str) -> None: + variant_dir = out_dir / variant_name + variant_dir.mkdir(parents=True, exist_ok=True) + onnx.save(model, variant_dir / f"{variant_name}.onnx") + + +def unique_preserving_order(names: list[str]) -> list[str]: + seen: set[str] = set() + unique_names: list[str] = [] + for name in names: + if name in seen: + continue + seen.add(name) + unique_names.append(name) + return unique_names + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--input", required=True, help="Path to yolo11n depth_35 ONNX model.") + parser.add_argument("--out-dir", required=True, help="Directory where variants will be generated.") + args = parser.parse_args() + + input_path = Path(args.input).resolve() + out_dir = Path(args.out_dir).resolve() + out_dir.mkdir(parents=True, exist_ok=True) + + base_model = onnx.load(input_path) + inferred_model = shape_inference.infer_shapes(base_model) + + output0_only = build_model_with_outputs(base_model, inferred_model, ["output0"]) + save_variant(output0_only, out_dir, "output0_only") + + output0_first = build_model_with_outputs( + base_model, + inferred_model, + ["output0"] + [name for name in ORIGINAL_DEBUG_OUTPUTS if name != "output0"], + ) + save_variant(output0_first, out_dir, "output0_first_with_original_debug_outputs") + + output0_last = build_model_with_outputs( + base_model, + inferred_model, + [name for name in ORIGINAL_DEBUG_OUTPUTS if name != "output0"] + ["output0"], + ) + save_variant(output0_last, out_dir, "output0_last_with_original_debug_outputs") + + identity_name = "output0_identity" + identity_node = helper.make_node("Identity", ["output0"], [identity_name], name="output0_identity_node") + output0_value = lookup_output_value_info(inferred_model, collect_value_infos(inferred_model), "output0") + duplicated = build_model_with_outputs( + base_model, + inferred_model, + ["output0", identity_name], + extra_nodes=[identity_node], + extra_value_infos=[make_tensor_value_info_from_type(identity_name, output0_value.type.tensor_type)], + ) + save_variant(duplicated, out_dir, "output0_duplicated") + + localization_outputs = [ + "/model.23/dfl/Transpose_output_0", + find_node_output(base_model, "/model.23/dfl/Softmax"), + find_node_output(base_model, "/model.23/dfl/conv/Conv"), + find_node_output(base_model, "/model.23/dfl/Reshape_1"), + find_node_output(base_model, "/model.23/Slice"), + find_node_output(base_model, "/model.23/Slice_1"), + find_node_output(base_model, "/model.23/Sub"), + find_node_output(base_model, "/model.23/Add_1"), + find_node_output(base_model, "/model.23/Sub_1"), + find_node_output(base_model, "/model.23/Concat_4"), + find_node_output(base_model, "/model.23/Mul_2"), + find_node_output(base_model, "/model.23/Sigmoid"), + find_node_output(base_model, "/model.23/Concat_5"), + "output0", + ] + localization = build_model_with_outputs( + base_model, + inferred_model, + unique_preserving_order(localization_outputs), + ) + save_variant(localization, out_dir, "yolo_tail_localization_outputs") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/validation/validate.py b/validation/validate.py index 8d234aa..f9441ab 100644 --- a/validation/validate.py +++ b/validation/validate.py @@ -77,6 +77,8 @@ def main(): help="Scheduler used by the Spatial merge-compute-nodes pass.") ap.add_argument("--pim-memory-report", choices=("none", "summary", "full"), default="none", help="Emit a human-readable PIM memory planning report during codegen.") + ap.add_argument("--raptor-extra-arg", action="append", default=[], + help="Additional argument to pass through to the Raptor compiler. Repeat as needed.") ap.add_argument("--command-timeout-seconds", type=float, default=1000000.0, help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.") ap.add_argument("--clean", action="store_true", @@ -145,6 +147,7 @@ def main(): onnx_path, a.raptor_path, a.onnx_include_dir, simulator_dir, crossbar_size=a.crossbar_size, crossbar_count=a.crossbar_count, core_count=a.core_count, pim_merge_scheduler=a.pim_merge_scheduler, pim_memory_report=a.pim_memory_report, + raptor_extra_args=a.raptor_extra_arg, command_timeout_seconds=a.command_timeout_seconds, threshold=a.threshold, seed=a.seed, diff --git a/validation/validate_one.py b/validation/validate_one.py index 44858c4..ffed3c0 100644 --- a/validation/validate_one.py +++ b/validation/validate_one.py @@ -289,7 +289,8 @@ def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1 def validate_network(network_onnx_path, raptor_path, onnx_include_dir, simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None, - pim_merge_scheduler="peft", pim_memory_report="none", threshold=1e-3, + pim_merge_scheduler="peft", pim_memory_report="none", raptor_extra_args=None, + threshold=1e-3, seed=0, reporter=None, model_index=1, model_total=1, verbose=False, command_timeout_seconds=60.0, mode=MODE_FULL): network_onnx_path = Path(network_onnx_path).resolve() @@ -343,7 +344,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, pim_pass_timings = compile_with_raptor( network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count, core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, - pim_memory_report=pim_memory_report, + pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args, cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") reporter.advance() @@ -383,7 +384,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, pim_pass_timings = compile_with_raptor( network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count, core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, - pim_memory_report=pim_memory_report, + pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args, cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") reporter.advance()