From 39830be8889d4579135ca2428932c844e20afa95 Mon Sep 17 00:00:00 2001
From: NiccoloN
Date: Mon, 30 Mar 2026 15:41:12 +0200
Subject: [PATCH] add support for operations: reduceMean, add, mul, div, sigmoid

---
 .../src/lib/json_to_instruction/json_isa.rs | 4 +-
 .../src/lib/memory_manager/type_traits.rs | 12 +-
 src/PIM/Compiler/PimCodeGen.cpp | 10 +-
 .../Conversion/ONNXToSpatial/CMakeLists.txt | 3 +
 src/PIM/Conversion/ONNXToSpatial/Common.hpp | 2 -
 .../ONNXToSpatial/ONNXToSpatialPass.cpp | 7 +
 src/PIM/Conversion/ONNXToSpatial/Patterns.hpp | 6 +
 .../Patterns/Math/Elementwise.cpp | 204 ++++++++
 .../Patterns/Math/ReduceMean.cpp | 163 ++++++
 .../ONNXToSpatial/Patterns/NN/Sigmoid.cpp | 36 ++
 .../Conversion/SpatialToPim/SpatialToPim.td | 12 +
 .../SpatialToPim/SpatialToPimPass.cpp | 64 ++-
 src/PIM/Dialect/Spatial/Spatial.td | 16 +
 .../SpatialBufferizableOpInterface.cpp | 111 ++--
 validation/operations/README.md | 144 ++++--
 .../add/after_gemm/add_after_gemm.onnx | Bin 0 -> 8594 bytes
 .../operations/add/basic/add_basic.onnx | Bin 0 -> 100 bytes
 .../add/broadcast_row/add_broadcast_row.onnx | Bin 0 -> 130 bytes
 .../div/after_gemm/div_after_gemm.onnx | Bin 0 -> 8594 bytes
 .../operations/div/basic/div_basic.onnx | Bin 0 -> 223 bytes
 .../scalar_constant/div_scalar_constant.onnx | Bin 0 -> 104 bytes
 validation/operations/gen_tests.py | 487 ++++++++++++++----
 .../mul/after_conv/mul_after_conv.onnx | Bin 0 -> 460 bytes
 .../operations/mul/basic/mul_basic.onnx | Bin 0 -> 100 bytes
 .../scalar_constant/mul_scalar_constant.onnx | Bin 0 -> 104 bytes
 .../4d_spatial/reduce_mean_4d_spatial.onnx | Bin 0 -> 144 bytes
 .../after_conv/reduce_mean_after_conv.onnx | Bin 0 -> 478 bytes
 .../reduce_mean/basic/reduce_mean_basic.onnx | Bin 0 -> 120 bytes
 .../keepdims_0/reduce_mean_keepdims_0.onnx | Bin 0 -> 121 bytes
 .../operations/sigmoid/4d/sigmoid_4d.onnx | Bin 0 -> 97 bytes
 .../after_gemm/sigmoid_after_gemm.onnx | Bin 0 -> 8458 bytes
 .../sigmoid/basic/sigmoid_basic.onnx | Bin 0 -> 84 bytes
 32 files changed, 1057 insertions(+), 224 deletions(-)
 create mode 100644 src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp
 create mode 100644 src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp
 create mode 100644 src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp
 create mode 100644 validation/operations/add/after_gemm/add_after_gemm.onnx
 create mode 100644 validation/operations/add/basic/add_basic.onnx
 create mode 100644 validation/operations/add/broadcast_row/add_broadcast_row.onnx
 create mode 100644 validation/operations/div/after_gemm/div_after_gemm.onnx
 create mode 100644 validation/operations/div/basic/div_basic.onnx
 create mode 100644 validation/operations/div/scalar_constant/div_scalar_constant.onnx
 create mode 100644 validation/operations/mul/after_conv/mul_after_conv.onnx
 create mode 100644 validation/operations/mul/basic/mul_basic.onnx
 create mode 100644 validation/operations/mul/scalar_constant/mul_scalar_constant.onnx
 create mode 100644 validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx
 create mode 100644 validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx
 create mode 100644 validation/operations/reduce_mean/basic/reduce_mean_basic.onnx
 create mode 100644 validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx
 create mode 100644 validation/operations/sigmoid/4d/sigmoid_4d.onnx
 create mode 100644 validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx
 create mode 100644 validation/operations/sigmoid/basic/sigmoid_basic.onnx

diff
--git a/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs b/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs index 068e639..f2885b8 100644 --- a/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs +++ b/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs @@ -1,4 +1,4 @@ -use anyhow::{Context, Result}; +use anyhow::{Context, Result, ensure}; use paste::paste; use std::{collections::HashMap, mem::offset_of, sync::LazyLock}; @@ -36,6 +36,7 @@ static SIMD: LazyLock> = LazyLock::new(|| { add_to_json_map!(storage, vvmax); add_to_json_map!(storage, vvsll); add_to_json_map!(storage, vvsra); + add_to_json_map!(storage, vavg); add_to_json_map!(storage, vrelu); add_to_json_map!(storage, vtanh); add_to_json_map!(storage, vsigm); @@ -339,6 +340,7 @@ fn json_to_vavg( let rd = json_i64!(json, "rd") as i32; let rs1 = json_i64!(json, "rs1") as i32; let rs2 = json_i64!(json, "rs2") as i32; + ensure!(rs2 == 1, "vavg only supports stride 1"); let len = json_i64!(json, "len") as i32; let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap()); inst_data_builder diff --git a/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs b/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs index 7eedea7..151209e 100644 --- a/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs +++ b/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs @@ -55,19 +55,15 @@ pub trait HasSigm { impl HasSigm for f32 { fn sigm(self) -> Self { - let x = self; - let e = std::f32::consts::E; - let ex = x.powf(x); - (ex) / (1.0+ex) + let ex = self.exp(); + ex / (1.0 + ex) } } impl HasSigm for f64 { fn sigm(self) -> Self { - let x = self; - let e = std::f64::consts::E; - let ex = x.powf(x); - (ex) / (1.0+ex) + let ex = self.exp(); + ex / (1.0 + ex) } } diff --git a/src/PIM/Compiler/PimCodeGen.cpp b/src/PIM/Compiler/PimCodeGen.cpp index 7c1f6e9..d799c46 100644 --- a/src/PIM/Compiler/PimCodeGen.cpp +++ b/src/PIM/Compiler/PimCodeGen.cpp @@ -121,6 +121,13 @@ json::Object PimCodeGen::createEmptyOffset() { return offset; } +static json::Object createRs1OnlyOffset() { + json::Object offset; + offset["offset_select"] = 1; + offset["offset_value"] = 0; + return offset; +} + void PimCodeGen::emitInstruction(json::Object instruction) const { coreFileStream << json::Value(std::move(instruction)) << ','; } @@ -331,7 +338,8 @@ void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp) const { json["op"] = "vavg"; json["rd"] = 0; json["rs1"] = 1; - json["offset"] = createEmptyOffset(); + json["rs2"] = 1; + json["offset"] = createRs1OnlyOffset(); json["len"] = getValueSizeInBytes(vavgOp.getInput()); emitInstruction(std::move(json)); } diff --git a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt index 245ff6f..1e3b3ae 100644 --- a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt +++ b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt @@ -4,10 +4,13 @@ add_public_tablegen_target(ONNXToSpatialIncGen) add_pim_library(OMONNXToSpatial Patterns/Math/Conv.cpp + Patterns/Math/Elementwise.cpp Patterns/Math/Gemm.cpp Patterns/Math/MatMul.cpp + Patterns/Math/ReduceMean.cpp Patterns/NN/Pool.cpp Patterns/NN/Relu.cpp + Patterns/NN/Sigmoid.cpp Patterns/Tensor/Concat.cpp Patterns/Tensor/Reshape.cpp ONNXToSpatialPass.cpp diff --git a/src/PIM/Conversion/ONNXToSpatial/Common.hpp b/src/PIM/Conversion/ONNXToSpatial/Common.hpp index 
864148d..56a5920 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common.hpp @@ -14,8 +14,6 @@ #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" -#define DEFINE_MAP_OP(opname) opname, - namespace onnx_mlir { template diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp index a6dc98f..759f4e7 100644 --- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp @@ -72,11 +72,15 @@ void ONNXToSpatialPass::runOnOperation() { target.addLegalDialect(); target.addDynamicallyLegalOp( [](ONNXMatMulOp op) { return cast(op.getY().getType()).getRank() != 2; }); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); + target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); @@ -86,10 +90,13 @@ void ONNXToSpatialPass::runOnOperation() { RewritePatternSet patterns(ctx); patterns.add(ctx); + populateElementwisePatterns(patterns, ctx); populateGemmPatterns(patterns, ctx); populateConvPatterns(patterns, ctx); populatePoolPatterns(patterns, ctx); + populateReduceMeanPatterns(patterns, ctx); populateReluPatterns(patterns, ctx); + populateSigmoidPatterns(patterns, ctx); populateConcatPatterns(patterns, ctx); populateReshapePatterns(patterns, ctx); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp index 58f9a10..38232ba 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp @@ -7,14 +7,20 @@ namespace onnx_mlir { void populateConvPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); +void populateElementwisePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateGemmPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); void populateMatMulRewritePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); void populatePoolPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); +void populateReduceMeanPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); +void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp new file mode 100644 index 0000000..21221e5 --- /dev/null +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp @@ -0,0 +1,204 @@ +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "llvm/ADT/SmallVector.h" + +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" +#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" +#include "src/Dialect/ONNX/ONNXOps.hpp" + +using namespace mlir; + +namespace onnx_mlir { +namespace { + +static SmallVector 
computeRowMajorStrides(ArrayRef shape) { + SmallVector strides(shape.size(), 1); + for (int64_t i = static_cast(shape.size()) - 2; i >= 0; --i) + strides[i] = strides[i + 1] * shape[i + 1]; + return strides; +} + +static DenseElementsAttr getDenseConstantAttr(Value value) { + if (auto constantOp = value.getDefiningOp()) + return dyn_cast(constantOp.getValue()); + + if (auto constantOp = value.getDefiningOp()) + return dyn_cast_or_null(constantOp.getValueAttr()); + + return nullptr; +} + +static FailureOr materializeBroadcastedConstantTensor(Value value, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + auto denseAttr = getDenseConstantAttr(value); + if (!denseAttr) + return failure(); + + auto sourceType = dyn_cast(denseAttr.getType()); + if (!sourceType || !sourceType.hasStaticShape() || !resultType.hasStaticShape()) + return failure(); + + if (sourceType == resultType) + return value; + + ArrayRef sourceShape = sourceType.getShape(); + ArrayRef resultShape = resultType.getShape(); + if (sourceShape.size() > resultShape.size()) + return failure(); + + const int64_t rankOffset = static_cast(resultShape.size() - sourceShape.size()); + for (int64_t i = 0; i < static_cast(resultShape.size()); ++i) { + const int64_t sourceIndex = i - rankOffset; + const int64_t sourceDim = sourceIndex < 0 ? 1 : sourceShape[sourceIndex]; + const int64_t resultDim = resultShape[i]; + if (sourceDim != 1 && sourceDim != resultDim) + return failure(); + } + + SmallVector sourceValues(denseAttr.getValues()); + SmallVector sourceStrides = computeRowMajorStrides(sourceShape); + SmallVector resultStrides = computeRowMajorStrides(resultShape); + + SmallVector resultValues; + resultValues.reserve(resultType.getNumElements()); + + for (int64_t flatIndex = 0; flatIndex < resultType.getNumElements(); ++flatIndex) { + int64_t remaining = flatIndex; + int64_t sourceFlatIndex = 0; + + for (int64_t i = 0; i < static_cast(resultShape.size()); ++i) { + const int64_t resultIndex = resultStrides.empty() ? 0 : remaining / resultStrides[i]; + remaining = resultStrides.empty() ? 0 : remaining % resultStrides[i]; + + const int64_t sourceIndex = i - rankOffset; + if (sourceIndex < 0) + continue; + + const int64_t sourceDim = sourceShape[sourceIndex]; + const int64_t mappedIndex = sourceDim == 1 ? 
0 : resultIndex; + sourceFlatIndex += mappedIndex * sourceStrides[sourceIndex]; + } + + resultValues.push_back(sourceValues[sourceFlatIndex]); + } + + auto broadcastedAttr = DenseElementsAttr::get(resultType, resultValues); + return arith::ConstantOp::create(rewriter, loc, resultType, broadcastedAttr).getResult(); +} + +static FailureOr prepareElementwiseOperand(Value value, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + auto valueType = dyn_cast(value.getType()); + if (!valueType || !valueType.hasStaticShape()) + return failure(); + + if (valueType == resultType) + return value; + + return materializeBroadcastedConstantTensor(value, resultType, rewriter, loc); +} + +static FailureOr materializeReciprocalTensor(Value value, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + auto broadcastedValue = materializeBroadcastedConstantTensor(value, resultType, rewriter, loc); + if (failed(broadcastedValue)) + return failure(); + + auto denseAttr = dyn_cast(getDenseConstantAttr(*broadcastedValue)); + if (!denseAttr) + return failure(); + + SmallVector reciprocalValues; + reciprocalValues.reserve(denseAttr.getNumElements()); + for (const APFloat& valueAttr : denseAttr.getValues()) { + APFloat reciprocal(valueAttr.getSemantics(), 1); + auto status = reciprocal.divide(valueAttr, APFloat::rmNearestTiesToEven); + if (status & APFloat::opInvalidOp) + return failure(); + reciprocalValues.push_back(std::move(reciprocal)); + } + + auto reciprocalAttr = DenseFPElementsAttr::get(resultType, reciprocalValues); + return arith::ConstantOp::create(rewriter, loc, resultType, reciprocalAttr).getResult(); +} + +template +struct BinaryElementwiseToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + using Adaptor = typename OnnxOp::Adaptor; + + LogicalResult matchAndRewrite(OnnxOp op, Adaptor adaptor, ConversionPatternRewriter& rewriter) const override { + auto resultType = dyn_cast(op->getResult(0).getType()); + if (!resultType || !resultType.hasStaticShape()) + return failure(); + + Location loc = op.getLoc(); + auto lhs = prepareElementwiseOperand(adaptor.getOperands()[0], resultType, rewriter, loc); + if (failed(lhs)) + return failure(); + + auto rhs = prepareElementwiseOperand(adaptor.getOperands()[1], resultType, rewriter, loc); + if (failed(rhs)) + return failure(); + + constexpr size_t numInputs = 2; + auto computeOp = + createSpatCompute(rewriter, loc, resultType, {}, ValueRange {*lhs, *rhs}, [&](Value x, Value y) { + auto loweredOp = SpatialOp::create(rewriter, loc, resultType, x, y); + spatial::SpatYieldOp::create(rewriter, loc, loweredOp.getResult()); + }); + + rewriter.replaceOp(op, computeOp); + return success(); + } +}; + +struct DivToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ONNXDivOp op, ONNXDivOpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override { + auto resultType = dyn_cast(op.getResult().getType()); + if (!resultType || !resultType.hasStaticShape()) + return failure(); + + Location loc = op.getLoc(); + auto lhs = prepareElementwiseOperand(adaptor.getA(), resultType, rewriter, loc); + if (failed(lhs)) + return failure(); + + auto reciprocalRhs = materializeReciprocalTensor(adaptor.getB(), resultType, rewriter, loc); + if (failed(reciprocalRhs)) + return failure(); + + constexpr size_t numInputs = 2; + auto computeOp = createSpatCompute( + rewriter, loc, resultType, {}, ValueRange 
{*lhs, *reciprocalRhs}, [&](Value x, Value reciprocal) { + auto mulOp = spatial::SpatVMulOp::create(rewriter, loc, resultType, x, reciprocal); + spatial::SpatYieldOp::create(rewriter, loc, mulOp.getResult()); + }); + + rewriter.replaceOp(op, computeOp); + return success(); + } +}; + +} // namespace + +void populateElementwisePatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + patterns.add>(ctx); + patterns.add>(ctx); + patterns.add(ctx); +} + +} // namespace onnx_mlir diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp new file mode 100644 index 0000000..c78e078 --- /dev/null +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp @@ -0,0 +1,163 @@ +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "llvm/ADT/SmallVector.h" + +#include + +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" +#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" +#include "src/Dialect/ONNX/ONNXOps.hpp" + +using namespace mlir; + +namespace onnx_mlir { +namespace { + +static SmallVector normalizeAxes(ArrayAttr axesAttr, int64_t rank) { + SmallVector normalizedAxes; + if (!axesAttr) { + normalizedAxes.reserve(rank); + for (int64_t axis = 0; axis < rank; axis++) + normalizedAxes.push_back(axis); + return normalizedAxes; + } + + normalizedAxes.reserve(axesAttr.size()); + for (Attribute attr : axesAttr) { + int64_t axis = cast(attr).getInt(); + normalizedAxes.push_back(axis >= 0 ? axis : rank + axis); + } + + llvm::sort(normalizedAxes); + normalizedAxes.erase(std::unique(normalizedAxes.begin(), normalizedAxes.end()), normalizedAxes.end()); + return normalizedAxes; +} + +static SmallVector buildReducedAxesMask(ArrayRef axes, int64_t rank) { + SmallVector reducedAxes(rank, false); + for (int64_t axis : axes) { + if (axis < 0 || axis >= rank) + return {}; + reducedAxes[axis] = true; + } + return reducedAxes; +} + +static RankedTensorType getAllOnesType(RankedTensorType inputType, Type elementType) { + return RankedTensorType::get(SmallVector(inputType.getRank(), 1), elementType); +} + +static SmallVector buildCollapseReassociation(ArrayRef reducedAxes) { + SmallVector reassociation; + ReassociationIndices currentGroup; + + for (auto [axis, isReduced] : llvm::enumerate(reducedAxes)) { + currentGroup.push_back(axis); + if (!isReduced) { + reassociation.push_back(currentGroup); + currentGroup.clear(); + } + } + + if (!currentGroup.empty()) { + if (reassociation.empty()) + reassociation.push_back(std::move(currentGroup)); + else + reassociation.back().append(currentGroup.begin(), currentGroup.end()); + } + + return reassociation; +} + +static Value createAverageCompute(Value input, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + constexpr size_t numInputs = 1; + auto computeOp = createSpatCompute(rewriter, loc, resultType, {}, ValueRange {input}, [&](Value x) { + auto avgOp = spatial::SpatVAvgOp::create(rewriter, loc, resultType, x); + spatial::SpatYieldOp::create(rewriter, loc, avgOp.getResult()); + }); + return computeOp.getResult(0); +} + +static Value buildReduceMeanKeepdims(Value input, + ArrayRef reducedAxes, + int64_t axis, + RankedTensorType leafType, + ConversionPatternRewriter& rewriter, + Location loc) { + int64_t rank = cast(input.getType()).getRank(); + if (axis == rank) + return createAverageCompute(input, 
leafType, rewriter, loc); + + if (reducedAxes[axis]) + return buildReduceMeanKeepdims(input, reducedAxes, axis + 1, leafType, rewriter, loc); + + SmallVector slices = sliceTensor(input, axis, /*sliceSize=*/1, rewriter, loc); + SmallVector reducedSlices; + reducedSlices.reserve(slices.size()); + for (Value slice : slices) + reducedSlices.push_back(buildReduceMeanKeepdims(slice, reducedAxes, axis + 1, leafType, rewriter, loc)); + + return reducedSlices.size() == 1 ? reducedSlices.front() + : tensor::ConcatOp::create(rewriter, loc, axis, reducedSlices).getResult(); +} + +static Value squeezeReducedAxes(Value keepdimsValue, + RankedTensorType resultType, + ArrayRef reducedAxes, + ConversionPatternRewriter& rewriter, + Location loc) { + if (resultType.getRank() == 0) { + SmallVector indices(cast(keepdimsValue.getType()).getRank(), + arith::ConstantIndexOp::create(rewriter, loc, 0)); + Value element = tensor::ExtractOp::create(rewriter, loc, keepdimsValue, indices); + return tensor::FromElementsOp::create(rewriter, loc, resultType, ValueRange {element}); + } + + return tensor::CollapseShapeOp::create( + rewriter, loc, resultType, keepdimsValue, buildCollapseReassociation(reducedAxes)) + .getResult(); +} + +struct ReduceMeanToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp, + ONNXReduceMeanV13OpAdaptor adaptor, + ConversionPatternRewriter& rewriter) const override { + auto inputType = dyn_cast(adaptor.getData().getType()); + auto resultType = dyn_cast(reduceMeanOp.getReduced().getType()); + if (!inputType || !resultType || !inputType.hasStaticShape() || !resultType.hasStaticShape()) + return failure(); + + SmallVector axes = normalizeAxes(reduceMeanOp.getAxesAttr(), inputType.getRank()); + SmallVector reducedAxes = buildReducedAxesMask(axes, inputType.getRank()); + if (reducedAxes.empty() && inputType.getRank() != 0) + return failure(); + + Location loc = reduceMeanOp.getLoc(); + RankedTensorType leafType = getAllOnesType(inputType, resultType.getElementType()); + Value reducedKeepdims = buildReduceMeanKeepdims(adaptor.getData(), reducedAxes, /*axis=*/0, leafType, rewriter, loc); + + if (reduceMeanOp.getKeepdims() != 0) { + rewriter.replaceOp(reduceMeanOp, reducedKeepdims); + return success(); + } + + Value reduced = squeezeReducedAxes(reducedKeepdims, resultType, reducedAxes, rewriter, loc); + rewriter.replaceOp(reduceMeanOp, reduced); + return success(); + } +}; + +} // namespace + +void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + patterns.add(ctx); +} + +} // namespace onnx_mlir diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp new file mode 100644 index 0000000..1fc13e8 --- /dev/null +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp @@ -0,0 +1,36 @@ +#include "mlir/Transforms/DialectConversion.h" + +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp" +#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" +#include "src/Dialect/ONNX/ONNXOps.hpp" + +using namespace mlir; + +namespace onnx_mlir { +namespace { + +struct SigmoidToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite(ONNXSigmoidOp sigmoidOp, + ONNXSigmoidOpAdaptor adaptor, + ConversionPatternRewriter& rewriter) const override { + Location loc = sigmoidOp.getLoc(); + Type resultType = 
sigmoidOp.getResult().getType(); + constexpr size_t numInputs = 1; + auto computeOp = createSpatCompute(rewriter, loc, resultType, {}, adaptor.getX(), [&](Value x) { + auto spatSigmoidOp = spatial::SpatSigmoidOp::create(rewriter, loc, resultType, x); + spatial::SpatYieldOp::create(rewriter, loc, spatSigmoidOp.getResult()); + }); + rewriter.replaceOp(sigmoidOp, computeOp); + return success(); + } +}; + +} // namespace + +void populateSigmoidPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + patterns.add(ctx); +} + +} // namespace onnx_mlir diff --git a/src/PIM/Conversion/SpatialToPim/SpatialToPim.td b/src/PIM/Conversion/SpatialToPim/SpatialToPim.td index de37037..ce94a90 100644 --- a/src/PIM/Conversion/SpatialToPim/SpatialToPim.td +++ b/src/PIM/Conversion/SpatialToPim/SpatialToPim.td @@ -39,6 +39,12 @@ def spatToPimVVMul : Pat< (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) >; +def spatToPimVAvg : Pat< + (SpatVAvgOp:$srcOpRes $input), + (PimVAvgOp $input, + (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) +>; + def spatToPimVVMax : Pat< (SpatVMaxOp:$srcOpRes $a, $b), (PimVVMaxOp $a, $b, @@ -51,4 +57,10 @@ def spatToPimVRelu : Pat< (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) >; +def spatToPimVSigm : Pat< + (SpatSigmoidOp:$srcOpRes $input), + (PimVSigmOp $input, + (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) +>; + #endif // SPATIAL_TO_PIM diff --git a/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp b/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp index fedb72d..b527f6d 100644 --- a/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp +++ b/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp @@ -161,26 +161,41 @@ void SpatialToPimPass::runOnOperation() { } for (auto receiveOp : funcOp.getOps()) { - operationsToRemove.push_back(receiveOp); + markOpToRemove(receiveOp); runOnReceiveOp(receiveOp, rewriter); } for (auto computeOp : funcOp.getOps()) { - operationsToRemove.push_back(computeOp); + markOpToRemove(computeOp); runOnComputeOp(computeOp, rewriter); } enlargeVMMOutTensorsToCrossbarSize(funcOp, rewriter); replaceReturnOpOperands(returnOp, rewriter); - // Remove all ComputeOps - for (auto opToRemove : llvm::reverse(operationsToRemove)) { - if (!opToRemove->use_empty()) { + SmallVector pendingRemovals(operationsToRemove.begin(), operationsToRemove.end()); + while (!pendingRemovals.empty()) { + bool erasedAnyOp = false; + for (auto it = pendingRemovals.begin(); it != pendingRemovals.end();) { + Operation* opToRemove = *it; + if (!opToRemove->use_empty()) { + ++it; + continue; + } + + rewriter.eraseOp(opToRemove); + it = pendingRemovals.erase(it); + erasedAnyOp = true; + } + + if (erasedAnyOp) + continue; + + for (auto opToRemove : pendingRemovals) { opToRemove->dump(); for (auto user : opToRemove->getUsers()) user->dump(); - assert(false && "opToRemove should be unused at this point"); } - rewriter.eraseOp(opToRemove); + assert(false && "tracked op removal reached a cycle or missed dependency"); } // Dump to file for debug @@ -284,10 +299,19 @@ void SpatialToPimPass::runOnComputeOp(spatial::SpatWeightedCompute computeOp, IR auto concatUses = concatValue.getUses(); auto numConcatUses = rangeLength(concatUses); if (numConcatUses == 1) { - OpOperand& concatUse = *concatUses.begin(); - Operation* concatUser 
= concatUse.getOwner(); + Value chainedValue = concatValue; + Operation* concatUser = concatUses.begin()->getOwner(); + + while (isChannelUseChainOp(concatUser)) { + auto chainUses = concatUser->getResult(0).getUses(); + if (rangeLength(chainUses) != 1) + break; + chainedValue = concatUser->getResult(0); + concatUser = chainUses.begin()->getOwner(); + } + if (isa(concatUser)) { - size_t concatIndexInReturn = concatUse.getOperandNumber(); + size_t concatIndexInReturn = chainedValue.getUses().begin()->getOperandNumber(); size_t resultIndexInConcat = resultUses.begin()->getOperandNumber(); size_t offset = 0; for (auto operand : concatOp->getOperands().take_front(resultIndexInConcat)) @@ -602,10 +626,22 @@ void SpatialToPimPass::replaceReturnOpOperands(func::ReturnOp& returnOp, IRRewri rewriter.modifyOpInPlace(returnOp, [&] { returnOp.setOperand(orderWithinReturn, outputTensors[orderWithinReturn]); }); - if (isa(returnOperand)) { - auto returnOperandUses = it.value().getUses(); - if (rangeLength(returnOperandUses) == 0) - rewriter.eraseOp(returnOperand); + Operation* opToErase = returnOperand; + while (opToErase) { + bool isExclusivelyOwnedByReturnChain = opToErase->use_empty() || opToErase->hasOneUse(); + if (!isExclusivelyOwnedByReturnChain) + break; + + if (isChannelUseChainOp(opToErase)) { + Value source = opToErase->getOperand(0); + markOpToRemove(opToErase); + opToErase = source.getDefiningOp(); + continue; + } + + if (isa(opToErase)) + markOpToRemove(opToErase); + break; } } } diff --git a/src/PIM/Dialect/Spatial/Spatial.td b/src/PIM/Dialect/Spatial/Spatial.td index 84de4a2..a970bdf 100644 --- a/src/PIM/Dialect/Spatial/Spatial.td +++ b/src/PIM/Dialect/Spatial/Spatial.td @@ -239,6 +239,22 @@ def SpatSumOp : SpatOp<"sum", []> { }]; } +def SpatVAvgOp : SpatOp<"vavg", []> { + let summary = "Average all elements of the input tensor to a single scalar wrapped in a tensor"; + + let arguments = (ins + SpatTensor:$input + ); + + let results = (outs + SpatTensor:$output + ); + + let assemblyFormat = [{ + `(` $input `)` attr-dict `:` type($input) `->` type($output) + }]; +} + def SpatSigmoidOp : SpatOp<"sigmoid", []> { let summary = "Element-wise sigmoid activation"; diff --git a/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp b/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp index 5d0869f..d81df66 100644 --- a/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp @@ -361,7 +361,7 @@ struct ChannelBroadcastReceiveOpInterface } /* - * Turn the channel receive to pim.load using by creating a new global buffer + * Turn the broadcast receive into a regular pim.receive from the broadcaster. 
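+ *
+ * Illustrative sketch only (abbreviated, not verbatim IR): a broadcast over
+ * channel %ch sent from core 0 lowers, on each receiving core, to a plain
+ * point-to-point receive from the broadcaster:
+ *
+ *   %out = spatial.channel_broadcast_receive(%ch)
+ *     ==>  %buf = tensor.empty() ; %out = pim.receive(%buf, numBytes, src_core = 0)
+ *
+ * Whichever side bufferizes first caches the peer core id on the receive op
+ * (PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME), so the rewrite does not depend on
+ * the relative bufferization order of sends and receives.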
*/ LogicalResult bufferize(Operation* op, RewriterBase& rewriter, @@ -370,8 +370,21 @@ struct ChannelBroadcastReceiveOpInterface auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter); - auto outputType = cast(outputTensor.getType()); - auto outputSize = outputType.getNumElements() * outputType.getElementTypeBitWidth() / 8; + auto numElements = cast(outputTensor.getType()).getNumElements(); + auto elementSize = cast(outputTensor.getType()).getElementTypeBitWidth() / 8; + + auto precomputedOtherCoreId = op->getAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME); + if (precomputedOtherCoreId) { + Value newValue = pim::PimReceiveOp::create(rewriter, + op->getLoc(), + outputTensor.getType(), + outputTensor, + rewriter.getI32IntegerAttr(numElements * elementSize), + cast(precomputedOtherCoreId)) + .getOutput(); + replaceOpWithBufferizedValues(rewriter, op, newValue); + return success(); + } auto channelNewOp = op->getOperand(0).getDefiningOp(); if (!channelNewOp) { @@ -379,31 +392,30 @@ struct ChannelBroadcastReceiveOpInterface return failure(); } - // The first 'broadcast' operation creates the buffer just after the - // channelNewOp, while the other 'broadcast' operation need to find this - // buffer allocation just after the channelNewOp - Value bufferAllocation; - if (auto allocOpAfterChannel = dyn_cast(channelNewOp->getNextNode())) { - // Buffer already allocated, load from this buffer - bufferAllocation = allocOpAfterChannel; - } - else { - // Buffer was not allocated previously, allocate it after channelNewOp - rewriter.setInsertionPointAfter(channelNewOp); - bufferAllocation = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter); - } + auto srcCoreId = [&]() -> FailureOr { + for (Operation* user : channelNewOp->getUsers()) { + auto sendOp = dyn_cast(user); + if (!sendOp) + continue; + auto sendCoreIdAttr = cast(sendOp->getParentOp()).getCoreIdAttr(); + op->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, sendCoreIdAttr); + return cast(sendOp->getParentOp()).getCoreId(); + } + op->emitError("ChannelBroadcastReceiveOp has no matching ChannelBroadcastSendOp"); + return failure(); + }(); + if (failed(srcCoreId)) + return failure(); - rewriter.setInsertionPoint(op); - auto memCopyHostToDevOp = pim::PimMemCopyHostToDevOp::create(rewriter, - op->getLoc(), - outputTensor.getType(), - outputTensor, - bufferAllocation, - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(outputSize)); + Value newValue = pim::PimReceiveOp::create(rewriter, + op->getLoc(), + outputTensor.getType(), + outputTensor, + rewriter.getI32IntegerAttr(numElements * elementSize), + rewriter.getI32IntegerAttr(srcCoreId.value())) + .getOutput(); - replaceOpWithBufferizedValues(rewriter, op, memCopyHostToDevOp.getOutput()); + replaceOpWithBufferizedValues(rewriter, op, newValue); return success(); } @@ -428,8 +440,7 @@ struct ChannelBroadcastSendOpInterface } /* - * Turn the channel send into a device-to-host copy into the shared - * broadcast buffer that receive ops load from later. + * Turn the broadcast send into one pim.send per broadcast receiver. 
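+ *
+ * Illustrative sketch only (abbreviated syntax): with receivers on cores 1
+ * and 2, one broadcast send issued on core 0 becomes two point-to-point
+ * sends:
+ *
+ *   spatial.channel_broadcast_send(%ch, %src)
+ *     ==>  pim.send(%src, numBytes, dst_core = 1)
+ *          pim.send(%src, numBytes, dst_core = 2)
+ *
+ * This replaces the old host-staging scheme, which round-tripped the data
+ * through a shared buffer with a dev-to-host copy on the sender and a
+ * host-to-dev copy on every receiver.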
*/ LogicalResult bufferize(Operation* op, RewriterBase& rewriter, @@ -448,32 +459,32 @@ struct ChannelBroadcastSendOpInterface return failure(); } - // The first 'broadcast' operation creates the buffer just after the - // channelNewOp, while the other 'broadcast' operation need to find this - // buffer allocation just after the channelNewOp - Value bufferAllocation; - if (auto allocOpAfterChannel = dyn_cast(channelNewOp->getNextNode())) { - // Buffer already allocated, load from this buffer - bufferAllocation = allocOpAfterChannel; - } - else { - // Buffer was not allocated previously, allocate it after channelNewOp - rewriter.setInsertionPointAfter(channelNewOp); - bufferAllocation = createEmptyFromType(srcTensor.getType(), op->getLoc(), rewriter); - } - auto srcType = cast(srcTensor.getType()); auto sizeInBytes = srcType.getNumElements() * srcType.getElementTypeBitWidth() / 8; + auto srcCoreIdAttr = cast(op->getParentOp()).getCoreIdAttr(); rewriter.setInsertionPoint(op); - pim::PimMemCopyDevToHostOp::create(rewriter, - op->getLoc(), - bufferAllocation.getType(), - bufferAllocation, - srcMemRef, - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(sizeInBytes)); + bool foundReceiver = false; + for (Operation* user : channelNewOp->getUsers()) { + auto receiveOp = dyn_cast(user); + if (!receiveOp) + continue; + + foundReceiver = true; + auto dstCoreId = cast(receiveOp->getParentOp()).getCoreId(); + receiveOp->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, srcCoreIdAttr); + pim::PimSendOp::create(rewriter, + op->getLoc(), + srcMemRef, + rewriter.getI32IntegerAttr(sizeInBytes), + rewriter.getI32IntegerAttr(dstCoreId)); + } + + if (!foundReceiver) { + op->emitError("SpatChannelBroadcastSendOp has no matching ChannelBroadcastReceiveOp"); + return failure(); + } + rewriter.eraseOp(op); return success(); } diff --git a/validation/operations/README.md b/validation/operations/README.md index 6594703..4870159 100644 --- a/validation/operations/README.md +++ b/validation/operations/README.md @@ -3,66 +3,108 @@ ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline. 
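The newly added operation tests (Add, Mul, Div, Sigmoid, ReduceMean; see the tables below) follow standard ONNX semantics. As a rough NumPy illustration only (shapes mirror the `basic` and broadcast variants in the tables below; `validate.py` uses its own oracle), the expected outputs reduce to:

```
import numpy as np

x = np.random.rand(4, 8).astype(np.float32)   # typical 2D test input
v = np.random.rand(8).astype(np.float32)      # broadcast vector

add_ref = x + v                                  # Add, row-vector broadcasting
mul_ref = x * np.float32(2.0)                    # Mul, scalar-constant broadcasting
div_ref = x / (v + np.float32(1.0))              # Div by a positive vector
sigmoid_ref = 1.0 / (1.0 + np.exp(-x))           # Sigmoid
reduce_mean_ref = x.mean(axis=1, keepdims=True)  # ReduceMean, axes=[1], keepdims=1
```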
Generated tests can be regenerated with: + ``` python3 validation/operations/gen_tests.py ``` ## Conv -| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes | -|------|-----------|-------|--------|--------|--------|---------|------|-------| -| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted | -| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias | -| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input | -| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel | -| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution | -| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels | -| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing | -| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved | -| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads | -| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias | -| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input | - -## Pool - -| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes | -|------|-----------|-------|--------|--------|--------|---------|-------| -| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling | -| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool | -| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path | -| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling | -| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` | -| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` | -| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` | - -## Relu - -| Test | Directory | Input | Output | Notes | -|------|-----------|-------|--------|-------| -| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu | -| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu | -| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu | -| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu | +| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes | +|------------------|-------------------------|-----------|-----------|--------|--------|------------|------|------------------------------------| +| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted | +| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias | +| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input | +| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel | 
+| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution | +| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels | +| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing | +| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved | +| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads | +| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias | +| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input | ## Gemm -| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes | -|------|-----------|-----------|------------|--------|--------|-------|------|------|-------| -| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights | -| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N | -| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector | -| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight | -| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias | -| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices | -| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices | -| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined | +| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes | +|---------------|-------------------------|-----------|------------|----------|--------|-------|------|-------|------------------------------| +| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights | +| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N | +| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector | +| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight | +| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias | +| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices | +| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices | +| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined | ## Gemv -| Test | Directory | Input | W (weight) | Output | Bias | Notes | -|------|-----------|-------|------------|--------|------|-------| -| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul | -| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant | -| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape | -| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern | -| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast | +| Test | Directory | Input | W 
(weight) | Output | Bias | Notes | +|---------------------|------------------------------------|----------|------------|---------|---------|----------------------------| +| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul | +| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant | +| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape | +| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern | +| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast | + +## Pool + +| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes | +|----------------------------|---------------------------------|-----------|-----------|------------------------|--------|------------|----------------------------------| +| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling | +| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool | +| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path | +| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling | +| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` | +| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` | +| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` | + +## ReduceMean + +| Test | Directory | Input | Output | Axes | Keepdims | Notes | +|------------|--------------------------|-----------|-----------|-------|----------|-------------------------------------------------| +| Basic | `reduce_mean/basic` | [4,8] | [4,1] | [1] | 1 | Reduce feature dimension, preserving rank | +| Keepdims 0 | `reduce_mean/keepdims_0` | [4,8] | [4] | [1] | 0 | Reduce feature dimension, dropping reduced axis | +| 4D spatial | `reduce_mean/4d_spatial` | [1,3,4,4] | [1,3,1,1] | [2,3] | 1 | Reduce H and W on NCHW input | +| After Conv | `reduce_mean/after_conv` | [1,3,5,5] | [1,2,1,1] | [2,3] | 1 | Conv 3x3 + bias, then spatial ReduceMean | + +## Relu + +| Test | Directory | Input | Output | Notes | +|------------|-------------------|-----------|-----------|----------------------------| +| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu | +| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu | +| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu | +| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu | + +## Sigmoid + +| Test | Directory | Input | Output | Notes | +|------------|----------------------|-----------|-----------|---------------------------| +| Basic | `sigmoid/basic` | [4,8] | [4,8] | Standalone 2D Sigmoid | +| 4D | `sigmoid/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Sigmoid | +| After Gemm | `sigmoid/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Sigmoid | + +## Add + +| Test | Directory | Input(s) | Output | Notes | +|---------------|---------------------|------------------|--------|---------------------------------------------| +| Basic | `add/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise 
add, same-shape inputs | +| Broadcast row | `add/broadcast_row` | A:[4,8], B:[8] | [4,8] | Row-vector broadcasting via initializer | +| After Gemm | `add/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Add with broadcast vector | + +## Mul + +| Test | Directory | Input(s) | Output | Notes | +|-----------------|-----------------------|--------------------------|-----------|-------------------------------------------| +| Basic | `mul/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise multiply, same-shape inputs | +| Scalar constant | `mul/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer | +| After Conv | `mul/after_conv` | X:[1,3,5,5], S:[1,2,1,1] | [1,2,3,3] | Conv 3x3 + bias, then per-channel scaling | + +## Div + +| Test | Directory | Input(s) | Output | Notes | +|-----------------|-----------------------|------------------|--------|------------------------------------------------------| +| Basic | `div/basic` | X:[4,8], D:[4,8] | [4,8] | Elementwise divide by same-shape constant tensor | +| Scalar constant | `div/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer | +| After Gemm | `div/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Div with positive broadcast vector | diff --git a/validation/operations/add/after_gemm/add_after_gemm.onnx b/validation/operations/add/after_gemm/add_after_gemm.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f88f43d5dcfa9d244c94a2ed536efa2d9de0d9d8 GIT binary patch literal 8594 zcmZ8nX;g@9(5@s&3(+2uM5Pi*+cVRqO{t{PqP?^$DYPkskS%FJA#2ubQ9UzRU*06! zn_c!bl6{M>bG~!FAK#Dr&)k3J%$ak|bzL*E3f9XU<&@4kP)v*`{2-HE2&GnLl~F15UJgWE$lW3>CvsCLE|s+fSeqvm8J2f$s>8M zjuraXjpu=j6S2K@G?$O=i+#>!;a4FZN^@PgbZRe|GfwW8e=(1T7zo?-0<6efqhoV;~gD&ywt@NFDPZR zu1~wz@;4Z>b3TKA$ZM*$P(v4(iodI6_*&*?*b%7ACR;mf_MGpAie)45B+kaui%W$s z=B2d%#2D@Ps~s#9H&Ol zw`I_g3nQqYtprvaY7}-w3^TazJyhkApm3J*&f$MNssygMY`d*-1j@9g;YHUi(79kS7)|~T z2h4}jLBnKB>WPMH*Co(mkUoy+GT=EU!Z}&#KWg6^hINlSn{1}PfY9#E)Datsjti=6 zk4KE;Hp}zkUjtVR?y0a@{bVT|TvyQa;L}0s_x6p@@3$Sh-+l&G*QU|ONq0s0*=^8X zXWqEsQ9T8|3}tK62N0)jiLYa}(`lz zgEjw%zSemxKPZ8Qi^iCmJ(MFjitDo@`M(8|P%$Y8cljychCj3Lwq_bO4Qgv_eVDmd1EjGY`IR z&cfq|<2h1wshGCsHaI>iCryh7wA;c0-+H(T{fo;QgRG@6!|$WGe8L#Kuw*`}u8`); z)M)yjQ;Bo62xIt?q&)J>JCwgXbw8n3+b@-RoWGQf$rXBBob(TIJ72uY! zCI?GT;U@z^SyXZ2@{U1#d-W{#^;6+PJvkH}WQqp@e!1|6MF zspE?2Z{cAWW(4NEjKudtV(_dgQ{t{);Icjdhk87q^b3jHCt(xKc@oR% z6Dz^^`F)tU-yi>eSt&$R6x#Y%N}}RcIUCt8vbaS0wz&TJZfa1wOxK)~vDI+|*7l#l zcJ2hJ>VH6K;lDJGsVSpFcQ_8P*h!HO+_>lGQ|Qih;0pupi1~WKFyL;Z zkgSl-^YnxH)`nCpH?5`A$GW_Bmps0!bH{(@^a$L8xcgRLUX$RD+blJOc~>O)_S00{ z*GCm{m)|BWen+Z~{ZYtohDuWpyw)&K*jHBsd2vB}rnE^+H@^Z8FP{_&m)`BIuR5=b zw%|Q~BKS}LGHNkNz@v|dzSXLsLfRo(fDX7lET2N=$n)WzskkD>f=mCiB!`iCeBk>+ z-nco8x0_T6t%_r?Sr||G#xdY^do{e#bYgGIFn&`T3~{4&fM3yN+dEoWJn_9OdfS!I zB1=`2YcHfP!(v74^+VCRLcqrzF8tFm3*AS*fq@o&81_Yii}nVi#n=I;`?m!0tw-ah zEFA{BN-{{?CFHy|;pP`*H2T~=@+dIIv8`L064#^&{Z^Tyo$f$>)vPOQyFLJBDkyU4 zlF?jLX2Hq-MdHP&UnwYeDM>bUh^g)|JaK>)zdtBND-O%hk^2(pS&@wa`;NmfoWUCA zB~2=$_k)tvPT1CLP4D7Wc|oBZ*W5PcV;2|lBySJ2_}&-Gz2f<^r!{W8n~Duv6QDuw zB-GjEV)~AHs&p-Y-x9Gr&}uj=p0$Tg2fCr}Pi1sT9|x~o_R_J9s<^LqsTi`th<{la z!QwbYTvy))r%FP^>3c#@r_U(%d8oqH>#}iEvO3N*h{LdjpL_A>8P2GQ! 
z(Cc0#e(bX5hq8;Bc1)hbr5nzPv2T8XeE%NMdAAyL&Ejy#A9t|a5kTu*&G^%Vvt&1O zAI$bEZL0paO*FMB5cUTEC{5lAMpB-QhPoD*VVH{2_0w_lE`kcXrA-$XgmJuOI!~&Q zN7>s>Sl0cSqNer5u^-dnowG8ZO6$jUS6j$nfIU8#=?|WFZ1IoY6!d-ehm5Q43EfvN zfjkzHVz>n9FCPUNWi@at+l$93i=gknM3`uOk}_puaKN4vn3pvN)yo&Mbf`4`o`>vS z9L*8^W002}1?^Q4_+B%CiqB_>=`VBee2$agsqNAUx3C}SK&g*H2khR7IzJp#)+f0 z39q)ci#j1`;+j*h2$F{I1n*Dq-wkWrArnPQiwE$L*>B-(_+6U*_9|^j=np5F6wt8r zj&RhY`;^%sHI&K5hL@GA#+T#82L+vkLVr&8=FR2@4_Tmzn5ma*V3Qc z2dT>49ZpzR3T@xw>8X7P$X(eBrk&1Qr970UDOQH$JDzNt_G*ui%fzrZxWbJBj zAD)B@U%263OEUfSrwvuw5jd>hFw zDu-!awgD&>M`2(`GZc80(2Pq%`9WqMoK~#@>$mD-`Q`!a|27++4p87Q2{oR-#uLv! z`v@}8LGbI#6Y=QRbKtZtoDE}d(ur?ATy|80yMjWvYv&L)xm61VGl*`SONB>UQt{)K zpRh826Q!N2gSDM=(XFmZba*BsPAZb&!_)RqjBz4vm#-5Vm&~x;9=}9f@l%&$lC0@+ zvKs%Lq5y~7=d#hUG~Dz(8D|W)glkS~$XBv~ie~6x?EO6u_ahZ7ue#xJ-)ufwn}i!% z`=h_bL&y_`fm!};L87n}Muz*~G22{>{MZlG5<6(QSVP61NAkY77*1Lv$rkZ_aDMl_ zrsbgqXcDynPVd+&40;;GKT3=^Kj{U1KEH>OgU4{orD?oZX|^D9EkJCE(1E%=&7fWE zhV9l8)Of zP_55`;ii&&?RK%)7~}xycWt<5zZBnhIzTDa9(Y6X3HY3|JIN16^G|GU6tOIXOGBl| zxhb3TTzk3Iy7e?yxeT6v_rtkbU392O14h};!J3x_Y_ui}wOnIyhSE|v@X3!o_1)M} zQy}@%b7+h4O`+rM1Rmp%jZ4q{1Ft7_HTAueg~26*`QDe!VE)wIwk7`-Je=f!Bmbnc zAWe-S-txR>O+MZ1Y@*ret+Z@Z7Jc@52%$e(slwZnHx$WX_=p_l@Bfjr?Et(!&4Z8C zY=hXXpCMMdlqP4MhoeQ&T=sq>q)rLsS*20vQ89=ok4{4CL`zoIFvO>)MT&8~ z=rtq=4=i;=#Vj|xblQwHbxqhXD+gr~^`P9_7{@wf)ho%(8Yhg8FB3UiQcp zyOI+yz042)6|2eIr3YTcOybD3ek`$M0msCh7DG~U*(iKDd_NSK16Sxs$J&}YSML*QA_=7RPLyvTF}-;3m2{8jvEx)@-WWC-^m=D} zK=Kf-PqgEtH!ZX&@r-cr-ELSC97;i2imca`3ejCE>{K-XME`5NFQP47u@mo$_; zm@#N%eobYQ4oV0dBx9&M?e=Y-qovl%CXbbI=*J6`$ zOEgoT$fsQ1(yoz8piq~AuVOsueB^s@lqsQt4ey&4`nmIDtKV?N!Wl1oI}Y1x=CPIM zLE+i_L_T5UVXONi7n9FP(80!74x5*PS2gEg{ec{A+JBcEKc>@}^-g%WE(BF7{)J4e zq+wxY5Egxc)^0b(AqOYWgd`1qR+NGx=EvdWOBVDcIF(awCgS^q162RZox|njpr7GI zaq(O^{`;RTm1f-s#d=LF3rRq=yjd)$#^czie}p;zeunBHslvdH1l~Bckjnca9bOYe z6}onO4GnUdWl=j(qi68ZW6%!86hC08^{!)$j2*tN1+h_qU@n zRr6W}GmOmhO!%tbE?BQN4<6p^eUxch0?$@jAzz53_knvzy8ldJuC)P+_gkBG8(A=N0;(yt1#k_1?nigh=m5o2)!dQL0^spF??Z`vz1Bx6bbkpOg zX}J36Wx9A@3tM|N_l3QC;DJ|zpt&QP3y(a5lO^xSPcaqaYPP_jL|Z;3|5dPXQ^z%F zviu;dN(^hLrr(=l;qAdf`t36mbyfv&zN9>Y#vKZtti-l)qA<-~k*1HGj`y8S*?4mx z1^hT*E#K&Dt8v~BLx-yJ#7VBa@7~oW^D*bZG^K%L9A$aSu^{XDBi-PBMJlN8o{j-e z)Hp#afJ<{`p!4E9o}#FP8s3wzNk0yD*Ca8-Ux7cyy)*vjU~$@^9b%2vUa-H{N%A%` z@oV`!wx4Uue|&4;aKW8 zUl!tUh`{^fV{xuyCR>G83Ma;e^7XgQV&X_8%u?{?Lq8-q?w&8dvkoH_o#nK9$xhNa zCCMM7c0y8_7rd`}0GqC_f|ltzTp2bHwuC;UkdV7@?vNY4e_BoWJcLKSwBRN?dDQpz zM3dzVJ-RLAbLsr4B|1A`tXT#}kDi4xPiFIuewpl9lYx6){e_2y>ij~}4W-LWFnE*# zUh3skcc(@0Hl0MQ|1HbA_2RH9MHalY+&E0;2+WUfqet8U*L!Agnd$|Sjva&In`_kO zevV>Nr1(M34RObc0&4ZO7ix~DU~*C?Jp5wK?LL>uP1%T=AH5bM(@n8>(P%tsSR-6l zB{+9Ml8TMXxalEoVxKV*rpC-tA`WuJWBEYv(qT&r4c5MuO*w7EUx*SL%~^Q zIM_;(r7VizWY2HvKf{X;{I{HXLB%#A;1roVSHqJ$b#%}yq*QrReBTzrV)7fBz19Rb z7EZ?FjqBmoHffT19?Xi*_4%~A6l(l?u+b;t9^Kh5f&O}0Jk>;3Jf8UwjDm7liX=rX z8GY>Tiee8>3GPbCLY21NG;Tq&D7iQl*Hx{88Q{#Bogr{fe!{ifz;dJxt9|<%m~Bq-r!CEH4k@HH$PctJa^NPdW{cPg(NK%p-KF!vKrdeua)t z!^HA8m%ygw5NOv~30;>HxIH}x*Jzxj{+YIXOZ^cP`%7S{M;?~%>eavrS`43(guO4^ z*=*=C@ri#J57*icO#$lM)$3!4775T6n~VRfh-KTs^4xc)4$CHFLTZ~ShEMR}Dy{iE z_qhgo)?N?}FIM89w!D%pyI+4^dt5deCYoHZY5l!j*}6*wf`jazx1B; zAMc@n8z0n2S))6uI5WMtQe->hFr^TU$RN81lNuMg*E?(P_Cyq{L)%26qG z(ZG#$G^IVB%{HACL%ZYgqF64#hYXyscLD4DsE4!%v2Z==2N|2Z1BZlM&I@1CbnMev z%5csCWk-M99_x;~+q2o@(^Jt#+ZtO2PUP}$Cn5XfW3YobG71zF_|!o3;{CmtZT{& zo!ttfEG|N$`c`T`eNOo2MxppJ-4ko|Gq^@RjH|mE$t7(%|Mrq#9PkS$Llb?Km2g-| zBv0z=PCX~K(WdN92st!{E6?TPs^M>&$_;wGzANLn^h!M4sd`BYWzXP;!!;)0QU%^(UdK3Ogl2(56{^)Re>G?sVE7+~4B zesJ)ulBlCc07Dv@nkyT?)SRmb6f`l`unrP?p*xhnu8motHD(JI_;W}gFY6rT)|1K)@{QfD*J8fR$H^# 
z+2_IxgJXjeYy7>nCR zJg3#eMe?(mfpvq^SV?0QoEZ^^-ERn*{YK%1&>*&Ns)7v?(m1dB3f#GL2|U6qY_}>6 zMw={2xZ@^?aJxU|C}wfsk*^e3tYLd`(MVh*=}1z)!r1A=Fu18cfLCS5e z={5X<#2K}XYvO0o$*c3Y^W9h5e5V2!xqB`ehez;_qIz26osEazZ-grEc{We$w}V!b zG{=kx2HBTh^itgpj<$cGZ2O@+w*Lm0lpw?FUI(M9@=V;`%Uc)4x?x(?WoWbdKyh)3 zETuOH$onRZOD}=#4`len@MIz7Lv@j$D>Lpv~7GNwbZ<9|u?nbyuyY<~gfj`s5Xqb9aw8f6rNA#(HTS zpLUBh+=pRP;9Xc%IT{_t^h5UsZS=6Vz#Ugsi0{!B8(zJp{syT$%VZpcCj%Z#mZ7)` zQ}$kAz@wH9;**8?s5v1@c(x&mT`JasZiYW-o?s9&N+RSMCL+o38x9#+z*xJ`q3dxDI9odfYX9JLO2G zW6|>j+F_H5I?J6{#ovHcSEUh!SfN$c8Il>T$ZGd}aiO#t4%=cZ4Dw$su6-~W&!(s0 zv+yB!BY!kcRR1l6s`d8MFC)JH*$hL%&qF3F^y&y7d^0kU_a8b-MUOMYon<#*L$wLF z`6aTLR1Z5hg`xSMaj5$lxz%W&FibrNy+bFGjjWz<{KYA#IJZ@h*)7Fu|2jg6Q-3-b zc2fB2Y02UF*7U)voQ_Y7#;R@sH!qlouI*hQn`Dgt4zGc1=>ppi9iwUL%q0*LYQd!P zRCM=y0I%+K(@dGgB(3qAR>~a^74a>79^GggTAqQezn%EB^hgeyZieyuT(E3tI=}ri z33FBk!t`agz$T)ItTvyadz;E_VUY!{)I10`TFgP_=S5I@cvxKM?h89|TblGBfs1k@ zFn(n(21UIgF@Gj%7^=gi>0Cdszj!??EWJk9^m zk22 z&ZYhA$dku<@r%B_`vZ$uB#i|aSrWm6Yy0tQ^b)F{%Ant~SB=}R$6oD9qLLLvbIP=39cHClrpI%QsCm61f{NG~$NB4Y5-BG42Xo*jYY zj5(MxV->teUr(=%eu?@GMri(aHYZ%G68q&A(%4P$JotGF{VS)Bo`0vY!ekd5G({E% zXBXKU4E%pV9S3PS IiGd3L1Gz85r~m)} literal 0 HcmV?d00001 diff --git a/validation/operations/add/basic/add_basic.onnx b/validation/operations/add/basic/add_basic.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c7699c130a30cb13cf816ab2a73cfc7c250fecf5 GIT binary patch literal 100 zcmdj8tNFOi2;qOiW3MPf9G#OpX!;N(u3C@o+E-ad0tlumCX! VNYDvgFbOCai7x2G!o?uK3jn$;3i<#5 literal 0 HcmV?d00001 diff --git a/validation/operations/add/broadcast_row/add_broadcast_row.onnx b/validation/operations/add/broadcast_row/add_broadcast_row.onnx new file mode 100644 index 0000000000000000000000000000000000000000..abdfccb476800e73d3a670dd5d6cf46346b8e252 GIT binary patch literal 130 zcmdj8tNFOi2+EOiW3MPb$h!Oi4~GE{QM7FW1uK;1FPRVs!FS zm@JrVf56DzRyjIu|E|+J_tiF;+wa^Gvj22rh5i5J2D>O>plL#UTs$0%LL6L794tW0 RkpvWsL>F{o;bIWr1pv&I9tQvb literal 0 HcmV?d00001 diff --git a/validation/operations/div/after_gemm/div_after_gemm.onnx b/validation/operations/div/after_gemm/div_after_gemm.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ae8770d6bd8452d54317f4b4f10adc214e92f59e GIT binary patch literal 8594 zcmZ8nXE@hy)bE!W*(-@CD>E|l``$-Jl9^3NSuF~o2$j+h8mP3>)LzQ?o`ZHOm84~) zJv8)Bh4Q$r=enL3&x`xbx$hU}y3aY+`JB(^NXkrJ>nx=xC72>52$m8!D+=7kiMqum zBuL3h3EZRvE{cNCaUw4B(iCN5=B3S=9k(Plc~<<;z0pQTNk>U-d4ZE4c=`&*nF_=B zhryQ6eEKYNc3sIL_N2i5`PbOG861s`3gp&S#j-Sk9ywT=&`ha8_9P|-CjGv^1)i-( z>jT*oI$Q^2my7_%L-nSC+N`FK6t-g^uBny<{RJymO`0j-~X7 zQ-Xxq$xwMIj>h{NV_k+2I&Vbt=4X>?e`RxQQvO(q6j{%b18m{p$r)IoWyL<&8p6+S z?HC?i%}(F)1(7|oXk2+Hj?=DYpOa_Ob*CFl(Qq%Xd{2ena4?c~YAl94%_G9%!?vK5 zJqb40r_&L^G_tJtj1AkPpvq+l`8$|{q4XczG_4f9qqbwJ3QCX0Tu0I#Lov9DSr{PPr#EuIH@tCh)7$^`U8(?HR8IMDEL zVePI{So)y?^?!@e``T%+X+WJEira8wjx#7k3TW#qE2`I2b*S!kq>2-#SkD@7{90Va zc36i&vV;*l`(RpoaB?e_Y-jvkUKysHI*TJWlyFRlL3 z+swjX_?ISpqhQS^_r}s#)o{2l3ZQAjS#0fE026=sz_UA}z?4m6_HC=!gotKV^&yAd zR;g#LTCX{u0&NJ;iiZ`tMXYUeA9p6+5f-@KWrNisFeCO9(>`ef-^s^3i1rp)_L> zf8ek?jGj}CfuEnU#_kqe(3?c2?y02QopI_ z8Mh19^~@xL3z76|tpZ#Tp8yfIGeGXdD7I&J1WK-*1>SSK$w{pM1<%xJMo=hG?PFYj z?O651%X$1KibeZDFPfz#&%eBzO{Zkv;0swboTl}a882u@la+GS#(xcIa&b55C5Q{Zz2r-eU6=B z`Rur;8(0T-V_0?^y_OQzjCg*D3#h-s=1F@Ai`RIu9}CCPs|bHk5g4#zHfm7bl#CX? 
z3v12hE&#D8UAD@54#|%j1&7W&L_xkj@v1fG9;O9dB{QM3doG>-W=*9@I#3pAi8;!} zSP1qYZ_tj~et{%4=uZ7B58%P32@sSVMT_kcsj>Yjwl%%y4;|*w zE0VwaUTETO44*zm(vv<58j}(Pl>vrOwrwih-m6KIcX~T~J*F#6?b2hVDu(pzML7Ju zZUAeXTuJ7xB9Q%P7@_czeUE*KS;stKN3=PA`@cgd@$oBKS17=>I&1i_%n|%Lq}jf} z9H_f&3HyW#>1XE%2#VX!BCI$H9=!!ubh1KqGcz|t!M z!T()27R$-vL@t1Ib|zDo$9?vbs=0ygv9wPiobEJVXW>(KbIasT;)R$1NS-0i-rUom zhlvcv*wzWJ=EzXU(opiT*Co|I>5%#Q7d|~U3kH(cuxl@K=(EQSjM&>bnGbMdGa3tP zu3VC4qL;_e;VnzZx-tpM%|%&*UltP$QGk6pnQ+PW1@~>{R4SO3Ojc%k&@6tFTYLUA z-y`c$+vO4hsl_8mv{HmJ%nb1Go-ii$auD|o#rtvN7sBYY_i?u!Y~FC8QCS>sj$4X_%?+3(8K`6p*GwVRKdBN`W3p zhX&Cv>mw|`O_8o`G=likW69HIBfDj6&oZ_LQtDJkaxZqHk^x@~^!vbf4DQAD{mKM$ zMnX$sEBk#ggKAz{!yNfIP}Deql4ft%<>_g(YSB4%_nI!H$1UW>Xoka>K2=s?@PMoO zypqo-F^8~~_i*Q6K1M`dWloNnIK|6=aeNdzV5dkgj@7W=PXBQN<4It@)e>kr&#F3N zVQfwqb*$5%ZKvm9zg;NYECq^TX@T2ZDnEG^%?b`+ zeB8^gsbF(my>jdUN* z2S@1v{HJXTahZN}4^Ok5ArAB@RGfaau4Uh?%%H-~2nyCE!rKwc@Idex-uUP?v@MLo zsEhkiZzzuQ0^~q<~4AAr+hEwm$)2N@ykea^_&E8z* zw~h*9cXp=1uJp~=EpJYzjdL0IN{01!?PDJsmXQ9#PF8sJ11nA5&staRXYxjK$+JKi zzStX*tE3s87bTwI1gS8kLPt|gfwijD_Dh0qiJ8fShihOZ<7N_w`>_0`|iV>m!Z({-GdC`6KTV>KD@4>2<1~n z>G8)Ddar+jor~JREu5uDgTFITbKe(M;(r*Ig}1S}F&|mP$v|qJyMsliPshGqLoj+7 z1N!1s?C9;~u;zv*e+L$U)wWoO7Az!_leJu4Yde!{c*TDHolNfvoiJ})DrPkt#mMni zyzeDHPzcH6#ZUjmSJpmE^|=z3R4yON$#Qn}g)iN?77JO8TQO)_1wU(@pU{53U9Hu} zrL;n#7WEo4>4S&|#TYK;>WpOIQbrKmJGuh1mzt7x>lM7$ag}LyPK6+K*IMy6=HMYO z27UJ%v3_kXyK8rVGy7D>#UA7^YK9^uFLtE|qsEZ^rA$0>!wt@T_r=XkD|y?TNswB3 zici+}tGP7i0o$>{ovcg!A!cs?S$#9*-utfM#4Yn!`vzIkPPSu9?}k8+R4Lvn4Q3sY zu3)J?@3dq}S2Vv@n&5qHnMu@o-r7MxGt?cHo8&gwu;n z11#awRlGavB;Kx!q|nd+n4soN5`G*sOM8&vH3zERmyffOBT4tmJc{|bO*mWf5$1S| zgG>!wQd1wpXXTz`r&b=pG7VX}y3c`$bD6loZXI(oY{4_7OW2Y*i>SZehMwNd<#&Zv zu+jBuSR9(p4q05_Q<|nQ%(4*jku3J&EIN%OGiUige+v^p6*#vm9GcG*aYcLtzj>b#P2xRp@9FofO!YX5OjX0{IayTt%?&c{$l$M- z(NNo<0QBJ^-kfy+r9nWCA{@X{PC$#d1*6;H2K4)H9?4vYp$ywuwB~aiW}g?(`pafe z`_2`Qn0i5q-gNfI@+dklGXUczE&Q}sfx>rdgN%+Vc$rRvo4+cC@WfaUv|qu!9uIiw zXB%+!>p9Fv)r*3D&V{Zu2e5z20;nIE4BPv!u$@*Cl+`){dZV{9GaqwWadJ6Snx3q= za%cz#MYjqcH!9Jc@=Vftn2a$pd)eLYEjat9F12k>ppvY0!jV=MboTjFa(^HW4ZI>u z{SZUP?*~&_m>QVtU#{sBTTbg#FQco?Qrc3SLfc-Z!Wo@AT=UhpxKvok&U6=Yp?5{` z<#lQDN)2cKvNGUwd0DLys8N!J63&Q8f~tQCY^K7?+NsyWVa4WAY*w5V)TxS++C6hn zu^A6n$B&`ig@bIK%yxEWaXtUnVL0_BdcgEh2hhDHh5Lt{<0NiJP<~Sg+J9CepI|$v zFPl!+yDY%`(hPKLx`cawIAPnc9ClwefH@>R7D^dyHwuF0~pK3w@3}K?txSV}l%sT$D*ix*p)B9V&D(ERi|?ilXmt z&FJYKXWADqh}?}NK0RL^%vF4(QO9~@J;7y#9^~%v z;m$`jp{|G`RRsTIFQ$LSOP^9d~U8fmO?H| zdu>O9r?0TT65XghUPCCjFG=6KE^vp`Q;9Fy$Tp?e(6iM!^x^nNc0_*`>oi-9qLHz% zom&j~TkPPl?F)A1UjR+pyqm>J-;CiHx~8bam%Sx*0Y~CYghu$RJNkYdvQ>3$%Os>PuQFF zL9pSc94wbfqV(HUczwMQnb_~frKPR--7y5Rrt4FUv>KT7Y{2*)1K8R5jBT#Fj}yw1 z!0=f!v+%o)o@OWbM;Y^>W3vb?enEWfs~+4cdWh|Iwix0X6ln64O6Hd^5qfI7xk=jA z6r&qMi(W<1-{WUkBR3MBH+aIYyjf5X-i@8#MpKBrJhgTufQGUXuWcLz*5_MLD|j_G zsJsD>sY}8GxP;?h3dy-Xn{~Y#1%9%#;KRYuTz{GoXq<^6$+n~1q3I8CCI6I9>hh-g zglQm{R>?@j8{!ovQl@J-W1C-dcLo}m!Rz%*c6cg@n%KjGH8*Rz;^gVB{TpWDG8Xpo zI*@*R9F@em<3ce>SVg12d6qvVMhRevlz==+%&AYV0tI%*xyj<`3KNEvWmOD;SV3C zq(Q4r6tNd+-XN{j#0riTW9UrZ913Z}RwST?fCQv5IvTzGDUd9ClcGa$dGE?7r_Hn zBRaqLENlGaD!h5BP8fgc2iAQt=R9?!Y3pur_G8#kU7r*orx*87`GgeNkQ+QTk%XtA z$I(+RAC*i8(8TZrH&UPg!7Vc6AU~IdXp6x;r)-k-KFhk&BcNUODC_@i#?BU+!rS}_ zRQ*t$-rt%8j~{!H+eUZ#);XG5gh7l>X~4H>92(A4XPci4V*054+){A`irAq<_VVx1 z^lBcjGsBe}k8WmXht=YeYa^lH+C@BV5zIPA@4?f5$1ph)7y1}Hk9WHdaH)coUU zQ~P*+WU>J;lQ9?)s1H-sE+F6TIK(lya1A^Cpgl|gt2#oNgQ_zuD5+&m+g#zN&tKel zY7i5WQo-6Qj}50Q$=Bqw3AvV_HH5K_-~wvr9ck&935@R-LiUIG zFuU|0&cB<9g46}nZ!bcB%qPI1I}@N{dH}iTeqtA=CO~2433h6W2gUb`(V3m!@KNJ&?k8j`m63m-s$1gm2M6jYxLk)_Bw8_ 
diff --git a/validation/operations/div/basic/div_basic.onnx b/validation/operations/div/basic/div_basic.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d2dd4f57265e0aa5b64358e636abba360470e51a
GIT binary patch
literal 223
[... base85 binary payload elided ...]
literal 0
HcmV?d00001

diff --git a/validation/operations/div/scalar_constant/div_scalar_constant.onnx b/validation/operations/div/scalar_constant/div_scalar_constant.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b61f5870ec25c819659b01f726f0d490f6c6560e
GIT binary patch
literal 104
[... base85 binary payload elided ...]
literal 0
HcmV?d00001

diff --git a/validation/operations/gen_tests.py b/validation/operations/gen_tests.py
index 777725b..c146f8d 100644
--- a/validation/operations/gen_tests.py
+++ b/validation/operations/gen_tests.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Generate ONNX test models for validating GEMM, Conv, Pooling, and Relu implementations."""
+"""Generate ONNX test models for validating GEMM, Conv, Pooling, Relu, ReduceMean, Sigmoid, Add, Mul, and Div implementations."""
 
 import numpy as np
 import onnx
@@ -19,102 +19,8 @@ def save_model(model, directory, filename):
     print(f" {path.relative_to(OPERATIONS_DIR)}")
 
 
-# ---------------------------------------------------------------------------
-# GEMM tests
-# ---------------------------------------------------------------------------
-
-def gemm_non_square():
-    """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
-    B, K, N = 4, 128, 64
-    W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W")
-    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
-    
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"]) - graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/non_square", "gemm_non_square.onnx") - - -def gemm_with_bias(): - """GEMM with bias: Y = A @ W + C.""" - B, K, N = 4, 128, 128 - rng = np.random.default_rng(43) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"]) - graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/with_bias", "gemm_with_bias.onnx") - - -def gemm_transB(): - """GEMM with transB=1: Y = A @ W^T.""" - B, K, N = 4, 128, 64 - rng = np.random.default_rng(44) - # W stored as [N, K], transposed during computation - W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1) - graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/transB", "gemm_transB.onnx") - - -def gemm_alpha_beta(): - """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C.""" - B, K, N = 4, 64, 64 - rng = np.random.default_rng(45) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25) - graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx") - - -def gemm_small(): - """Small GEMM: [2, 8] @ [8, 4].""" - B, K, N = 2, 8, 4 - rng = np.random.default_rng(46) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"]) - graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/small", "gemm_small.onnx") - - -def gemm_large(): - """Larger GEMM: [8, 256] @ [256, 128].""" - B, K, N = 8, 256, 128 - rng = np.random.default_rng(47) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"]) - graph = helper.make_graph([node], "gemm_large", [A], 
[Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/large", "gemm_large.onnx") - - -def gemm_transB_with_bias(): - """GEMM with transB and bias: Y = A @ W^T + C.""" - B, K, N = 4, 128, 64 - rng = np.random.default_rng(48) - W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") - C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1) - graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx") +def make_int64_initializer(name, values): + return numpy_helper.from_array(np.asarray(values, dtype=np.int64), name=name) # --------------------------------------------------------------------------- @@ -248,6 +154,104 @@ def conv_large_spatial(): save_model(model, "conv/large_spatial", "conv_large_spatial.onnx") +# --------------------------------------------------------------------------- +# GEMM tests +# --------------------------------------------------------------------------- + +def gemm_non_square(): + """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N.""" + B, K, N = 4, 128, 64 + W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"]) + graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/non_square", "gemm_non_square.onnx") + + +def gemm_with_bias(): + """GEMM with bias: Y = A @ W + C.""" + B, K, N = 4, 128, 128 + rng = np.random.default_rng(43) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"]) + graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/with_bias", "gemm_with_bias.onnx") + + +def gemm_transB(): + """GEMM with transB=1: Y = A @ W^T.""" + B, K, N = 4, 128, 64 + rng = np.random.default_rng(44) + # W stored as [N, K], transposed during computation + W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1) + graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/transB", "gemm_transB.onnx") + + +def gemm_alpha_beta(): + """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C.""" + B, K, N = 4, 64, 64 + 
rng = np.random.default_rng(45) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25) + graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx") + + +def gemm_small(): + """Small GEMM: [2, 8] @ [8, 4].""" + B, K, N = 2, 8, 4 + rng = np.random.default_rng(46) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"]) + graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/small", "gemm_small.onnx") + + +def gemm_large(): + """Larger GEMM: [8, 256] @ [256, 128].""" + B, K, N = 8, 256, 128 + rng = np.random.default_rng(47) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"]) + graph = helper.make_graph([node], "gemm_large", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/large", "gemm_large.onnx") + + +def gemm_transB_with_bias(): + """GEMM with transB and bias: Y = A @ W^T + C.""" + B, K, N = 4, 128, 64 + rng = np.random.default_rng(48) + W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1) + graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx") + + # --------------------------------------------------------------------------- # Pooling tests # --------------------------------------------------------------------------- @@ -327,6 +331,55 @@ def maxpool_after_conv(): save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx") +# --------------------------------------------------------------------------- +# ReduceMean tests +# --------------------------------------------------------------------------- + +def reducemean_basic(): + """ReduceMean over the feature dimension, preserving rank.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) + graph = helper.make_graph([node], "reducemean_basic", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + 
save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx") + + +def reducemean_keepdims_0(): + """ReduceMean over the feature dimension, dropping the reduced axis.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0) + graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx") + + +def reducemean_4d_spatial(): + """ReduceMean over H and W on an NCHW tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx") + + +def reducemean_after_conv(): + """Conv followed by ReduceMean over the spatial dimensions.""" + rng = np.random.default_rng(62) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1]) + W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") + B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") + conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], + kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) + reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx") + + # --------------------------------------------------------------------------- # Relu tests # --------------------------------------------------------------------------- @@ -381,6 +434,220 @@ def relu_after_gemm(): save_model(model, "relu/after_gemm", "relu_after_gemm.onnx") +# --------------------------------------------------------------------------- +# Sigmoid tests +# --------------------------------------------------------------------------- + +def sigmoid_basic(): + """Standalone Sigmoid on a simple 2D tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + node = helper.make_node("Sigmoid", ["X"], ["Y"]) + graph = helper.make_graph([node], "sigmoid_basic", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "sigmoid/basic", "sigmoid_basic.onnx") + + +def sigmoid_4d(): + """Standalone Sigmoid on an NCHW tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4]) + node = helper.make_node("Sigmoid", ["X"], ["Y"]) + graph = helper.make_graph([node], "sigmoid_4d", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "sigmoid/4d", "sigmoid_4d.onnx") + + +def sigmoid_after_gemm(): + """Gemm followed by Sigmoid.""" + B, K, N = 4, 64, 32 + rng = 
np.random.default_rng(63) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) + sigmoid = helper.make_node("Sigmoid", ["G"], ["Y"]) + graph = helper.make_graph([gemm, sigmoid], "sigmoid_after_gemm", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx") + + +# --------------------------------------------------------------------------- +# Add tests +# --------------------------------------------------------------------------- + +def add_basic(): + """Elementwise Add on two inputs with identical shapes.""" + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + node = helper.make_node("Add", ["A", "B"], ["Y"]) + graph = helper.make_graph([node], "add_basic", [A, B], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "add/basic", "add_basic.onnx") + + +def add_broadcast_row(): + """Elementwise Add with row-vector broadcasting.""" + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + B = numpy_helper.from_array(np.random.default_rng(64).uniform(-1, 1, (8,)).astype(np.float32), name="B") + node = helper.make_node("Add", ["A", "B"], ["Y"]) + graph = helper.make_graph([node], "add_broadcast_row", [A], [Y], initializer=[B]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "add/broadcast_row", "add_broadcast_row.onnx") + + +def add_after_gemm(): + """Gemm followed by Add with a broadcast bias vector.""" + B, K, N = 4, 64, 32 + rng = np.random.default_rng(65) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + D = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="D") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) + add = helper.make_node("Add", ["G", "D"], ["Y"]) + graph = helper.make_graph([gemm, add], "add_after_gemm", [A], [Y], initializer=[W, C, D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "add/after_gemm", "add_after_gemm.onnx") + + +# --------------------------------------------------------------------------- +# Mul tests +# --------------------------------------------------------------------------- + +def mul_basic(): + """Elementwise Mul on two inputs with identical shapes.""" + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + node = helper.make_node("Mul", ["A", "B"], ["Y"]) + graph = helper.make_graph([node], "mul_basic", [A, B], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + 
save_model(model, "mul/basic", "mul_basic.onnx") + + +def mul_scalar_constant(): + """Elementwise Mul with scalar broadcasting.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + S = numpy_helper.from_array(np.asarray([1.5], dtype=np.float32), name="S") + node = helper.make_node("Mul", ["X", "S"], ["Y"]) + graph = helper.make_graph([node], "mul_scalar_constant", [X], [Y], initializer=[S]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "mul/scalar_constant", "mul_scalar_constant.onnx") + + +def mul_after_conv(): + """Conv followed by Mul with per-channel scaling.""" + rng = np.random.default_rng(66) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3]) + W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") + B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") + S = numpy_helper.from_array(rng.uniform(0.5, 1.5, (1, 2, 1, 1)).astype(np.float32), name="S") + conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], + kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) + mul = helper.make_node("Mul", ["C", "S"], ["Y"]) + graph = helper.make_graph([conv, mul], "mul_after_conv", [X], [Y], initializer=[W, B, S]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "mul/after_conv", "mul_after_conv.onnx") + + +# --------------------------------------------------------------------------- +# Div tests +# --------------------------------------------------------------------------- + +def div_basic(): + """Elementwise Div by a same-shape constant tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + D = numpy_helper.from_array(np.random.default_rng(67).uniform(0.5, 2.0, (4, 8)).astype(np.float32), name="D") + node = helper.make_node("Div", ["X", "D"], ["Y"]) + graph = helper.make_graph([node], "div_basic", [X], [Y], initializer=[D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "div/basic", "div_basic.onnx") + + +def div_scalar_constant(): + """Elementwise Div with scalar broadcasting.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + S = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="S") + node = helper.make_node("Div", ["X", "S"], ["Y"]) + graph = helper.make_graph([node], "div_scalar_constant", [X], [Y], initializer=[S]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "div/scalar_constant", "div_scalar_constant.onnx") + + +def div_after_gemm(): + """Gemm followed by Div with a broadcast divisor vector.""" + B, K, N = 4, 64, 32 + rng = np.random.default_rng(68) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + D = numpy_helper.from_array(rng.uniform(0.5, 2.0, (N,)).astype(np.float32), name="D") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) + div = helper.make_node("Div", ["G", 
"D"], ["Y"]) + graph = helper.make_graph([gemm, div], "div_after_gemm", [A], [Y], initializer=[W, C, D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "div/after_gemm", "div_after_gemm.onnx") + + +# --------------------------------------------------------------------------- +# ReduceMean tests +# --------------------------------------------------------------------------- + +def reducemean_basic(): + """ReduceMean over the feature dimension, preserving rank.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) + graph = helper.make_graph([node], "reducemean_basic", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx") + + +def reducemean_keepdims_0(): + """ReduceMean over the feature dimension, dropping the reduced axis.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0) + graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx") + + +def reducemean_4d_spatial(): + """ReduceMean over H and W on an NCHW tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx") + + +def reducemean_after_conv(): + """Conv followed by ReduceMean over the spatial dimensions.""" + rng = np.random.default_rng(62) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1]) + W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") + B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") + conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], + kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) + reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx") + + # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- @@ -415,10 +682,36 @@ if __name__ == "__main__": avgpool_include_pad() maxpool_after_conv() + print("\nGenerating ReduceMean tests:") + reducemean_basic() + reducemean_keepdims_0() + reducemean_4d_spatial() + reducemean_after_conv() + print("\nGenerating Relu tests:") relu_basic() relu_4d() relu_after_conv() relu_after_gemm() + print("\nGenerating Sigmoid tests:") + sigmoid_basic() + sigmoid_4d() + sigmoid_after_gemm() + + print("\nGenerating 
Add tests:") + add_basic() + add_broadcast_row() + add_after_gemm() + + print("\nGenerating Mul tests:") + mul_basic() + mul_scalar_constant() + mul_after_conv() + + print("\nGenerating Div tests:") + div_basic() + div_scalar_constant() + div_after_gemm() + print("\nDone.") diff --git a/validation/operations/mul/after_conv/mul_after_conv.onnx b/validation/operations/mul/after_conv/mul_after_conv.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8c88597e2dcb03a90df29d74d6c44eeb48510a9e GIT binary patch literal 460 zcmdwCCbH$wocP4F zlGLL3WRTe}7&(|Ym_bm0(TOqK>jvYp&k^==MMv$tS48bUUQuVaNIr4zv`t3#5>ej! zUq1c6*Koz+y;pq#?Jp&Z+85Ur+kOhN-2cQyWX|`?bbf!+8>y*dasT)v;79; zJbTL*n)VGT+V)ppPqOneY}!+3@X_w1fP?)7QEU6X(l2*EF_hlFSfXnG`>#g!-#rTK z=PRw)m#ylw|Gj8tOwEzJ?)%q`7{Pf9G#OpX!;N(u3C@o+E-ad0tlumCX! VNYDvgFbOCai7x2G!o?uK3jonD3q$|_ literal 0 HcmV?d00001 diff --git a/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx b/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx new file mode 100644 index 0000000000000000000000000000000000000000..600cf8b755dc9f5a7a149830d23f89aeebe267af GIT binary patch literal 104 zcmdUI0u<5W4^X literal 0 HcmV?d00001 diff --git a/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx b/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4658251bcc8075d41ab127d581daabae3fed6223 GIT binary patch literal 144 zcmdnuF$nMi00rI|pa1{> literal 0 HcmV?d00001 diff --git a/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx b/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx new file mode 100644 index 0000000000000000000000000000000000000000..deac445368a2ab44a9e3612f2ac45013f2060579 GIT binary patch literal 478 zcmdwCCbH-_*oBEnY5` z#0sDiCWtxwTpZb{sRb#Sxy2GdJxoHPMPRkLK(+CSX(g#e@yQ@(ykO*D;$Q|r0Y)dr zaIYJT4v*vQJa_!Ho4B3dzQpF{z9aXK+L@|F*?Xnlw}~mUw}0WTZg1tozJGeIx_zrw z>Hf>-1oyw)cVu6Xv-AF^zkci$xY=$eHUN$S0JjXlQpdsw&a>l8?}+wi|= zzr!K7{Ygelc6a1!?C!n!xA$AD+Y`!=zK z*fXvx0CEP%;utk_oDa(TRnNL4X$khdG#x literal 0 HcmV?d00001 diff --git a/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx b/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5760eb68ceac42f28b1c97e2102da62fc2d1f6c8 GIT binary patch literal 120 zcmdm6iK*2~fK}IJQE(QT!0D90C A#{d8T literal 0 HcmV?d00001 diff --git a/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx b/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx new file mode 100644 index 0000000000000000000000000000000000000000..53626bcfef2ff214a3c13cbb7d7190f656ea4c27 GIT binary patch literal 121 zcmd9r=VFWyVvJN`56(=_&Cg5`;wpwP;!RSbq(PEmTp}EdLIPY&985sW48$xT RoCK7N#3kv(!o?uK3jiO!3`GC{ literal 0 HcmV?d00001 diff --git a/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx b/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx new file mode 100644 index 0000000000000000000000000000000000000000..58245f171e0395cbfe89be5d241b5f9712dd1187 GIT binary patch literal 8458 zcmZ9ScT~v#_s1)vts!Y@YNu4%?|T~(4W(2{sI+M+4av$Tl)XvF$||e(y?m@hk}Z@S z5i+xXo%8+g_s8p;*XzDsf4$DR&wC%wb7d4=|9Q%(%1L_4Nk+&?daFwM8us>0NK2Db zl9TjRm5el$4oyi)%See=Rh{;~&7|1GoP_L2Nj;A&mU+l{$mlCedPzq3&Gu**@J$#m zTZC8rwYkRI+Wk$ECU3rA$_Gmq(t6(qWW3*&{%k2kJ((eOt3xl=4OcN@a-L0tH}54C zlUlkH7V2K3IgYggl<@YtOwtd`Cb>MoH`-2Y__>c@t=2)k45o3EaUM417}M$lEp 
[... base85 binary payload elided ...]
literal 0
HcmV?d00001

diff --git a/validation/operations/sigmoid/basic/sigmoid_basic.onnx b/validation/operations/sigmoid/basic/sigmoid_basic.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..977731e791112f1a4e358a8f91becbd08eb57f1a
GIT binary patch
literal 84
[... base85 binary payload elided ...]
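All of the fixtures above are produced by gen_tests.py with seeded RNGs, so re-running the script regenerates them deterministically and the binaries never need hand-editing. For end-to-end validation, reference outputs can be computed on the host and compared against the simulator's results. A sketch, not part of the patch, assuming the `onnxruntime` package is installed; it feeds random data to the fixture's declared inputs (for add_basic, A and B of shape [4, 8]):

    import numpy as np
    import onnxruntime as ort

    # Execute a generated fixture on the CPU to obtain reference outputs.
    sess = ort.InferenceSession(
        "validation/operations/add/basic/add_basic.onnx",
        providers=["CPUExecutionProvider"],
    )
    rng = np.random.default_rng(0)
    feeds = {
        inp.name: rng.uniform(-1, 1, inp.shape).astype(np.float32)
        for inp in sess.get_inputs()
    }
    # Reference values to compare against the backend under test.
    outputs = sess.run(None, feeds)
    print([out.shape for out in outputs])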