add support for operations: reduceMean, add, mul, div, sigmoid
Some checks failed
Validate Operations / validate-operations (push) Failing after 51m52s

This commit is contained in:
NiccoloN
2026-03-30 15:41:12 +02:00
parent 5e7114f517
commit 39830be888
32 changed files with 1057 additions and 224 deletions


@@ -3,66 +3,108 @@
ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline.
Generated tests can be regenerated with:
```
python3 validation/operations/gen_tests.py
```
## Conv
| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
|------------------|-------------------------|-----------|-----------|--------|--------|------------|------|------------------------------------|
| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
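The output shapes above all follow the standard (dilation-1) convolution arithmetic. A minimal sketch of that rule, checked against three rows of the table (the helper name is illustrative, not part of the repo):

```python
def conv_out_dim(in_dim, kernel, stride=1, pad_begin=0, pad_end=0):
    """Output size of one spatial dimension for a dilation-1 convolution."""
    return (in_dim + pad_begin + pad_end - kernel) // stride + 1

# Kernel 3x3 test: input [1,1,5,5], 3x3 kernel -> spatial 3
assert conv_out_dim(5, 3) == 3
# Stride 2 test: input [1,1,6,6], 3x3 kernel, stride 2 -> spatial 2
assert conv_out_dim(6, 3, stride=2) == 2
# Explicit padding test: input [1,1,4,4], 3x3 kernel, pads [1,1,1,1] -> spatial 4
assert conv_out_dim(4, 3, pad_begin=1, pad_end=1) == 4
```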
## Gemm
| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
|---------------|-------------------------|-----------|------------|----------|--------|-------|------|-------|------------------------------|
| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
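Every Gemm row above is an instance of the ONNX formula `Y = alpha * A' @ B' + beta * C`, where `'` applies the optional transposes. A numpy reference sketch of that formula (this mirrors the ONNX spec; it is not code from the repo):

```python
import numpy as np

def gemm_ref(A, B, C=None, alpha=1.0, beta=1.0, transA=0, transB=0):
    """NumPy reference for ONNX Gemm: Y = alpha * A' @ B' + beta * C."""
    A = A.T if transA else A
    B = B.T if transB else B
    Y = alpha * (A @ B)
    if C is not None:
        Y = Y + beta * C  # C broadcasts, e.g. a [N] bias vector
    return Y

# transB row: A is [4,128], W is stored as [64,128], output is [4,64]
rng = np.random.default_rng(0)
A = rng.uniform(-1, 1, (4, 128)).astype(np.float32)
W = rng.uniform(-1, 1, (64, 128)).astype(np.float32)
assert gemm_ref(A, W, transB=1).shape == (4, 64)
```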
## Gemv
| Test | Directory | Input | W (weight) | Output | Bias | Notes |
|---------------------|------------------------------------|----------|------------|---------|---------|----------------------------|
| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |
## Pool
| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes |
|----------------------------|---------------------------------|-----------|-----------|------------------------|--------|------------|----------------------------------|
| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling |
| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool |
| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path |
| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling |
| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` |
| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` |
| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` |
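The two `count_include_pad` rows differ only in the divisor used for windows that overlap padding: with `count_include_pad=0` the sum is divided by the number of real (non-padding) elements, with `count_include_pad=1` by the full kernel size. A numpy sketch of that distinction (the helper is illustrative, not from the repo):

```python
import numpy as np

def avgpool2d_ref(x, k, stride, pad, count_include_pad):
    """NumPy reference for 2D AveragePool on one [H, W] feature map.

    pad is (top, left, bottom, right), matching the ONNX pads ordering.
    """
    t, l, b, r = pad
    h, w = x.shape
    xp = np.pad(x, ((t, b), (l, r)))
    # mask of real (non-padding) positions, used when count_include_pad=0
    mask = np.pad(np.ones_like(x), ((t, b), (l, r)))
    oh = (h + t + b - k) // stride + 1
    ow = (w + l + r - k) // stride + 1
    y = np.zeros((oh, ow), dtype=x.dtype)
    for i in range(oh):
        for j in range(ow):
            win = xp[i*stride:i*stride+k, j*stride:j*stride+k]
            n = k * k if count_include_pad else \
                mask[i*stride:i*stride+k, j*stride:j*stride+k].sum()
            y[i, j] = win.sum() / n
    return y

x = np.ones((4, 4), dtype=np.float32)
# On the corner window (4 real cells out of 9) the two modes disagree:
y0 = avgpool2d_ref(x, k=3, stride=2, pad=(1, 1, 1, 1), count_include_pad=0)
y1 = avgpool2d_ref(x, k=3, stride=2, pad=(1, 1, 1, 1), count_include_pad=1)
assert y0[0, 0] == 1.0               # 4 ones / 4 real cells
assert np.isclose(y1[0, 0], 4 / 9)   # 4 ones / 9 kernel cells
```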
## ReduceMean
| Test | Directory | Input | Output | Axes | Keepdims | Notes |
|------------|--------------------------|-----------|-----------|-------|----------|-------------------------------------------------|
| Basic | `reduce_mean/basic` | [4,8] | [4,1] | [1] | 1 | Reduce feature dimension, preserving rank |
| Keepdims 0 | `reduce_mean/keepdims_0` | [4,8] | [4] | [1] | 0 | Reduce feature dimension, dropping reduced axis |
| 4D spatial | `reduce_mean/4d_spatial` | [1,3,4,4] | [1,3,1,1] | [2,3] | 1 | Reduce H and W on NCHW input |
| After Conv | `reduce_mean/after_conv` | [1,3,5,5] | [1,2,1,1] | [2,3] | 1 | Conv 3x3 + bias, then spatial ReduceMean |
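The `Axes`/`Keepdims` columns map directly onto numpy's `mean`; a quick sanity sketch of the table's output shapes:

```python
import numpy as np

x = np.arange(32, dtype=np.float32).reshape(4, 8)
# Basic: axes=[1], keepdims=1 -> [4,1]
assert np.mean(x, axis=1, keepdims=True).shape == (4, 1)
# Keepdims 0: the reduced axis is dropped -> [4]
assert np.mean(x, axis=1, keepdims=False).shape == (4,)
# 4D spatial: axes=[2,3] on an NCHW tensor -> [1,3,1,1]
x4 = np.zeros((1, 3, 4, 4), dtype=np.float32)
assert np.mean(x4, axis=(2, 3), keepdims=True).shape == (1, 3, 1, 1)
```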
## Relu
| Test | Directory | Input | Output | Notes |
|------------|-------------------|-----------|-----------|----------------------------|
| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu |
| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu |
| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu |
| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu |
## Sigmoid
| Test | Directory | Input | Output | Notes |
|------------|----------------------|-----------|-----------|---------------------------|
| Basic | `sigmoid/basic` | [4,8] | [4,8] | Standalone 2D Sigmoid |
| 4D | `sigmoid/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Sigmoid |
| After Gemm | `sigmoid/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Sigmoid |
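Sigmoid is elementwise and shape-preserving, which is why every row keeps its input shape. A numpy reference for checking outputs, written in the numerically stable split form (a sketch; the repo's own reference implementation may differ):

```python
import numpy as np

def sigmoid_ref(x):
    """Numerically stable sigmoid: avoids exp overflow for large |x|."""
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    e = np.exp(x[~pos])          # x < 0, so exp(x) cannot overflow
    out[~pos] = e / (1.0 + e)
    return out

x = np.array([-100.0, 0.0, 100.0], dtype=np.float32)
y = sigmoid_ref(x)
assert np.isclose(y[1], 0.5)
assert np.all((y >= 0.0) & (y <= 1.0))  # no overflow at the extremes
```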
## Add
| Test | Directory | Input(s) | Output | Notes |
|---------------|---------------------|------------------|--------|---------------------------------------------|
| Basic | `add/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise add, same-shape inputs |
| Broadcast row | `add/broadcast_row` | A:[4,8], B:[8] | [4,8] | Row-vector broadcasting via initializer |
| After Gemm | `add/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Add with broadcast vector |
## Mul
| Test | Directory | Input(s) | Output | Notes |
|-----------------|-----------------------|--------------------------|-----------|-------------------------------------------|
| Basic | `mul/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise multiply, same-shape inputs |
| Scalar constant | `mul/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer |
| After Conv | `mul/after_conv` | X:[1,3,5,5], S:[1,2,1,1] | [1,2,3,3] | Conv 3x3 + bias, then per-channel scaling |
## Div
| Test | Directory | Input(s) | Output | Notes |
|-----------------|-----------------------|------------------|--------|------------------------------------------------------|
| Basic | `div/basic` | X:[4,8], D:[4,8] | [4,8] | Elementwise divide by same-shape constant tensor |
| Scalar constant | `div/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer |
| After Gemm | `div/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Div with positive broadcast vector |
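The Add/Mul/Div tests above all exercise ONNX's numpy-style multidirectional broadcasting; a sketch of the three broadcast patterns used by these tables:

```python
import numpy as np

a = np.ones((4, 8), dtype=np.float32)
row = np.arange(8, dtype=np.float32)        # like add/broadcast_row's B:[8]
s = np.array([2.0], dtype=np.float32)       # like the [1] scalar initializers

assert (a + row).shape == (4, 8)            # [8] broadcasts across rows
assert (a / s).shape == (4, 8)              # [1] broadcasts everywhere
# Per-channel scale, like mul/after_conv's S:[1,2,1,1] against [1,2,3,3]
x = np.ones((1, 2, 3, 3), dtype=np.float32)
scale = np.full((1, 2, 1, 1), 3.0, dtype=np.float32)
assert (x * scale).shape == (1, 2, 3, 3)
```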


@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Generate ONNX test models for validating GEMM, Conv, Pooling, Relu, ReduceMean, Sigmoid, Add, Mul, and Div implementations."""
import numpy as np
import onnx
@@ -19,102 +19,8 @@ def save_model(model, directory, filename):
print(f" {path.relative_to(OPERATIONS_DIR)}")
def make_int64_initializer(name, values):
return numpy_helper.from_array(np.asarray(values, dtype=np.int64), name=name)
# ---------------------------------------------------------------------------
@@ -248,6 +154,104 @@ def conv_large_spatial():
save_model(model, "conv/large_spatial", "conv_large_spatial.onnx")
# ---------------------------------------------------------------------------
# GEMM tests
# ---------------------------------------------------------------------------
def gemm_non_square():
"""GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
B, K, N = 4, 128, 64
W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/non_square", "gemm_non_square.onnx")
def gemm_with_bias():
"""GEMM with bias: Y = A @ W + C."""
B, K, N = 4, 128, 128
rng = np.random.default_rng(43)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/with_bias", "gemm_with_bias.onnx")
def gemm_transB():
"""GEMM with transB=1: Y = A @ W^T."""
B, K, N = 4, 128, 64
rng = np.random.default_rng(44)
# W stored as [N, K], transposed during computation
W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/transB", "gemm_transB.onnx")
def gemm_alpha_beta():
"""GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
B, K, N = 4, 64, 64
rng = np.random.default_rng(45)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25)
graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx")
def gemm_small():
"""Small GEMM: [2, 8] @ [8, 4]."""
B, K, N = 2, 8, 4
rng = np.random.default_rng(46)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/small", "gemm_small.onnx")
def gemm_large():
"""Larger GEMM: [8, 256] @ [256, 128]."""
B, K, N = 8, 256, 128
rng = np.random.default_rng(47)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
graph = helper.make_graph([node], "gemm_large", [A], [Y], initializer=[W])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/large", "gemm_large.onnx")
def gemm_transB_with_bias():
"""GEMM with transB and bias: Y = A @ W^T + C."""
B, K, N = 4, 128, 64
rng = np.random.default_rng(48)
W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W")
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx")
# ---------------------------------------------------------------------------
# Pooling tests
# ---------------------------------------------------------------------------
@@ -327,6 +331,55 @@ def maxpool_after_conv():
save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx")
# ---------------------------------------------------------------------------
# ReduceMean tests
# ---------------------------------------------------------------------------
def reducemean_basic():
"""ReduceMean over the feature dimension, preserving rank."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
graph = helper.make_graph([node], "reducemean_basic", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx")
def reducemean_keepdims_0():
"""ReduceMean over the feature dimension, dropping the reduced axis."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")
def reducemean_4d_spatial():
"""ReduceMean over H and W on an NCHW tensor."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")
def reducemean_after_conv():
"""Conv followed by ReduceMean over the spatial dimensions."""
rng = np.random.default_rng(62)
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
# ---------------------------------------------------------------------------
# Relu tests
# ---------------------------------------------------------------------------
@@ -381,6 +434,220 @@ def relu_after_gemm():
save_model(model, "relu/after_gemm", "relu_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Sigmoid tests
# ---------------------------------------------------------------------------
def sigmoid_basic():
"""Standalone Sigmoid on a simple 2D tensor."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
node = helper.make_node("Sigmoid", ["X"], ["Y"])
graph = helper.make_graph([node], "sigmoid_basic", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "sigmoid/basic", "sigmoid_basic.onnx")
def sigmoid_4d():
"""Standalone Sigmoid on an NCHW tensor."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4])
node = helper.make_node("Sigmoid", ["X"], ["Y"])
graph = helper.make_graph([node], "sigmoid_4d", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "sigmoid/4d", "sigmoid_4d.onnx")
def sigmoid_after_gemm():
"""Gemm followed by Sigmoid."""
B, K, N = 4, 64, 32
rng = np.random.default_rng(63)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
sigmoid = helper.make_node("Sigmoid", ["G"], ["Y"])
graph = helper.make_graph([gemm, sigmoid], "sigmoid_after_gemm", [A], [Y], initializer=[W, C])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Add tests
# ---------------------------------------------------------------------------
def add_basic():
"""Elementwise Add on two inputs with identical shapes."""
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
node = helper.make_node("Add", ["A", "B"], ["Y"])
graph = helper.make_graph([node], "add_basic", [A, B], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "add/basic", "add_basic.onnx")
def add_broadcast_row():
"""Elementwise Add with row-vector broadcasting."""
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
B = numpy_helper.from_array(np.random.default_rng(64).uniform(-1, 1, (8,)).astype(np.float32), name="B")
node = helper.make_node("Add", ["A", "B"], ["Y"])
graph = helper.make_graph([node], "add_broadcast_row", [A], [Y], initializer=[B])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "add/broadcast_row", "add_broadcast_row.onnx")
def add_after_gemm():
"""Gemm followed by Add with a broadcast bias vector."""
B, K, N = 4, 64, 32
rng = np.random.default_rng(65)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
D = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="D")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
add = helper.make_node("Add", ["G", "D"], ["Y"])
graph = helper.make_graph([gemm, add], "add_after_gemm", [A], [Y], initializer=[W, C, D])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "add/after_gemm", "add_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Mul tests
# ---------------------------------------------------------------------------
def mul_basic():
"""Elementwise Mul on two inputs with identical shapes."""
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
node = helper.make_node("Mul", ["A", "B"], ["Y"])
graph = helper.make_graph([node], "mul_basic", [A, B], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "mul/basic", "mul_basic.onnx")
def mul_scalar_constant():
"""Elementwise Mul with scalar broadcasting."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
S = numpy_helper.from_array(np.asarray([1.5], dtype=np.float32), name="S")
node = helper.make_node("Mul", ["X", "S"], ["Y"])
graph = helper.make_graph([node], "mul_scalar_constant", [X], [Y], initializer=[S])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "mul/scalar_constant", "mul_scalar_constant.onnx")
def mul_after_conv():
"""Conv followed by Mul with per-channel scaling."""
rng = np.random.default_rng(66)
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3])
W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
S = numpy_helper.from_array(rng.uniform(0.5, 1.5, (1, 2, 1, 1)).astype(np.float32), name="S")
conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
mul = helper.make_node("Mul", ["C", "S"], ["Y"])
graph = helper.make_graph([conv, mul], "mul_after_conv", [X], [Y], initializer=[W, B, S])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "mul/after_conv", "mul_after_conv.onnx")
# ---------------------------------------------------------------------------
# Div tests
# ---------------------------------------------------------------------------
def div_basic():
"""Elementwise Div by a same-shape constant tensor."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
D = numpy_helper.from_array(np.random.default_rng(67).uniform(0.5, 2.0, (4, 8)).astype(np.float32), name="D")
node = helper.make_node("Div", ["X", "D"], ["Y"])
graph = helper.make_graph([node], "div_basic", [X], [Y], initializer=[D])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "div/basic", "div_basic.onnx")

def div_scalar_constant():
"""Elementwise Div with scalar broadcasting."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
S = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="S")
node = helper.make_node("Div", ["X", "S"], ["Y"])
graph = helper.make_graph([node], "div_scalar_constant", [X], [Y], initializer=[S])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "div/scalar_constant", "div_scalar_constant.onnx")

def div_after_gemm():
"""Gemm followed by Div with a broadcast divisor vector."""
B, K, N = 4, 64, 32
rng = np.random.default_rng(68)
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
D = numpy_helper.from_array(rng.uniform(0.5, 2.0, (N,)).astype(np.float32), name="D")
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
div = helper.make_node("Div", ["G", "D"], ["Y"])
graph = helper.make_graph([gemm, div], "div_after_gemm", [A], [Y], initializer=[W, C, D])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "div/after_gemm", "div_after_gemm.onnx")
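In `div_after_gemm`, the `(N,)` divisor broadcasts against the `[B, N]` Gemm output, dividing each row elementwise by the same vector. A small numpy sketch of that rule (illustrative only, not used by the generator):

```python
import numpy as np

# A [N] divisor broadcasts across the batch dimension of a [B, N] matrix,
# dividing every row elementwise by the same vector.
G = np.full((4, 32), 6.0, dtype=np.float32)
D = np.full((32,), 2.0, dtype=np.float32)
Y = G / D
assert Y.shape == (4, 32) and (Y == 3.0).all()
```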
# ---------------------------------------------------------------------------
# ReduceMean tests
# ---------------------------------------------------------------------------
def reducemean_basic():
"""ReduceMean over the feature dimension, preserving rank."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
graph = helper.make_graph([node], "reducemean_basic", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx")

def reducemean_keepdims_0():
"""ReduceMean over the feature dimension, dropping the reduced axis."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")

def reducemean_4d_spatial():
"""ReduceMean over H and W on an NCHW tensor."""
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")

def reducemean_after_conv():
"""Conv followed by ReduceMean over the spatial dimensions."""
rng = np.random.default_rng(62)
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
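The ReduceMean tests above exercise both `keepdims` modes; their expected behavior mirrors numpy's `mean`. A quick sketch of the correspondence (illustrative only, not part of the generator):

```python
import numpy as np

# ReduceMean with axes=[2, 3] and keepdims=1 matches numpy's mean with
# axis=(2, 3) and keepdims=True; keepdims=0 drops the reduced axes instead.
X = np.arange(48, dtype=np.float32).reshape(1, 3, 4, 4)
Y = X.mean(axis=(2, 3), keepdims=True)
assert Y.shape == (1, 3, 1, 1)
assert X.mean(axis=(2, 3)).shape == (1, 3)
```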
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    avgpool_include_pad()
    maxpool_after_conv()
    print("\nGenerating ReduceMean tests:")
    reducemean_basic()
    reducemean_keepdims_0()
    reducemean_4d_spatial()
    reducemean_after_conv()
    print("\nGenerating Relu tests:")
    relu_basic()
    relu_4d()
    relu_after_conv()
    relu_after_gemm()
    print("\nGenerating Sigmoid tests:")
    sigmoid_basic()
    sigmoid_4d()
    sigmoid_after_gemm()
    print("\nGenerating Add tests:")
    add_basic()
    add_broadcast_row()
    add_after_gemm()
    print("\nGenerating Mul tests:")
    mul_basic()
    mul_scalar_constant()
    mul_after_conv()
    print("\nGenerating Div tests:")
    div_basic()
    div_scalar_constant()
    div_after_gemm()
    print("\nDone.")