extend operation support for conv and gemm

add more tests in validation
2026-03-23 14:46:08 +01:00
parent 2676f2c7ef
commit 670d6ce94f
29 changed files with 982 additions and 29 deletions
@@ -0,0 +1,47 @@
+# Validation Operations
+
+ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline.
+
+The Conv and Gemm tests not marked "hand-crafted" below are produced by `gen_tests.py` and can be regenerated with:
+```
+python3 validation/operations/gen_tests.py
+```
+
+## Conv
+
+| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
+|------|-----------|-------|--------|--------|--------|---------|------|-------|
+| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
+| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
+| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
+| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
+| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
+| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
+| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
+| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
+| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
+| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
+| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
+
+## Gemm
+
+| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
+|------|-----------|-----------|------------|--------|--------|-------|------|------|-------|
+| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
+| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
+| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
+| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
+| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
+| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
+| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
+| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
+
+## Gemv
+
+| Test | Directory | Input | W (weight) | Output | Bias | Notes |
+|------|-----------|-------|------------|--------|------|-------|
+| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
+| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
+| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
+| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
+| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+"""Generate ONNX test models for validating GEMM and Conv implementations."""
+
+import numpy as np
+import onnx
+from onnx import helper, TensorProto, numpy_helper
+from pathlib import Path
+
+OPERATIONS_DIR = Path(__file__).parent
+
+
+def save_model(model, directory, filename):
+    """Validate an ONNX model and write it below ``OPERATIONS_DIR``.
+
+    Args:
+        model: The ONNX model to check and serialize.
+        directory: Destination directory, relative to ``OPERATIONS_DIR``;
+            created (with parents) when it does not exist yet.
+        filename: Name of the ``.onnx`` file written inside ``directory``.
+
+    Any error raised by ``onnx.checker.check_model`` propagates to the
+    caller, and in that case nothing is written to disk.
+    """
+    # Validate first so an invalid model never leaves an empty directory behind.
+    onnx.checker.check_model(model)
+    target_dir = OPERATIONS_DIR / directory
+    target_dir.mkdir(parents=True, exist_ok=True)
+    path = target_dir / filename
+    onnx.save(model, str(path))
+    # Report the written file relative to the operations directory.
+    print(f"  {path.relative_to(OPERATIONS_DIR)}")
+
+
+# ---------------------------------------------------------------------------
+# GEMM tests
+# ---------------------------------------------------------------------------
+
+def gemm_non_square():
+    """Emit a Gemm model whose constant weight matrix is rectangular.
+
+    Input ``A`` is [4, 128], the initializer ``W`` is [128, 64] and the
+    output ``Y`` is [4, 64]. Weights are drawn uniformly from [-1, 1) with a
+    fixed seed so regenerated files are reproducible.
+    """
+    batch, inner, outer = 4, 128, 64
+    rng = np.random.default_rng(42)
+    weight_values = rng.uniform(-1, 1, (inner, outer)).astype(np.float32)
+    weight = numpy_helper.from_array(weight_values, name="W")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
+    graph = helper.make_graph(
+        [gemm], "gemm_non_square", [a_info], [y_info], initializer=[weight])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/non_square", "gemm_non_square.onnx")
+
+
+def gemm_with_bias():
+    """Emit a Gemm model with a constant bias operand: Y = A @ W + C.
+
+    ``A`` is [4, 128], ``W`` is [128, 128] and ``C`` is a [128] vector; both
+    constants come from one seeded generator (weight drawn first, then bias).
+    """
+    batch, inner, outer = 4, 128, 128
+    rng = np.random.default_rng(43)
+    # Draw order matters for reproducibility: weight first, bias second.
+    weight_values = rng.uniform(-1, 1, (inner, outer)).astype(np.float32)
+    bias_values = rng.uniform(-1, 1, (outer,)).astype(np.float32)
+    weight = numpy_helper.from_array(weight_values, name="W")
+    bias = numpy_helper.from_array(bias_values, name="C")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
+    graph = helper.make_graph(
+        [gemm], "gemm_with_bias", [a_info], [y_info], initializer=[weight, bias])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/with_bias", "gemm_with_bias.onnx")
+
+
+def gemm_transB():
+    """Emit a Gemm model with ``transB=1``: Y = A @ W^T.
+
+    ``A`` is [4, 128]; the weight initializer is stored as [64, 128] and the
+    ``transB`` attribute requests its transpose, giving ``Y`` of [4, 64].
+    """
+    batch, inner, outer = 4, 128, 64
+    rng = np.random.default_rng(44)
+    # The stored layout is [N, K]; the node attribute handles the transpose.
+    stored_shape = (outer, inner)
+    weight = numpy_helper.from_array(
+        rng.uniform(-1, 1, stored_shape).astype(np.float32), name="W")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
+    graph = helper.make_graph(
+        [gemm], "gemm_transB", [a_info], [y_info], initializer=[weight])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/transB", "gemm_transB.onnx")
+
+
+def gemm_alpha_beta():
+    """Emit a Gemm model with scaling: Y = 0.5 * A @ W + 0.25 * C.
+
+    ``A`` is [4, 64], ``W`` is [64, 64] and ``C`` is a [64] vector; the node
+    carries ``alpha=0.5`` and ``beta=0.25``.
+    """
+    batch, inner, outer = 4, 64, 64
+    rng = np.random.default_rng(45)
+    # Draw order matters for reproducibility: weight first, bias second.
+    weight_values = rng.uniform(-1, 1, (inner, outer)).astype(np.float32)
+    bias_values = rng.uniform(-1, 1, (outer,)).astype(np.float32)
+    weight = numpy_helper.from_array(weight_values, name="W")
+    bias = numpy_helper.from_array(bias_values, name="C")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node(
+        "Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25)
+    graph = helper.make_graph(
+        [gemm], "gemm_alpha_beta", [a_info], [y_info], initializer=[weight, bias])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx")
+
+
+def gemm_small():
+    """Emit a tiny Gemm model: [2, 8] input times an [8, 4] constant weight."""
+    batch, inner, outer = 2, 8, 4
+    rng = np.random.default_rng(46)
+    weight_values = rng.uniform(-1, 1, (inner, outer)).astype(np.float32)
+    weight = numpy_helper.from_array(weight_values, name="W")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
+    graph = helper.make_graph(
+        [gemm], "gemm_small", [a_info], [y_info], initializer=[weight])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/small", "gemm_small.onnx")
+
+
+def gemm_large():
+    """Emit a larger Gemm model: [8, 256] input times a [256, 128] weight."""
+    batch, inner, outer = 8, 256, 128
+    rng = np.random.default_rng(47)
+    weight_values = rng.uniform(-1, 1, (inner, outer)).astype(np.float32)
+    weight = numpy_helper.from_array(weight_values, name="W")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
+    graph = helper.make_graph(
+        [gemm], "gemm_large", [a_info], [y_info], initializer=[weight])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/large", "gemm_large.onnx")
+
+
+def gemm_transB_with_bias():
+    """Emit a Gemm model combining ``transB=1`` and a bias: Y = A @ W^T + C.
+
+    ``A`` is [4, 128], the weight is stored as [64, 128], the bias ``C`` is a
+    [64] vector and the output ``Y`` is [4, 64].
+    """
+    batch, inner, outer = 4, 128, 64
+    rng = np.random.default_rng(48)
+    # Draw order matters for reproducibility: weight first, bias second.
+    weight_values = rng.uniform(-1, 1, (outer, inner)).astype(np.float32)
+    bias_values = rng.uniform(-1, 1, (outer,)).astype(np.float32)
+    weight = numpy_helper.from_array(weight_values, name="W")
+    bias = numpy_helper.from_array(bias_values, name="C")
+    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, inner])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, outer])
+    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
+    graph = helper.make_graph(
+        [gemm], "gemm_transB_with_bias", [a_info], [y_info],
+        initializer=[weight, bias])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx")
+
+
+# ---------------------------------------------------------------------------
+# Conv tests
+# ---------------------------------------------------------------------------
+
+def conv_3x3_kernel():
+    """Emit an unpadded, stride-1 Conv model with a 3x3 kernel.
+
+    Declared shapes: input [1, 1, 5, 5], kernel [1, 1, 3, 3], output
+    [1, 1, 3, 3].
+    """
+    rng = np.random.default_rng(50)
+    kernel_values = rng.uniform(-1, 1, (1, 1, 3, 3)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 5, 5])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 3, 3])
+    conv = helper.make_node(
+        "Conv", ["X", "W"], ["Y"],
+        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
+    graph = helper.make_graph(
+        [conv], "conv_3x3", [x_info], [y_info], initializer=[kernel])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/kernel_3x3", "conv_kernel_3x3.onnx")
+
+
+def conv_stride2():
+    """Emit an unpadded Conv model with a 3x3 kernel and stride 2.
+
+    Declared shapes: input [1, 1, 6, 6], kernel [1, 1, 3, 3], output
+    [1, 1, 2, 2].
+    """
+    rng = np.random.default_rng(51)
+    kernel_values = rng.uniform(-1, 1, (1, 1, 3, 3)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 6, 6])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 2, 2])
+    conv = helper.make_node(
+        "Conv", ["X", "W"], ["Y"],
+        kernel_shape=[3, 3], strides=[2, 2], pads=[0, 0, 0, 0])
+    graph = helper.make_graph(
+        [conv], "conv_stride2", [x_info], [y_info], initializer=[kernel])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/stride_2", "conv_stride_2.onnx")
+
+
+def conv_multi_channel():
+    """Emit a Conv model with 3 input channels and 4 output channels.
+
+    Declared shapes: input [1, 3, 5, 5], kernel [4, 3, 3, 3], output
+    [1, 4, 3, 3]; stride 1, no padding.
+    """
+    rng = np.random.default_rng(52)
+    kernel_values = rng.uniform(-1, 1, (4, 3, 3, 3)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 3, 3])
+    conv = helper.make_node(
+        "Conv", ["X", "W"], ["Y"],
+        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
+    graph = helper.make_graph(
+        [conv], "conv_multi_channel", [x_info], [y_info], initializer=[kernel])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/multi_channel", "conv_multi_channel.onnx")
+
+
+def conv_1x1():
+    """Emit a pointwise (1x1 kernel) Conv model that mixes channels.
+
+    Declared shapes: input [1, 8, 4, 4], kernel [4, 8, 1, 1], output
+    [1, 4, 4, 4]; stride 1, no padding.
+    """
+    rng = np.random.default_rng(53)
+    kernel_values = rng.uniform(-1, 1, (4, 8, 1, 1)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 8, 4, 4])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 4, 4])
+    conv = helper.make_node(
+        "Conv", ["X", "W"], ["Y"],
+        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])
+    graph = helper.make_graph(
+        [conv], "conv_1x1", [x_info], [y_info], initializer=[kernel])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/pointwise_1x1", "conv_1x1.onnx")
+
+
+def conv_same_padding_3x3():
+    """Emit a 3x3 Conv model using ``auto_pad="SAME_UPPER"``.
+
+    Declared shapes: input [1, 1, 5, 5], kernel [1, 1, 3, 3], output
+    [1, 1, 5, 5] (spatial size preserved); stride 1.
+    """
+    rng = np.random.default_rng(54)
+    kernel_values = rng.uniform(-1, 1, (1, 1, 3, 3)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 5, 5])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 5, 5])
+    conv = helper.make_node(
+        "Conv", ["X", "W"], ["Y"],
+        kernel_shape=[3, 3], strides=[1, 1], auto_pad="SAME_UPPER")
+    graph = helper.make_graph(
+        [conv], "conv_same_3x3", [x_info], [y_info], initializer=[kernel])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/same_padding_3x3", "conv_same_padding_3x3.onnx")
+
+
+def conv_explicit_padding():
+    """Conv 3x3 with explicit symmetric padding (one pixel on every side).
+
+    The ``pads`` attribute is given explicitly as [1, 1, 1, 1] instead of
+    relying on ``auto_pad``, so the declared output keeps the input's
+    spatial size.
+    """
+    # Input: [1, 1, 4, 4], Kernel: [1, 1, 3, 3], pads=[1,1,1,1] -> Output: [1, 1, 4, 4]
+    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 4, 4])
+    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 4, 4])
+    W = numpy_helper.from_array(
+        np.random.default_rng(55).uniform(-1, 1, (1, 1, 3, 3)).astype(np.float32), name="W")
+    node = helper.make_node("Conv", ["X", "W"], ["Y"],
+                            kernel_shape=[3, 3], strides=[1, 1], pads=[1, 1, 1, 1])
+    graph = helper.make_graph([node], "conv_explicit_pad", [X], [Y], initializer=[W])
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+    save_model(model, "conv/explicit_padding", "conv_explicit_padding.onnx")
+
+
+def conv_with_bias_3x3():
+    """Emit a multi-channel 3x3 Conv model with a constant bias.
+
+    Declared shapes: input [1, 3, 5, 5], kernel [2, 3, 3, 3], bias [2],
+    output [1, 2, 3, 3]; stride 1, no padding.
+    """
+    rng = np.random.default_rng(56)
+    # Draw order matters for reproducibility: kernel first, bias second.
+    kernel_values = rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32)
+    bias_values = rng.uniform(-1, 1, (2,)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    bias = numpy_helper.from_array(bias_values, name="B")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3])
+    conv = helper.make_node(
+        "Conv", ["X", "W", "B"], ["Y"],
+        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
+    graph = helper.make_graph(
+        [conv], "conv_with_bias_3x3", [x_info], [y_info],
+        initializer=[kernel, bias])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/with_bias_3x3", "conv_with_bias_3x3.onnx")
+
+
+def conv_batch_2():
+    """Emit a batch-of-2 Conv model with ``SAME_UPPER`` padding and a bias.
+
+    Declared shapes: input [2, 3, 3, 3], kernel [1, 3, 2, 2], bias [1],
+    output [2, 1, 3, 3]; stride 1.
+    """
+    rng = np.random.default_rng(57)
+    # Draw order matters for reproducibility: kernel first, bias second.
+    kernel_values = rng.uniform(-1, 1, (1, 3, 2, 2)).astype(np.float32)
+    bias_values = rng.uniform(-1, 1, (1,)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    bias = numpy_helper.from_array(bias_values, name="B")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 3, 3])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 1, 3, 3])
+    conv = helper.make_node(
+        "Conv", ["X", "W", "B"], ["Y"],
+        kernel_shape=[2, 2], strides=[1, 1], auto_pad="SAME_UPPER")
+    graph = helper.make_graph(
+        [conv], "conv_batch_2", [x_info], [y_info], initializer=[kernel, bias])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/batch_2", "conv_batch_2.onnx")
+
+
+def conv_large_spatial():
+    """Emit an unpadded 3x3 Conv model over a larger spatial input.
+
+    Declared shapes: input [1, 1, 8, 8], kernel [1, 1, 3, 3], output
+    [1, 1, 6, 6]; stride 1.
+    """
+    rng = np.random.default_rng(58)
+    kernel_values = rng.uniform(-1, 1, (1, 1, 3, 3)).astype(np.float32)
+    kernel = numpy_helper.from_array(kernel_values, name="W")
+    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 8, 8])
+    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 6, 6])
+    conv = helper.make_node(
+        "Conv", ["X", "W"], ["Y"],
+        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
+    graph = helper.make_graph(
+        [conv], "conv_large_spatial", [x_info], [y_info], initializer=[kernel])
+    opset = helper.make_opsetid("", 13)
+    model = helper.make_model(graph, opset_imports=[opset])
+    save_model(model, "conv/large_spatial", "conv_large_spatial.onnx")
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    # Generators run in the listed order; each one prints the file it writes.
+    gemm_generators = (
+        gemm_non_square,
+        gemm_with_bias,
+        gemm_transB,
+        gemm_alpha_beta,
+        gemm_small,
+        gemm_large,
+        gemm_transB_with_bias,
+    )
+    conv_generators = (
+        conv_3x3_kernel,
+        conv_stride2,
+        conv_multi_channel,
+        conv_1x1,
+        conv_same_padding_3x3,
+        conv_explicit_padding,
+        conv_with_bias_3x3,
+        conv_batch_2,
+        conv_large_spatial,
+    )
+
+    print("Generating GEMM tests:")
+    for generate in gemm_generators:
+        generate()
+
+    print("\nGenerating Conv tests:")
+    for generate in conv_generators:
+        generate()
+
+    print("\nDone.")