extend operation support for conv and gemm

add more tests in validation
This commit is contained in:
NiccoloN
2026-03-23 14:46:08 +01:00
parent 2676f2c7ef
commit 670d6ce94f
29 changed files with 982 additions and 29 deletions

View File

@@ -0,0 +1,47 @@
# Validation Operations
ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline.
Tests produced by the generator script can be regenerated with:
```
python3 validation/operations/gen_tests.py
```
## Conv
| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
|------|-----------|-------|--------|--------|--------|---------|------|-------|
| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
## Gemm
| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
|------|-----------|-----------|------------|--------|--------|-------|------|------|-------|
| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
## Gemv
| Test | Directory | Input | W (weight) | Output | Bias | Notes |
|------|-----------|-------|------------|--------|------|-------|
| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,276 @@
#!/usr/bin/env python3
"""Generate ONNX test models for validating GEMM and Conv implementations."""
import numpy as np
import onnx
from onnx import helper, TensorProto, numpy_helper
from pathlib import Path
OPERATIONS_DIR = Path(__file__).parent
def save_model(model, directory, filename):
    """Check *model* and write it to ``OPERATIONS_DIR / directory / filename``.

    The destination directory (and any missing parents) is created first.
    The model is then passed through the ONNX checker, saved, and its path
    relative to ``OPERATIONS_DIR`` is printed.
    """
    target_dir = OPERATIONS_DIR / directory
    target_dir.mkdir(parents=True, exist_ok=True)
    target_file = target_dir / filename

    # The checker runs before the write, so a model it rejects is never saved.
    onnx.checker.check_model(model)
    onnx.save(model, str(target_file))

    relative = target_file.relative_to(OPERATIONS_DIR)
    print(f" {relative}")
# ---------------------------------------------------------------------------
# GEMM tests
# ---------------------------------------------------------------------------
def gemm_non_square():
    """Write a bias-free Gemm model with a rectangular weight (K != N).

    Declared shapes: input A [4, 128], initializer W [128, 64], output Y [4, 64].
    W is drawn uniformly from [-1, 1) with seed 42 and stored as float32.
    """
    rows, inner, cols = 4, 128, 64

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    weight_values = np.random.default_rng(42).uniform(-1, 1, (inner, cols))
    weight = numpy_helper.from_array(weight_values.astype(np.float32), name="W")

    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph(
        [gemm], "gemm_non_square", [graph_input], [graph_output], initializer=[weight]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/non_square",
        "gemm_non_square.onnx",
    )
def gemm_with_bias():
    """Write a Gemm model with a bias input: Y = A @ W + C.

    Declared shapes: input A [4, 128], initializers W [128, 128] and C [128],
    output Y [4, 128]. Both initializers come from one generator seeded with 43.
    """
    rows, inner, cols = 4, 128, 128
    generator = np.random.default_rng(43)

    # Draw order matters for reproducibility: W first, then C.
    weight_values = generator.uniform(-1, 1, (inner, cols)).astype(np.float32)
    bias_values = generator.uniform(-1, 1, (cols,)).astype(np.float32)
    weight = numpy_helper.from_array(weight_values, name="W")
    bias = numpy_helper.from_array(bias_values, name="C")

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
    graph = helper.make_graph(
        [gemm], "gemm_with_bias", [graph_input], [graph_output], initializer=[weight, bias]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/with_bias",
        "gemm_with_bias.onnx",
    )
def gemm_transB():
    """Write a Gemm model with ``transB=1``: Y = A @ W^T.

    Declared shapes: input A [4, 128], initializer W [64, 128], output Y [4, 64].
    W is drawn from a generator seeded with 44.
    """
    rows, inner, cols = 4, 128, 64
    generator = np.random.default_rng(44)

    # The weight is laid out as [N, K]; the transB attribute makes Gemm transpose it.
    weight_values = generator.uniform(-1, 1, (cols, inner)).astype(np.float32)
    weight = numpy_helper.from_array(weight_values, name="W")

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
    graph = helper.make_graph(
        [gemm], "gemm_transB", [graph_input], [graph_output], initializer=[weight]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/transB",
        "gemm_transB.onnx",
    )
def gemm_alpha_beta():
    """Write a Gemm model with scaling attributes: Y = 0.5 * A @ W + 0.25 * C.

    Declared shapes: input A [4, 64], initializers W [64, 64] and C [64],
    output Y [4, 64]. Both initializers come from one generator seeded with 45.
    """
    rows, inner, cols = 4, 64, 64
    generator = np.random.default_rng(45)

    # Draw order matters for reproducibility: W first, then C.
    weight_values = generator.uniform(-1, 1, (inner, cols)).astype(np.float32)
    bias_values = generator.uniform(-1, 1, (cols,)).astype(np.float32)
    weight = numpy_helper.from_array(weight_values, name="W")
    bias = numpy_helper.from_array(bias_values, name="C")

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    scaling = {"alpha": 0.5, "beta": 0.25}
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], **scaling)
    graph = helper.make_graph(
        [gemm], "gemm_alpha_beta", [graph_input], [graph_output], initializer=[weight, bias]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/alpha_beta",
        "gemm_alpha_beta.onnx",
    )
def gemm_small():
    """Write a tiny bias-free Gemm model: [2, 8] @ [8, 4] -> [2, 4].

    The weight initializer W is drawn from a generator seeded with 46.
    """
    rows, inner, cols = 2, 8, 4
    generator = np.random.default_rng(46)

    weight_values = generator.uniform(-1, 1, (inner, cols)).astype(np.float32)
    weight = numpy_helper.from_array(weight_values, name="W")

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph(
        [gemm], "gemm_small", [graph_input], [graph_output], initializer=[weight]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/small",
        "gemm_small.onnx",
    )
def gemm_large():
    """Write a larger bias-free Gemm model: [8, 256] @ [256, 128] -> [8, 128].

    The weight initializer W is drawn from a generator seeded with 47.
    """
    rows, inner, cols = 8, 256, 128
    generator = np.random.default_rng(47)

    weight_values = generator.uniform(-1, 1, (inner, cols)).astype(np.float32)
    weight = numpy_helper.from_array(weight_values, name="W")

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph(
        [gemm], "gemm_large", [graph_input], [graph_output], initializer=[weight]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/large",
        "gemm_large.onnx",
    )
def gemm_transB_with_bias():
    """Write a Gemm model combining ``transB=1`` and a bias: Y = A @ W^T + C.

    Declared shapes: input A [4, 128], initializers W [64, 128] and C [64],
    output Y [4, 64]. Both initializers come from one generator seeded with 48.
    """
    rows, inner, cols = 4, 128, 64
    generator = np.random.default_rng(48)

    # Draw order matters for reproducibility: W (laid out as [N, K]) first, then C.
    weight_values = generator.uniform(-1, 1, (cols, inner)).astype(np.float32)
    bias_values = generator.uniform(-1, 1, (cols,)).astype(np.float32)
    weight = numpy_helper.from_array(weight_values, name="W")
    bias = numpy_helper.from_array(bias_values, name="C")

    graph_input = helper.make_tensor_value_info("A", TensorProto.FLOAT, [rows, inner])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [rows, cols])

    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
    graph = helper.make_graph(
        [gemm],
        "gemm_transB_with_bias",
        [graph_input],
        [graph_output],
        initializer=[weight, bias],
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "gemm/transB_with_bias",
        "gemm_transB_with_bias.onnx",
    )
# ---------------------------------------------------------------------------
# Conv tests
# ---------------------------------------------------------------------------
def conv_3x3_kernel():
    """Write a Conv model with a 3x3 kernel, unit stride and zero padding.

    Declared shapes: input X [1, 1, 5, 5], kernel W [1, 1, 3, 3],
    output Y [1, 1, 3, 3]. W is drawn from a generator seeded with 50.
    """
    kernel_values = np.random.default_rng(50).uniform(-1, 1, (1, 1, 3, 3))
    kernel = numpy_helper.from_array(kernel_values.astype(np.float32), name="W")

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 5, 5])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 3, 3])

    conv_attrs = {"kernel_shape": [3, 3], "strides": [1, 1], "pads": [0, 0, 0, 0]}
    conv = helper.make_node("Conv", ["X", "W"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_3x3", [graph_input], [graph_output], initializer=[kernel]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/kernel_3x3",
        "conv_kernel_3x3.onnx",
    )
def conv_stride2():
    """Write a Conv model with a 3x3 kernel, stride 2 and zero padding.

    Declared shapes: input X [1, 1, 6, 6], kernel W [1, 1, 3, 3],
    output Y [1, 1, 2, 2]. W is drawn from a generator seeded with 51.
    """
    kernel_values = np.random.default_rng(51).uniform(-1, 1, (1, 1, 3, 3))
    kernel = numpy_helper.from_array(kernel_values.astype(np.float32), name="W")

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 6, 6])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 2, 2])

    conv_attrs = {"kernel_shape": [3, 3], "strides": [2, 2], "pads": [0, 0, 0, 0]}
    conv = helper.make_node("Conv", ["X", "W"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_stride2", [graph_input], [graph_output], initializer=[kernel]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/stride_2",
        "conv_stride_2.onnx",
    )
def conv_multi_channel():
    """Write a Conv model with 3 input channels and 4 output channels.

    Declared shapes: input X [1, 3, 5, 5], kernel W [4, 3, 3, 3],
    output Y [1, 4, 3, 3]. W is drawn from a generator seeded with 52.
    """
    kernel_values = np.random.default_rng(52).uniform(-1, 1, (4, 3, 3, 3))
    kernel = numpy_helper.from_array(kernel_values.astype(np.float32), name="W")

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 3, 3])

    conv_attrs = {"kernel_shape": [3, 3], "strides": [1, 1], "pads": [0, 0, 0, 0]}
    conv = helper.make_node("Conv", ["X", "W"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_multi_channel", [graph_input], [graph_output], initializer=[kernel]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/multi_channel",
        "conv_multi_channel.onnx",
    )
def conv_1x1():
    """Write a pointwise (1x1) Conv model that mixes 8 channels down to 4.

    Declared shapes: input X [1, 8, 4, 4], kernel W [4, 8, 1, 1],
    output Y [1, 4, 4, 4]. W is drawn from a generator seeded with 53.
    """
    kernel_values = np.random.default_rng(53).uniform(-1, 1, (4, 8, 1, 1))
    kernel = numpy_helper.from_array(kernel_values.astype(np.float32), name="W")

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 8, 4, 4])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 4, 4])

    conv_attrs = {"kernel_shape": [1, 1], "strides": [1, 1], "pads": [0, 0, 0, 0]}
    conv = helper.make_node("Conv", ["X", "W"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_1x1", [graph_input], [graph_output], initializer=[kernel]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/pointwise_1x1",
        "conv_1x1.onnx",
    )
def conv_same_padding_3x3():
    """Write a 3x3 Conv model using ``auto_pad="SAME_UPPER"``.

    Declared shapes: input X [1, 1, 5, 5], kernel W [1, 1, 3, 3],
    output Y [1, 1, 5, 5] (same spatial size as the input).
    W is drawn from a generator seeded with 54.
    """
    kernel_values = np.random.default_rng(54).uniform(-1, 1, (1, 1, 3, 3))
    kernel = numpy_helper.from_array(kernel_values.astype(np.float32), name="W")

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 5, 5])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 5, 5])

    # No explicit pads here: padding is delegated to the auto_pad attribute.
    conv_attrs = {"kernel_shape": [3, 3], "strides": [1, 1], "auto_pad": "SAME_UPPER"}
    conv = helper.make_node("Conv", ["X", "W"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_same_3x3", [graph_input], [graph_output], initializer=[kernel]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/same_padding_3x3",
        "conv_same_padding_3x3.onnx",
    )
def conv_explicit_padding():
    """Write a 3x3 Conv model with explicit symmetric padding.

    The ``pads`` attribute is ``[1, 1, 1, 1]``, i.e. one element of padding on
    every side of both spatial axes, so the declared output keeps the input's
    spatial size.

    Declared shapes: input X [1, 1, 4, 4], kernel W [1, 1, 3, 3],
    output Y [1, 1, 4, 4]. W is drawn from a generator seeded with 55.
    """
    # Input: [1, 1, 4, 4], Kernel: [1, 1, 3, 3], pads=[1,1,1,1] -> Output: [1, 1, 4, 4]
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 4, 4])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 4, 4])
    W = numpy_helper.from_array(
        np.random.default_rng(55).uniform(-1, 1, (1, 1, 3, 3)).astype(np.float32), name="W")
    # The same pad value is used at the begin and end of each axis (symmetric).
    node = helper.make_node("Conv", ["X", "W"], ["Y"],
                            kernel_shape=[3, 3], strides=[1, 1], pads=[1, 1, 1, 1])
    graph = helper.make_graph([node], "conv_explicit_pad", [X], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "conv/explicit_padding", "conv_explicit_padding.onnx")
def conv_with_bias_3x3():
    """Write a multi-channel 3x3 Conv model that also takes a bias input.

    Declared shapes: input X [1, 3, 5, 5], kernel W [2, 3, 3, 3], bias B [2],
    output Y [1, 2, 3, 3]. Both initializers come from one generator seeded with 56.
    """
    generator = np.random.default_rng(56)

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3])

    # Draw order matters for reproducibility: W first, then B.
    kernel_values = generator.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32)
    bias_values = generator.uniform(-1, 1, (2,)).astype(np.float32)
    kernel = numpy_helper.from_array(kernel_values, name="W")
    bias = numpy_helper.from_array(bias_values, name="B")

    conv_attrs = {"kernel_shape": [3, 3], "strides": [1, 1], "pads": [0, 0, 0, 0]}
    conv = helper.make_node("Conv", ["X", "W", "B"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_with_bias_3x3", [graph_input], [graph_output], initializer=[kernel, bias]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/with_bias_3x3",
        "conv_with_bias_3x3.onnx",
    )
def conv_batch_2():
    """Write a batch-2 Conv model with a 2x2 kernel, ``SAME_UPPER`` padding and bias.

    Declared shapes: input X [2, 3, 3, 3], kernel W [1, 3, 2, 2], bias B [1],
    output Y [2, 1, 3, 3]. Both initializers come from one generator seeded with 57.
    """
    generator = np.random.default_rng(57)

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 3, 3])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 1, 3, 3])

    # Draw order matters for reproducibility: W first, then B.
    kernel_values = generator.uniform(-1, 1, (1, 3, 2, 2)).astype(np.float32)
    bias_values = generator.uniform(-1, 1, (1,)).astype(np.float32)
    kernel = numpy_helper.from_array(kernel_values, name="W")
    bias = numpy_helper.from_array(bias_values, name="B")

    conv_attrs = {"kernel_shape": [2, 2], "strides": [1, 1], "auto_pad": "SAME_UPPER"}
    conv = helper.make_node("Conv", ["X", "W", "B"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_batch_2", [graph_input], [graph_output], initializer=[kernel, bias]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/batch_2",
        "conv_batch_2.onnx",
    )
def conv_large_spatial():
    """Write a 3x3 Conv model over a larger 8x8 spatial input, zero padding.

    Declared shapes: input X [1, 1, 8, 8], kernel W [1, 1, 3, 3],
    output Y [1, 1, 6, 6]. W is drawn from a generator seeded with 58.
    """
    kernel_values = np.random.default_rng(58).uniform(-1, 1, (1, 1, 3, 3))
    kernel = numpy_helper.from_array(kernel_values.astype(np.float32), name="W")

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 8, 8])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 6, 6])

    conv_attrs = {"kernel_shape": [3, 3], "strides": [1, 1], "pads": [0, 0, 0, 0]}
    conv = helper.make_node("Conv", ["X", "W"], ["Y"], **conv_attrs)
    graph = helper.make_graph(
        [conv], "conv_large_spatial", [graph_input], [graph_output], initializer=[kernel]
    )
    opset = helper.make_opsetid("", 13)
    save_model(
        helper.make_model(graph, opset_imports=[opset]),
        "conv/large_spatial",
        "conv_large_spatial.onnx",
    )
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry point: run every generator, grouped by operation, in a fixed order.
    print("Generating GEMM tests:")
    for generate in (
        gemm_non_square,
        gemm_with_bias,
        gemm_transB,
        gemm_alpha_beta,
        gemm_small,
        gemm_large,
        gemm_transB_with_bias,
    ):
        generate()

    print("\nGenerating Conv tests:")
    for generate in (
        conv_3x3_kernel,
        conv_stride2,
        conv_multi_channel,
        conv_1x1,
        conv_same_padding_3x3,
        conv_explicit_padding,
        conv_with_bias_3x3,
        conv_batch_2,
        conv_large_spatial,
    ):
        generate()

    print("\nDone.")