#!/usr/bin/env python3
"""Generate ONNX test models for validating GEMM, Conv, and Pooling implementations."""

from pathlib import Path

import numpy as np
import onnx
from onnx import TensorProto, helper, numpy_helper

OPERATIONS_DIR = Path(__file__).parent


def save_model(model, directory, filename):
    """Validate an ONNX model and save it below ``OPERATIONS_DIR``.

    Args:
        model: The ONNX model to validate and serialize.
        directory: Directory relative to ``OPERATIONS_DIR``; created
            (including parents) when it does not exist yet.
        filename: Name of the model file written inside ``directory``.

    Raises:
        Whatever ``onnx.checker.check_model`` raises for an invalid model; in
        that case nothing is created on disk.
    """
    # Validate first so a malformed model does not leave an empty directory behind.
    onnx.checker.check_model(model)
    target_dir = OPERATIONS_DIR / directory
    target_dir.mkdir(parents=True, exist_ok=True)
    path = target_dir / filename
    onnx.save(model, str(path))
    print(f" {path.relative_to(OPERATIONS_DIR)}")


# ---------------------------------------------------------------------------
# GEMM tests
# ---------------------------------------------------------------------------

def _save_gemm_model(case, batch, k, n, seed, *, with_bias=False, **attrs):
    """Build and save a single-node Gemm model for the test case ``case``.

    The graph is named ``gemm_<case>`` and written to
    ``gemm/<case>/gemm_<case>.onnx``. ``A`` ([batch, k]) is the only graph
    input and ``Y`` ([batch, n]) the only output; the weight ``W`` and the
    optional bias ``C`` are float32 initializers.

    Args:
        case: Suffix used for the graph name, directory and file name.
        batch: Number of rows of the input ``A``.
        k: Reduction dimension shared by ``A`` and ``W``.
        n: Number of output columns.
        seed: Seed for the NumPy generator that produces ``W`` (and ``C``).
        with_bias: When true, add a bias initializer ``C`` of shape [n].
        **attrs: Attributes forwarded to the Gemm node (``alpha``, ``beta``,
            ``transB``).
    """
    rng = np.random.default_rng(seed)
    # With transB=1 the weight is stored as [n, k] and transposed during computation.
    w_shape = (n, k) if attrs.get("transB") else (k, n)
    initializers = [
        numpy_helper.from_array(rng.uniform(-1, 1, w_shape).astype(np.float32), name="W")
    ]
    if with_bias:
        # Drawn after W from the same generator so the values stay reproducible.
        initializers.append(
            numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
        )

    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    node_inputs = ["A"] + [tensor.name for tensor in initializers]
    node = helper.make_node("Gemm", node_inputs, ["Y"], **attrs)
    graph = helper.make_graph(
        [node], f"gemm_{case}", [a_info], [y_info], initializer=initializers
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, f"gemm/{case}", f"gemm_{case}.onnx")


def gemm_non_square():
    """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
    _save_gemm_model("non_square", batch=4, k=128, n=64, seed=42)


def gemm_with_bias():
    """GEMM with bias: Y = A @ W + C."""
    _save_gemm_model("with_bias", batch=4, k=128, n=128, seed=43, with_bias=True)


def gemm_transB():
    """GEMM with transB=1: Y = A @ W^T."""
    _save_gemm_model("transB", batch=4, k=128, n=64, seed=44, transB=1)


def gemm_alpha_beta():
    """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
    _save_gemm_model(
        "alpha_beta", batch=4, k=64, n=64, seed=45, with_bias=True, alpha=0.5, beta=0.25
    )


def gemm_small():
    """Small GEMM: [2, 8] @ [8, 4]."""
    _save_gemm_model("small", batch=2, k=8, n=4, seed=46)

# Shared builders used by the generators below.

def _random_tensor(rng, shape, name):
    """Return a float32 initializer ``name`` filled from ``rng.uniform(-1, 1, shape)``."""
    return numpy_helper.from_array(rng.uniform(-1, 1, shape).astype(np.float32), name=name)


def _float_info(name, shape):
    """Return a FLOAT tensor value info for a graph input or output."""
    return helper.make_tensor_value_info(name, TensorProto.FLOAT, shape)


def _export(nodes, graph_name, inputs, outputs, directory, filename, initializers=()):
    """Wrap ``nodes`` in an opset-13 model and write it via ``save_model``.

    Args:
        nodes: Graph nodes, in execution order.
        graph_name: Name stored in the ONNX graph.
        inputs: Value infos of the graph inputs.
        outputs: Value infos of the graph outputs.
        directory: Output directory passed to ``save_model``.
        filename: Output file name passed to ``save_model``.
        initializers: Constant tensors (weights, biases) embedded in the graph.
    """
    graph = helper.make_graph(
        nodes, graph_name, inputs, outputs, initializer=list(initializers)
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, directory, filename)


def gemm_large():
    """Larger GEMM: [8, 256] @ [256, 128]."""
    B, K, N = 8, 256, 128
    rng = np.random.default_rng(47)
    W = _random_tensor(rng, (K, N), "W")
    node = helper.make_node("Gemm", ["A", "W"], ["Y"])
    _export(
        [node], "gemm_large",
        [_float_info("A", [B, K])], [_float_info("Y", [B, N])],
        "gemm/large", "gemm_large.onnx", initializers=[W],
    )


def gemm_transB_with_bias():
    """GEMM with transB and bias: Y = A @ W^T + C."""
    B, K, N = 4, 128, 64
    rng = np.random.default_rng(48)
    # W is stored as [N, K]; the bias is drawn after W from the same generator.
    W = _random_tensor(rng, (N, K), "W")
    C = _random_tensor(rng, (N,), "C")
    node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
    _export(
        [node], "gemm_transB_with_bias",
        [_float_info("A", [B, K])], [_float_info("Y", [B, N])],
        "gemm/transB_with_bias", "gemm_transB_with_bias.onnx", initializers=[W, C],
    )


# ---------------------------------------------------------------------------
# Conv tests
# ---------------------------------------------------------------------------

def conv_3x3_kernel():
    """Conv with 3x3 kernel, no padding."""
    # Input: [1, 1, 5, 5], Kernel: [1, 1, 3, 3] -> Output: [1, 1, 3, 3]
    W = _random_tensor(np.random.default_rng(50), (1, 1, 3, 3), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "conv_3x3",
        [_float_info("X", [1, 1, 5, 5])], [_float_info("Y", [1, 1, 3, 3])],
        "conv/kernel_3x3", "conv_kernel_3x3.onnx", initializers=[W],
    )


def conv_stride2():
    """Conv with 3x3 kernel and stride 2."""
    # Input: [1, 1, 6, 6], Kernel: [1, 1, 3, 3], stride 2 -> Output: [1, 1, 2, 2]
    W = _random_tensor(np.random.default_rng(51), (1, 1, 3, 3), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[3, 3], strides=[2, 2], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "conv_stride2",
        [_float_info("X", [1, 1, 6, 6])], [_float_info("Y", [1, 1, 2, 2])],
        "conv/stride_2", "conv_stride_2.onnx", initializers=[W],
    )


def conv_multi_channel():
    """Conv with multiple input and output channels."""
    # Input: [1, 3, 5, 5], Kernel: [4, 3, 3, 3] -> Output: [1, 4, 3, 3]
    W = _random_tensor(np.random.default_rng(52), (4, 3, 3, 3), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "conv_multi_channel",
        [_float_info("X", [1, 3, 5, 5])], [_float_info("Y", [1, 4, 3, 3])],
        "conv/multi_channel", "conv_multi_channel.onnx", initializers=[W],
    )


def conv_1x1():
    """1x1 pointwise convolution (channel mixing)."""
    # Input: [1, 8, 4, 4], Kernel: [4, 8, 1, 1] -> Output: [1, 4, 4, 4]
    W = _random_tensor(np.random.default_rng(53), (4, 8, 1, 1), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "conv_1x1",
        [_float_info("X", [1, 8, 4, 4])], [_float_info("Y", [1, 4, 4, 4])],
        "conv/pointwise_1x1", "conv_1x1.onnx", initializers=[W],
    )


def conv_same_padding_3x3():
    """Conv 3x3 with SAME_UPPER padding, preserving spatial dimensions."""
    # Input: [1, 1, 5, 5], Kernel: [1, 1, 3, 3], SAME_UPPER -> Output: [1, 1, 5, 5]
    W = _random_tensor(np.random.default_rng(54), (1, 1, 3, 3), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[3, 3], strides=[1, 1], auto_pad="SAME_UPPER",
    )
    _export(
        [node], "conv_same_3x3",
        [_float_info("X", [1, 1, 5, 5])], [_float_info("Y", [1, 1, 5, 5])],
        "conv/same_padding_3x3", "conv_same_padding_3x3.onnx", initializers=[W],
    )


def conv_explicit_padding():
    """Conv 3x3 with explicit padding of 1 on every side (symmetric)."""
    # Input: [1, 1, 4, 4], Kernel: [1, 1, 3, 3], pads=[1,1,1,1] -> Output: [1, 1, 4, 4]
    W = _random_tensor(np.random.default_rng(55), (1, 1, 3, 3), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[3, 3], strides=[1, 1], pads=[1, 1, 1, 1],
    )
    _export(
        [node], "conv_explicit_pad",
        [_float_info("X", [1, 1, 4, 4])], [_float_info("Y", [1, 1, 4, 4])],
        "conv/explicit_padding", "conv_explicit_padding.onnx", initializers=[W],
    )


def conv_with_bias_3x3():
    """Conv 3x3 with bias."""
    # Input: [1, 3, 5, 5], Kernel: [2, 3, 3, 3], Bias: [2] -> Output: [1, 2, 3, 3]
    rng = np.random.default_rng(56)
    # Weight first, then bias: the draw order fixes the generated values.
    W = _random_tensor(rng, (2, 3, 3, 3), "W")
    B = _random_tensor(rng, (2,), "B")
    node = helper.make_node(
        "Conv", ["X", "W", "B"], ["Y"],
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "conv_with_bias_3x3",
        [_float_info("X", [1, 3, 5, 5])], [_float_info("Y", [1, 2, 3, 3])],
        "conv/with_bias_3x3", "conv_with_bias_3x3.onnx", initializers=[W, B],
    )


def conv_batch_2():
    """Batched conv (batch=2) with SAME_UPPER padding and bias."""
    # Input: [2, 3, 3, 3], Kernel: [1, 3, 2, 2], Bias: [1] -> Output: [2, 1, 3, 3]
    rng = np.random.default_rng(57)
    # Weight first, then bias: the draw order fixes the generated values.
    W = _random_tensor(rng, (1, 3, 2, 2), "W")
    B = _random_tensor(rng, (1,), "B")
    node = helper.make_node(
        "Conv", ["X", "W", "B"], ["Y"],
        kernel_shape=[2, 2], strides=[1, 1], auto_pad="SAME_UPPER",
    )
    _export(
        [node], "conv_batch_2",
        [_float_info("X", [2, 3, 3, 3])], [_float_info("Y", [2, 1, 3, 3])],
        "conv/batch_2", "conv_batch_2.onnx", initializers=[W, B],
    )


def conv_large_spatial():
    """Conv on larger spatial input: [1, 1, 8, 8] with 3x3 kernel."""
    W = _random_tensor(np.random.default_rng(58), (1, 1, 3, 3), "W")
    node = helper.make_node(
        "Conv", ["X", "W"], ["Y"],
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "conv_large_spatial",
        [_float_info("X", [1, 1, 8, 8])], [_float_info("Y", [1, 1, 6, 6])],
        "conv/large_spatial", "conv_large_spatial.onnx", initializers=[W],
    )


# ---------------------------------------------------------------------------
# Pooling tests
# ---------------------------------------------------------------------------

def maxpool_basic():
    """MaxPool 2x2 with stride 1."""
    node = helper.make_node(
        "MaxPool", ["X"], ["Y"],
        kernel_shape=[2, 2], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "maxpool_basic",
        [_float_info("X", [1, 1, 4, 4])], [_float_info("Y", [1, 1, 3, 3])],
        "pool/max_basic", "maxpool_basic.onnx",
    )


def maxpool_stride2_multichannel():
    """MaxPool 2x2 with stride 2 on multiple channels."""
    node = helper.make_node(
        "MaxPool", ["X"], ["Y"],
        kernel_shape=[2, 2], strides=[2, 2], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "maxpool_stride2_multichannel",
        [_float_info("X", [1, 5, 6, 6])], [_float_info("Y", [1, 5, 3, 3])],
        "pool/max_stride2_multichannel", "maxpool_stride2_multichannel.onnx",
    )


def maxpool_same_upper():
    """MaxPool 3x3 with SAME_UPPER padding."""
    node = helper.make_node(
        "MaxPool", ["X"], ["Y"],
        kernel_shape=[3, 3], strides=[2, 2], auto_pad="SAME_UPPER",
    )
    _export(
        [node], "maxpool_same_upper",
        [_float_info("X", [1, 1, 5, 5])], [_float_info("Y", [1, 1, 3, 3])],
        "pool/max_same_upper", "maxpool_same_upper.onnx",
    )


def avgpool_basic():
    """AveragePool 2x2 with stride 1."""
    node = helper.make_node(
        "AveragePool", ["X"], ["Y"],
        kernel_shape=[2, 2], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    _export(
        [node], "avgpool_basic",
        [_float_info("X", [1, 3, 4, 4])], [_float_info("Y", [1, 3, 3, 3])],
        "pool/avg_basic", "avgpool_basic.onnx",
    )


def avgpool_explicit_padding():
    """AveragePool 3x3 with explicit padding, excluding pad from the divisor."""
    node = helper.make_node(
        "AveragePool", ["X"], ["Y"],
        kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], count_include_pad=0,
    )
    _export(
        [node], "avgpool_explicit_padding",
        [_float_info("X", [1, 2, 4, 4])], [_float_info("Y", [1, 2, 2, 2])],
        "pool/avg_explicit_padding", "avgpool_explicit_padding.onnx",
    )


def avgpool_include_pad():
    """AveragePool 3x3 with explicit padding, including pad in the divisor."""
    node = helper.make_node(
        "AveragePool", ["X"], ["Y"],
        kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], count_include_pad=1,
    )
    _export(
        [node], "avgpool_include_pad",
        [_float_info("X", [1, 2, 4, 4])], [_float_info("Y", [1, 2, 2, 2])],
        "pool/avg_include_pad", "avgpool_include_pad.onnx",
    )


def maxpool_after_conv():
    """Conv followed by MaxPool to validate pooling on lowered conv results."""
    rng = np.random.default_rng(59)
    W = _random_tensor(rng, (4, 3, 3, 3), "W")
    # "C" is the intermediate tensor linking the two nodes; it is not a graph output.
    conv = helper.make_node(
        "Conv", ["X", "W"], ["C"],
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0],
    )
    pool = helper.make_node(
        "MaxPool", ["C"], ["Y"],
        kernel_shape=[2, 2], strides=[2, 2], pads=[0, 0, 0, 0],
    )
    _export(
        [conv, pool], "maxpool_after_conv",
        [_float_info("X", [1, 3, 6, 6])], [_float_info("Y", [1, 4, 2, 2])],
        "pool/max_after_conv", "maxpool_after_conv.onnx", initializers=[W],
    )


# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------

def main():
    """Generate every GEMM, Conv and Pooling test model, printing each saved path."""
    print("Generating GEMM tests:")
    gemm_non_square()
    gemm_with_bias()
    gemm_transB()
    gemm_alpha_beta()
    gemm_small()
    gemm_large()
    gemm_transB_with_bias()

    print("\nGenerating Conv tests:")
    conv_3x3_kernel()
    conv_stride2()
    conv_multi_channel()
    conv_1x1()
    conv_same_padding_3x3()
    conv_explicit_padding()
    conv_with_bias_3x3()
    conv_batch_2()
    conv_large_spatial()

    print("\nGenerating Pooling tests:")
    maxpool_basic()
    maxpool_stride2_multichannel()
    maxpool_same_upper()
    avgpool_basic()
    avgpool_explicit_padding()
    avgpool_include_pad()
    maxpool_after_conv()

    print("\nDone.")


if __name__ == "__main__":
    main()