#!/usr/bin/env python3
"""Generate ONNX test models for validating supported ONNX operations."""

from pathlib import Path

import numpy as np
import onnx
from onnx import helper, TensorProto, numpy_helper

OPERATIONS_DIR = Path(__file__).parent

# Default-domain opset version stamped on every model built through
# _save_single_op_model.
_OPSET_VERSION = 13


def save_model(model, directory, filename):
    """Check an ONNX model and save it under ``OPERATIONS_DIR``.

    Args:
        model: The ONNX model to validate and write.
        directory: Sub-directory of ``OPERATIONS_DIR``; created if missing.
        filename: File name of the saved model inside that directory.

    The model is passed through ``onnx.checker.check_model`` before saving,
    so an invalid model raises instead of being written. The saved path is
    printed relative to ``OPERATIONS_DIR``.
    """
    target_dir = OPERATIONS_DIR / directory
    target_dir.mkdir(parents=True, exist_ok=True)
    path = target_dir / filename
    onnx.checker.check_model(model)
    onnx.save(model, str(path))
    print(f" {path.relative_to(OPERATIONS_DIR)}")


def make_int64_initializer(name, values):
    """Return an initializer tensor called *name* holding *values* as int64."""
    return numpy_helper.from_array(np.asarray(values, dtype=np.int64), name=name)


def _float_info(name, shape):
    """Describe a FLOAT graph input/output called *name* with the given *shape*."""
    return helper.make_tensor_value_info(name, TensorProto.FLOAT, shape)


def _uniform_initializer(rng, shape, name):
    """Draw a float32 initializer called *name* from uniform(-1, 1) using *rng*."""
    return numpy_helper.from_array(
        rng.uniform(-1, 1, shape).astype(np.float32), name=name)


def _save_single_op_model(op_type, operands, output, graph_name, directory,
                          filename, **attrs):
    """Build, check and save a model consisting of one node with one output.

    Args:
        op_type: ONNX operator name, e.g. ``"Conv"`` or ``"Gemm"``.
        operands: Node inputs in operator order. ``TensorProto`` entries
            become graph initializers (constants); every other entry is
            registered as a graph input supplied at runtime.
        output: Value info of the single node/graph output.
        graph_name: Name stored on the graph.
        directory: Sub-directory passed to ``save_model``.
        filename: File name passed to ``save_model``.
        **attrs: Operator attributes forwarded to ``helper.make_node``.
    """
    node = helper.make_node(
        op_type, [operand.name for operand in operands], [output.name], **attrs)
    runtime_inputs = [o for o in operands if not isinstance(o, TensorProto)]
    constants = [o for o in operands if isinstance(o, TensorProto)]
    graph = helper.make_graph(
        [node], graph_name, runtime_inputs, [output], initializer=constants)
    model = helper.make_model(
        graph, opset_imports=[helper.make_opsetid("", _OPSET_VERSION)])
    save_model(model, directory, filename)


# ---------------------------------------------------------------------------
# Conv tests
# ---------------------------------------------------------------------------

def conv_3x3_kernel():
    """Conv with 3x3 kernel, no padding."""
    # Input: [1, 1, 5, 5], Kernel: [1, 1, 3, 3] -> Output: [1, 1, 3, 3]
    weight = _uniform_initializer(np.random.default_rng(50), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 5, 5]), weight],
        _float_info("Y", [1, 1, 3, 3]),
        "conv_3x3", "conv/kernel_3x3", "conv_kernel_3x3.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_stride2():
    """Conv with 3x3 kernel and stride 2."""
    # Input: [1, 1, 6, 6], Kernel: [1, 1, 3, 3], stride 2 -> Output: [1, 1, 2, 2]
    weight = _uniform_initializer(np.random.default_rng(51), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 6, 6]), weight],
        _float_info("Y", [1, 1, 2, 2]),
        "conv_stride2", "conv/stride_2", "conv_stride_2.onnx",
        kernel_shape=[3, 3], strides=[2, 2], pads=[0, 0, 0, 0])


def conv_multi_channel():
    """Conv with multiple input and output channels."""
    # Input: [1, 3, 5, 5], Kernel: [4, 3, 3, 3] -> Output: [1, 4, 3, 3]
    weight = _uniform_initializer(np.random.default_rng(52), (4, 3, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 5, 5]), weight],
        _float_info("Y", [1, 4, 3, 3]),
        "conv_multi_channel", "conv/multi_channel", "conv_multi_channel.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_1x1():
    """1x1 pointwise convolution (channel mixing)."""
    # Input: [1, 8, 4, 4], Kernel: [4, 8, 1, 1] -> Output: [1, 4, 4, 4]
    weight = _uniform_initializer(np.random.default_rng(53), (4, 8, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 8, 4, 4]), weight],
        _float_info("Y", [1, 4, 4, 4]),
        "conv_1x1", "conv/pointwise_1x1", "conv_1x1.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_same_padding_3x3():
    """Conv 3x3 with SAME_UPPER padding, preserving spatial dimensions."""
    # Input: [1, 1, 5, 5], Kernel: [1, 1, 3, 3], SAME_UPPER -> Output: [1, 1, 5, 5]
    weight = _uniform_initializer(np.random.default_rng(54), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 5, 5]), weight],
        _float_info("Y", [1, 1, 5, 5]),
        "conv_same_3x3", "conv/same_padding_3x3", "conv_same_padding_3x3.onnx",
        kernel_shape=[3, 3], strides=[1, 1], auto_pad="SAME_UPPER")


def conv_explicit_padding():
    """Conv 3x3 with explicit symmetric padding."""
    # Input: [1, 1, 4, 4], Kernel: [1, 1, 3, 3], pads=[1,1,1,1] -> Output: [1, 1, 4, 4]
    weight = _uniform_initializer(np.random.default_rng(55), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 4, 4]), weight],
        _float_info("Y", [1, 1, 4, 4]),
        "conv_explicit_pad", "conv/explicit_padding", "conv_explicit_padding.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[1, 1, 1, 1])


def conv_with_bias_3x3():
    """Conv 3x3 with bias."""
    # Input: [1, 3, 5, 5], Kernel: [2, 3, 3, 3], Bias: [2] -> Output: [1, 2, 3, 3]
    rng = np.random.default_rng(56)
    # Draw order (weights, then bias) determines the generated values.
    weight = _uniform_initializer(rng, (2, 3, 3, 3), "W")
    bias = _uniform_initializer(rng, (2,), "B")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 5, 5]), weight, bias],
        _float_info("Y", [1, 2, 3, 3]),
        "conv_with_bias_3x3", "conv/with_bias_3x3", "conv_with_bias_3x3.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_batch_2():
    """Batched conv (batch=2) with SAME_UPPER padding and bias."""
    # Input: [2, 3, 3, 3], Kernel: [1, 3, 2, 2], Bias: [1] -> Output: [2, 1, 3, 3]
    rng = np.random.default_rng(57)
    weight = _uniform_initializer(rng, (1, 3, 2, 2), "W")
    bias = _uniform_initializer(rng, (1,), "B")
    _save_single_op_model(
        "Conv", [_float_info("X", [2, 3, 3, 3]), weight, bias],
        _float_info("Y", [2, 1, 3, 3]),
        "conv_batch_2", "conv/batch_2", "conv_batch_2.onnx",
        kernel_shape=[2, 2], strides=[1, 1], auto_pad="SAME_UPPER")


def conv_large_spatial():
    """Conv on larger spatial input: [1, 1, 8, 8] with 3x3 kernel."""
    weight = _uniform_initializer(np.random.default_rng(58), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 8, 8]), weight],
        _float_info("Y", [1, 1, 6, 6]),
        "conv_large_spatial", "conv/large_spatial", "conv_large_spatial.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_grouped_two_groups():
    """Grouped Conv with two groups, pointwise kernels, and bias."""
    rng = np.random.default_rng(59)
    weight = _uniform_initializer(rng, (4, 2, 1, 1), "W")
    bias = _uniform_initializer(rng, (4,), "B")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 4, 4, 4]), weight, bias],
        _float_info("Y", [1, 4, 4, 4]),
        "conv_grouped_two_groups", "conv/grouped_two_groups",
        "conv_grouped_two_groups.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0], group=2)


def conv_depthwise_grouped():
    """Depthwise-style grouped Conv with one input channel per group."""
    weight = _uniform_initializer(np.random.default_rng(60), (3, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 4, 4]), weight],
        _float_info("Y", [1, 3, 2, 2]),
        "conv_depthwise_grouped", "conv/depthwise_grouped",
        "conv_depthwise_grouped.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0], group=3)


def conv_dynamic():
    """Conv with input and weight both provided at runtime."""
    _save_single_op_model(
        "Conv",
        [_float_info("X", [1, 1, 4, 4]), _float_info("W", [1, 1, 3, 3])],
        _float_info("Y", [1, 1, 2, 2]),
        "conv_dynamic", "conv/dynamic", "conv_dynamic.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_huge_pointwise_1024():
    """Huge 1x1 Conv with 1024 input and output channels."""
    weight = _uniform_initializer(
        np.random.default_rng(73), (1024, 1024, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1024, 1, 1]), weight],
        _float_info("Y", [1, 1024, 1, 1]),
        "conv_huge_pointwise_1024", "conv/huge_pointwise_1024",
        "conv_huge_pointwise_1024.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_huge_pointwise_1024_dynamic():
    """Huge 1x1 Conv with runtime weights and 1024 channels."""
    _save_single_op_model(
        "Conv",
        [_float_info("X", [1, 1024, 1, 1]), _float_info("W", [1024, 1024, 1, 1])],
        _float_info("Y", [1, 1024, 1, 1]),
        "conv_huge_pointwise_1024_dynamic", "conv/huge_pointwise_1024_dynamic",
        "conv_huge_pointwise_1024_dynamic.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_large_output_channels_1x1():
    """1x1 Conv with modest inputs and very large output channel count."""
    weight = _uniform_initializer(np.random.default_rng(74), (1024, 64, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 64, 1, 1]), weight],
        _float_info("Y", [1, 1024, 1, 1]),
        "conv_large_output_channels_1x1", "conv/large_output_channels_1x1",
        "conv_large_output_channels_1x1.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_large_input_channels_1x1():
    """1x1 Conv with very large input channel count and modest outputs."""
    weight = _uniform_initializer(np.random.default_rng(75), (64, 1024, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1024, 1, 1]), weight],
        _float_info("Y", [1, 64, 1, 1]),
        "conv_large_input_channels_1x1", "conv/large_input_channels_1x1",
        "conv_large_input_channels_1x1.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_depthwise_1024_channels():
    """Depthwise 1x1 Conv with 1024 groups and preserved spatial shape."""
    weight = _uniform_initializer(np.random.default_rng(76), (1024, 1, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1024, 4, 4]), weight],
        _float_info("Y", [1, 1024, 4, 4]),
        "conv_depthwise_1024_channels", "conv/depthwise_1024_channels",
        "conv_depthwise_1024_channels.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0], group=1024)


def conv_grouped_many_groups():
    """Grouped 1x1 Conv with many groups and high channel counts."""
    weight = _uniform_initializer(np.random.default_rng(77), (1024, 16, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1024, 2, 2]), weight],
        _float_info("Y", [1, 1024, 2, 2]),
        "conv_grouped_many_groups", "conv/grouped_many_groups",
        "conv_grouped_many_groups.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0], group=64)


def conv_non_square_kernel_1x3():
    """Conv with a non-square 1x3 kernel."""
    weight = _uniform_initializer(np.random.default_rng(78), (4, 3, 1, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 5, 5]), weight],
        _float_info("Y", [1, 4, 5, 3]),
        "conv_non_square_kernel_1x3", "conv/non_square_kernel_1x3",
        "conv_non_square_kernel_1x3.onnx",
        kernel_shape=[1, 3], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_non_square_kernel_3x1():
    """Conv with a non-square 3x1 kernel."""
    weight = _uniform_initializer(np.random.default_rng(79), (4, 3, 3, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 5, 5]), weight],
        _float_info("Y", [1, 4, 3, 5]),
        "conv_non_square_kernel_3x1", "conv/non_square_kernel_3x1",
        "conv_non_square_kernel_3x1.onnx",
        kernel_shape=[3, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_non_uniform_stride():
    """Conv with non-uniform stride across spatial dimensions."""
    weight = _uniform_initializer(np.random.default_rng(80), (4, 3, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 6, 7]), weight],
        _float_info("Y", [1, 4, 4, 3]),
        "conv_non_uniform_stride", "conv/non_uniform_stride",
        "conv_non_uniform_stride.onnx",
        kernel_shape=[3, 3], strides=[1, 2], pads=[0, 0, 0, 0])


def conv_dilated_3x3():
    """Conv with a dilated 3x3 kernel."""
    weight = _uniform_initializer(np.random.default_rng(81), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 7, 7]), weight],
        _float_info("Y", [1, 1, 3, 3]),
        "conv_dilated_3x3", "conv/dilated_3x3", "conv_dilated_3x3.onnx",
        kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0], dilations=[2, 2])


def conv_real_asymmetric_padding():
    """Conv with truly asymmetric explicit padding."""
    weight = _uniform_initializer(np.random.default_rng(82), (2, 1, 3, 2), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 4, 5]), weight],
        _float_info("Y", [1, 2, 4, 7]),
        "conv_real_asymmetric_padding", "conv/real_asymmetric_padding",
        "conv_real_asymmetric_padding.onnx",
        kernel_shape=[3, 2], strides=[1, 1], pads=[0, 1, 2, 3])


def conv_same_lower_3x3():
    """Conv 3x3 with SAME_LOWER padding, preserving spatial dimensions."""
    weight = _uniform_initializer(np.random.default_rng(83), (1, 1, 3, 3), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 5, 5]), weight],
        _float_info("Y", [1, 1, 5, 5]),
        "conv_same_lower_3x3", "conv/same_lower_3x3", "conv_same_lower_3x3.onnx",
        kernel_shape=[3, 3], strides=[1, 1], auto_pad="SAME_LOWER")


def conv_kernel_equals_input_spatial():
    """Conv where the kernel spans the full input spatial extent."""
    weight = _uniform_initializer(np.random.default_rng(84), (8, 3, 7, 7), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 3, 7, 7]), weight],
        _float_info("Y", [1, 8, 1, 1]),
        "conv_kernel_equals_input_spatial", "conv/kernel_equals_input_spatial",
        "conv_kernel_equals_input_spatial.onnx",
        kernel_shape=[7, 7], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_batch_4_pointwise():
    """Batched 1x1 Conv with batch size 4."""
    weight = _uniform_initializer(np.random.default_rng(85), (64, 128, 1, 1), "W")
    _save_single_op_model(
        "Conv", [_float_info("X", [4, 128, 1, 1]), weight],
        _float_info("Y", [4, 64, 1, 1]),
        "conv_batch_4_pointwise", "conv/batch_4_pointwise",
        "conv_batch_4_pointwise.onnx",
        kernel_shape=[1, 1], strides=[1, 1], pads=[0, 0, 0, 0])


def conv_without_kernel_shape_attr():
    """Conv where kernel_shape is inferred from the weight tensor."""
    weight = _uniform_initializer(np.random.default_rng(86), (1, 1, 3, 3), "W")
    # kernel_shape is deliberately omitted from the node attributes.
    _save_single_op_model(
        "Conv", [_float_info("X", [1, 1, 5, 5]), weight],
        _float_info("Y", [1, 1, 3, 3]),
        "conv_without_kernel_shape_attr", "conv/without_kernel_shape_attr",
        "conv_without_kernel_shape_attr.onnx",
        strides=[1, 1], pads=[0, 0, 0, 0])


# ---------------------------------------------------------------------------
# GEMM tests
# ---------------------------------------------------------------------------

def gemm_simple():
    """Simple GEMM with square weights: [10, 132] @ [132, 132]."""
    rows, inner, cols = 10, 132, 132
    weight = _uniform_initializer(np.random.default_rng(41), (inner, cols), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight],
        _float_info("Y", [rows, cols]),
        "gemm_simple", "gemm/simple", "gemm_simple.onnx")


def gemm_non_square():
    """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
    rows, inner, cols = 4, 128, 64
    weight = _uniform_initializer(np.random.default_rng(42), (inner, cols), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight],
        _float_info("Y", [rows, cols]),
        "gemm_non_square", "gemm/non_square", "gemm_non_square.onnx")


def gemm_with_bias():
    """GEMM with bias: Y = A @ W + C."""
    rows, inner, cols = 4, 128, 128
    rng = np.random.default_rng(43)
    # Draw order (weights, then bias) determines the generated values.
    weight = _uniform_initializer(rng, (inner, cols), "W")
    bias = _uniform_initializer(rng, (cols,), "C")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight, bias],
        _float_info("Y", [rows, cols]),
        "gemm_with_bias", "gemm/with_bias", "gemm_with_bias.onnx")


def gemm_transB():
    """GEMM with transB=1: Y = A @ W^T."""
    rows, inner, cols = 4, 128, 64
    rng = np.random.default_rng(44)
    # W stored as [N, K], transposed during computation
    weight = _uniform_initializer(rng, (cols, inner), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight],
        _float_info("Y", [rows, cols]),
        "gemm_transB", "gemm/transB", "gemm_transB.onnx",
        transB=1)


def gemm_alpha_beta():
    """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
    rows, inner, cols = 4, 64, 64
    rng = np.random.default_rng(45)
    weight = _uniform_initializer(rng, (inner, cols), "W")
    bias = _uniform_initializer(rng, (cols,), "C")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight, bias],
        _float_info("Y", [rows, cols]),
        "gemm_alpha_beta", "gemm/alpha_beta", "gemm_alpha_beta.onnx",
        alpha=0.5, beta=0.25)


def gemm_small():
    """Small GEMM: [2, 8] @ [8, 4]."""
    rows, inner, cols = 2, 8, 4
    weight = _uniform_initializer(np.random.default_rng(46), (inner, cols), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight],
        _float_info("Y", [rows, cols]),
        "gemm_small", "gemm/small", "gemm_small.onnx")


def gemm_large():
    """Larger GEMM: [8, 256] @ [256, 128]."""
    rows, inner, cols = 8, 256, 128
    weight = _uniform_initializer(np.random.default_rng(47), (inner, cols), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight],
        _float_info("Y", [rows, cols]),
        "gemm_large", "gemm/large", "gemm_large.onnx")


def gemm_transB_with_bias():
    """GEMM with transB and bias: Y = A @ W^T + C."""
    rows, inner, cols = 4, 128, 64
    rng = np.random.default_rng(48)
    weight = _uniform_initializer(rng, (cols, inner), "W")
    bias = _uniform_initializer(rng, (cols,), "C")
    _save_single_op_model(
        "Gemm", [_float_info("A", [rows, inner]), weight, bias],
        _float_info("Y", [rows, cols]),
        "gemm_transB_with_bias", "gemm/transB_with_bias",
        "gemm_transB_with_bias.onnx",
        transB=1)


def gemm_dynamic():
    """GEMM with both matrix operands provided at runtime."""
    _save_single_op_model(
        "Gemm", [_float_info("A", [2, 8]), _float_info("B", [8, 4])],
        _float_info("Y", [2, 4]),
        "gemm_dynamic", "gemm/dynamic", "gemm_dynamic.onnx")


def gemm_dynamic_transB():
    """GEMM with runtime matrix operands and transposed runtime B."""
    _save_single_op_model(
        "Gemm", [_float_info("A", [2, 8]), _float_info("B", [4, 8])],
        _float_info("Y", [2, 4]),
        "gemm_dynamic_transB", "gemm/dynamic_transB", "gemm_dynamic_transB.onnx",
        transB=1)


def gemm_dynamic_bias():
    """GEMM with runtime matrix operands and runtime bias."""
    _save_single_op_model(
        "Gemm",
        [_float_info("A", [2, 8]), _float_info("B", [8, 4]), _float_info("C", [4])],
        _float_info("Y", [2, 4]),
        "gemm_dynamic_bias", "gemm/dynamic_bias", "gemm_dynamic_bias.onnx")


def gemm_dynamic_alpha():
    """GEMM with runtime matrix operands and a fixed alpha=0.5 attribute."""
    _save_single_op_model(
        "Gemm", [_float_info("A", [2, 8]), _float_info("B", [8, 4])],
        _float_info("Y", [2, 4]),
        "gemm_dynamic_alpha", "gemm/dynamic_alpha", "gemm_dynamic_alpha.onnx",
        alpha=0.5)


def gemm_dynamic_beta():
    """GEMM with runtime matrix operands, runtime bias, and a fixed beta=2.0 attribute."""
    _save_single_op_model(
        "Gemm",
        [_float_info("A", [2, 8]), _float_info("B", [8, 4]), _float_info("C", [4])],
        _float_info("Y", [2, 4]),
        "gemm_dynamic_beta", "gemm/dynamic_beta", "gemm_dynamic_beta.onnx",
        beta=2.0)


def gemm_dynamic_bias_alpha_beta():
    """GEMM with runtime matrix operands and bias, plus alpha=0.5 and beta=2.0 attributes."""
    _save_single_op_model(
        "Gemm",
        [_float_info("A", [2, 8]), _float_info("B", [8, 4]), _float_info("C", [4])],
        _float_info("Y", [2, 4]),
        "gemm_dynamic_bias_alpha_beta", "gemm/dynamic_bias_alpha_beta",
        "gemm_dynamic_bias_alpha_beta.onnx",
        alpha=0.5, beta=2.0)


def gemm_huge_1024():
    """Huge GEMM with 1024-wide inner and output dimensions."""
    weight = _uniform_initializer(np.random.default_rng(87), (1024, 1024), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [1, 1024]), weight],
        _float_info("Y", [1, 1024]),
        "gemm_huge_1024", "gemm/huge_1024", "gemm_huge_1024.onnx")


def gemm_large_k_small_n():
    """GEMM with large K and smaller output width."""
    weight = _uniform_initializer(np.random.default_rng(88), (1024, 64), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [1, 1024]), weight],
        _float_info("Y", [1, 64]),
        "gemm_large_k_small_n", "gemm/large_k_small_n",
        "gemm_large_k_small_n.onnx")


def gemm_small_k_large_n():
    """GEMM with modest K and very large output width."""
    weight = _uniform_initializer(np.random.default_rng(89), (64, 1024), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [1, 64]), weight],
        _float_info("Y", [1, 1024]),
        "gemm_small_k_large_n", "gemm/small_k_large_n",
        "gemm_small_k_large_n.onnx")


def gemm_transA():
    """GEMM with transA=1: A is stored as [K, M] and used as [M, K]."""
    weight = _uniform_initializer(np.random.default_rng(90), (8, 6), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [8, 4]), weight],
        _float_info("Y", [4, 6]),
        "gemm_transA", "gemm/transA", "gemm_transA.onnx",
        transA=1)


def gemm_transA_transB():
    """GEMM with transA=1 and transB=1."""
    weight = _uniform_initializer(np.random.default_rng(91), (6, 8), "W")
    _save_single_op_model(
        "Gemm", [_float_info("A", [8, 4]), weight],
        _float_info("Y", [4, 6]),
        "gemm_transA_transB", "gemm/transA_transB", "gemm_transA_transB.onnx",
        transA=1, transB=1)


def gemm_bias_rank2_broadcast():
    """GEMM with rank-2 bias broadcasting across rows."""
    rng = np.random.default_rng(92)
    weight = _uniform_initializer(rng, (8, 6), "W")
    bias = _uniform_initializer(rng, (1, 6), "C")
    _save_single_op_model(
        "Gemm", [_float_info("A", [4, 8]), weight, bias],
        _float_info("Y", [4, 6]),
        "gemm_bias_rank2_broadcast", "gemm/bias_rank2_broadcast",
        "gemm_bias_rank2_broadcast.onnx")


def gemm_scalar_bias():
    """GEMM with scalar bias broadcasting to the full output."""
    rng = np.random.default_rng(93)
    weight = _uniform_initializer(rng, (8, 6), "W")
    # The bias is a fixed rank-0 tensor rather than a random draw.
    bias = numpy_helper.from_array(np.asarray(0.25, dtype=np.float32), name="C")
    _save_single_op_model(
        "Gemm", [_float_info("A", [4, 8]), weight, bias],
        _float_info("Y", [4, 6]),
        "gemm_scalar_bias", "gemm/scalar_bias", "gemm_scalar_bias.onnx")


#
--------------------------------------------------------------------------- # MatMul tests # --------------------------------------------------------------------------- def matmul_basic(): """Direct 2D MatMul with constant RHS.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 4]) B = numpy_helper.from_array(np.random.default_rng(49).uniform(-1, 1, (3, 4)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_basic", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/basic", "matmul_basic.onnx") def matmul_left_constant(): """Direct 2D MatMul with constant LHS.""" A = numpy_helper.from_array(np.random.default_rng(69).uniform(-1, 1, (2, 3)).astype(np.float32), name="A") B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 4]) node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_left_constant", [B], [Y], initializer=[A]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/left_constant", "matmul_left_constant.onnx") def matmul_dynamic(): """Direct 2D MatMul with both operands provided at runtime.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 4]) node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_dynamic", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/dynamic", "matmul_dynamic.onnx") def matmul_batched_3d(): """Batched 3D MatMul with matching batch dimensions.""" rng = np.random.default_rng(50) A = 
helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 4]) B = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 4)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_batched_3d", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/batched_3d", "matmul_batched_3d.onnx") def matmul_batched_3d_dynamic(): """Batched 3D MatMul with both operands provided at runtime.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 2, 3]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 4]) node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_batched_3d_dynamic", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/batched_3d_dynamic", "matmul_batched_3d_dynamic.onnx") def matmul_batched_left_constant(): """Batched 3D MatMul with constant LHS and runtime RHS.""" rng = np.random.default_rng(70) A = numpy_helper.from_array(rng.uniform(-1, 1, (2, 2, 3)).astype(np.float32), name="A") B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 4]) node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_batched_left_constant", [B], [Y], initializer=[A]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/batched_left_constant", "matmul_batched_left_constant.onnx") def matmul_batched_rhs_broadcast(): """Batched 3D MatMul with 2D constant RHS broadcast across batch.""" rng = np.random.default_rng(71) A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 2, 3]) Y = 
helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 4]) B = numpy_helper.from_array(rng.uniform(-1, 1, (3, 4)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_batched_rhs_broadcast", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/batched_rhs_broadcast", "matmul_batched_rhs_broadcast.onnx") def matmul_batched_lhs_broadcast(): """Batched 3D MatMul with 2D runtime LHS broadcast across batched RHS.""" rng = np.random.default_rng(72) A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 4]) B = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 4)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_batched_lhs_broadcast", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/batched_lhs_broadcast", "matmul_batched_lhs_broadcast.onnx") def matmul_huge_1024(): """Huge MatMul with 1024-wide inner and output dimensions.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1024]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024]) B = numpy_helper.from_array( np.random.default_rng(94).uniform(-1, 1, (1024, 1024)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_huge_1024", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/huge_1024", "matmul_huge_1024.onnx") def matmul_vector_matrix(): """Vector-matrix MatMul producing a 1D output.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1024]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [64]) B = 
numpy_helper.from_array(np.random.default_rng(95).uniform(-1, 1, (1024, 64)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_vector_matrix", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/vector_matrix", "matmul_vector_matrix.onnx") def matmul_matrix_vector(): """Matrix-vector MatMul producing a 1D output.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [64, 1024]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [64]) B = numpy_helper.from_array(np.random.default_rng(96).uniform(-1, 1, (1024,)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_matrix_vector", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/matrix_vector", "matmul_matrix_vector.onnx") def matmul_vector_vector_dot(): """Vector-vector MatMul producing a scalar output.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1024]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, []) B = numpy_helper.from_array(np.random.default_rng(97).uniform(-1, 1, (1024,)).astype(np.float32), name="B") node = helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_vector_vector_dot", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/vector_vector_dot", "matmul_vector_vector_dot.onnx") def matmul_batched_4d_broadcast(): """Batched 4D MatMul with broadcast across leading dimensions.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 1, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 5, 3, 6]) B = numpy_helper.from_array(np.random.default_rng(98).uniform(-1, 1, (1, 5, 4, 6)).astype(np.float32), name="B") node = 
helper.make_node("MatMul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "matmul_batched_4d_broadcast", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "matmul/batched_4d_broadcast", "matmul_batched_4d_broadcast.onnx") # --------------------------------------------------------------------------- # Pooling tests # --------------------------------------------------------------------------- def maxpool_basic(): """MaxPool 2x2 with stride 1.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 3, 3]) node = helper.make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2], strides=[1, 1], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "maxpool_basic", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_basic", "maxpool_basic.onnx") def maxpool_stride2_multichannel(): """MaxPool 2x2 with stride 2 on multiple channels.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5, 6, 6]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5, 3, 3]) node = helper.make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2], strides=[2, 2], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "maxpool_stride2_multichannel", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_stride2_multichannel", "maxpool_stride2_multichannel.onnx") def maxpool_same_upper(): """MaxPool 3x3 with SAME_UPPER padding.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 5, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 3, 3]) node = helper.make_node("MaxPool", ["X"], ["Y"], kernel_shape=[3, 3], strides=[2, 2], auto_pad="SAME_UPPER") graph = helper.make_graph([node], "maxpool_same_upper", [X], [Y]) model = helper.make_model(graph, 
opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_same_upper", "maxpool_same_upper.onnx") def avgpool_basic(): """AveragePool 2x2 with stride 1.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 3, 3]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2], strides=[1, 1], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "avgpool_basic", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/avg_basic", "avgpool_basic.onnx") def avgpool_explicit_padding(): """AveragePool 3x3 with explicit padding, excluding pad from the divisor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 2, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 2, 2]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], count_include_pad=0) graph = helper.make_graph([node], "avgpool_explicit_padding", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/avg_explicit_padding", "avgpool_explicit_padding.onnx") def avgpool_include_pad(): """AveragePool 3x3 with explicit padding, including pad in the divisor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 2, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 2, 2]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[3, 3], strides=[2, 2], pads=[1, 1, 1, 1], count_include_pad=1) graph = helper.make_graph([node], "avgpool_include_pad", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/avg_include_pad", "avgpool_include_pad.onnx") def maxpool_after_conv(): """Conv followed by MaxPool to validate pooling on lowered conv results.""" rng = np.random.default_rng(59) X = helper.make_tensor_value_info("X", 
TensorProto.FLOAT, [1, 3, 6, 6]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 4, 2, 2]) W = numpy_helper.from_array(rng.uniform(-1, 1, (4, 3, 3, 3)).astype(np.float32), name="W") conv = helper.make_node("Conv", ["X", "W"], ["C"], kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) pool = helper.make_node("MaxPool", ["C"], ["Y"], kernel_shape=[2, 2], strides=[2, 2], pads=[0, 0, 0, 0]) graph = helper.make_graph([conv, pool], "maxpool_after_conv", [X], [Y], initializer=[W]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx") def maxpool_ceil_mode(): """MaxPool with ceil_mode enabled.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 5, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 2, 2]) node = helper.make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 2], strides=[2, 2], pads=[0, 0, 0, 0], ceil_mode=1) graph = helper.make_graph([node], "maxpool_ceil_mode", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_ceil_mode", "maxpool_ceil_mode.onnx") def avgpool_ceil_mode(): """AveragePool with ceil_mode enabled.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 2, 5, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 2, 2]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2], strides=[2, 2], pads=[0, 0, 0, 0], ceil_mode=1) graph = helper.make_graph([node], "avgpool_ceil_mode", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/avg_ceil_mode", "avgpool_ceil_mode.onnx") def maxpool_real_asymmetric_padding(): """MaxPool with truly asymmetric explicit padding.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 4, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 4, 3]) node = helper.make_node("MaxPool", ["X"], 
["Y"], kernel_shape=[3, 3], strides=[1, 2], pads=[0, 1, 2, 1]) graph = helper.make_graph([node], "maxpool_real_asymmetric_padding", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_real_asymmetric_padding", "maxpool_real_asymmetric_padding.onnx") def avgpool_real_asymmetric_padding(): """AveragePool with truly asymmetric explicit padding.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 2, 4, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 4, 3]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[3, 3], strides=[1, 2], pads=[0, 1, 2, 1], count_include_pad=0) graph = helper.make_graph([node], "avgpool_real_asymmetric_padding", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/avg_real_asymmetric_padding", "avgpool_real_asymmetric_padding.onnx") def maxpool_non_square_kernel(): """MaxPool with a non-square kernel.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 6]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 4, 2]) node = helper.make_node("MaxPool", ["X"], ["Y"], kernel_shape=[2, 3], strides=[1, 2], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "maxpool_non_square_kernel", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_non_square_kernel", "maxpool_non_square_kernel.onnx") def avgpool_non_uniform_stride(): """AveragePool with non-uniform stride.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 6]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 4, 2]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 3], strides=[1, 2], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "avgpool_non_uniform_stride", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, 
"pool/avg_non_uniform_stride", "avgpool_non_uniform_stride.onnx") def maxpool_global_style_kernel_equals_input(): """MaxPool where the kernel covers the full spatial input.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 8, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 8, 1, 1]) node = helper.make_node("MaxPool", ["X"], ["Y"], kernel_shape=[4, 4], strides=[1, 1], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "maxpool_global_style_kernel_equals_input", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/max_global_style_kernel_equals_input", "maxpool_global_style_kernel_equals_input.onnx") def avgpool_large_channels(): """AveragePool with a large channel count and small spatial dimensions.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1024, 2, 2]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) node = helper.make_node("AveragePool", ["X"], ["Y"], kernel_shape=[2, 2], strides=[1, 1], pads=[0, 0, 0, 0]) graph = helper.make_graph([node], "avgpool_large_channels", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "pool/avg_large_channels", "avgpool_large_channels.onnx") # --------------------------------------------------------------------------- # ReduceMean tests # --------------------------------------------------------------------------- def reducemean_basic(): """ReduceMean over the feature dimension, preserving rank.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) graph = helper.make_graph([node], "reducemean_basic", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx") def reducemean_keepdims_0(): 
"""ReduceMean over the feature dimension, dropping the reduced axis.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0) graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx") def reducemean_4d_spatial(): """ReduceMean over H and W on an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1) graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx") def reducemean_after_conv(): """Conv followed by ReduceMean over the spatial dimensions.""" rng = np.random.default_rng(62) X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1]) W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1) graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx") def reducemean_negative_axis(): """ReduceMean using a negative axis.""" X = 
helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 1]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[-1], keepdims=1) graph = helper.make_graph([node], "reducemean_negative_axis", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/negative_axis", "reduce_mean_negative_axis.onnx") def reducemean_all_axes_keepdims_1(): """ReduceMean across all axes while preserving rank.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 1]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[0, 1, 2], keepdims=1) graph = helper.make_graph([node], "reducemean_all_axes_keepdims_1", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/all_axes_keepdims_1", "reduce_mean_all_axes_keepdims_1.onnx") def reducemean_all_axes_keepdims_0(): """ReduceMean across all axes producing a scalar.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, []) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[0, 1, 2], keepdims=0) graph = helper.make_graph([node], "reducemean_all_axes_keepdims_0", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/all_axes_keepdims_0", "reduce_mean_all_axes_keepdims_0.onnx") def reducemean_4d_spatial_keepdims_0(): """ReduceMean over H and W on an NCHW tensor, dropping reduced axes.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=0) graph = helper.make_graph([node], "reducemean_4d_spatial_keepdims_0", [X], [Y]) model = helper.make_model(graph, 
opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/4d_spatial_keepdims_0", "reduce_mean_4d_spatial_keepdims_0.onnx") def reducemean_channel_axis_nchw(): """ReduceMean over the channel axis of an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1024, 2, 2]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 2, 2]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) graph = helper.make_graph([node], "reducemean_channel_axis_nchw", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/channel_axis_nchw", "reduce_mean_channel_axis_nchw.onnx") def reducemean_large_dimension_1024(): """ReduceMean over a large 1024-length dimension.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1024]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1]) node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) graph = helper.make_graph([node], "reducemean_large_dimension_1024", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reduce_mean/large_dimension_1024", "reduce_mean_large_dimension_1024.onnx") # --------------------------------------------------------------------------- # Relu tests # --------------------------------------------------------------------------- def relu_basic(): """Standalone Relu on a simple 2D tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) node = helper.make_node("Relu", ["X"], ["Y"]) graph = helper.make_graph([node], "relu_basic", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "relu/basic", "relu_basic.onnx") def relu_4d(): """Standalone Relu on an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4]) Y = 
helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4]) node = helper.make_node("Relu", ["X"], ["Y"]) graph = helper.make_graph([node], "relu_4d", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "relu/4d", "relu_4d.onnx") def relu_after_conv(): """Conv followed by Relu.""" rng = np.random.default_rng(60) X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3]) W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) relu = helper.make_node("Relu", ["C"], ["Y"]) graph = helper.make_graph([conv, relu], "relu_after_conv", [X], [Y], initializer=[W, B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "relu/after_conv", "relu_after_conv.onnx") def relu_after_gemm(): """Gemm followed by Relu.""" B, K, N = 4, 64, 32 rng = np.random.default_rng(61) W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) relu = helper.make_node("Relu", ["G"], ["Y"]) graph = helper.make_graph([gemm, relu], "relu_after_gemm", [A], [Y], initializer=[W, C]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "relu/after_gemm", "relu_after_gemm.onnx") # --------------------------------------------------------------------------- # Sigmoid tests # --------------------------------------------------------------------------- def 
sigmoid_basic(): """Standalone Sigmoid on a simple 2D tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) node = helper.make_node("Sigmoid", ["X"], ["Y"]) graph = helper.make_graph([node], "sigmoid_basic", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "sigmoid/basic", "sigmoid_basic.onnx") def sigmoid_4d(): """Standalone Sigmoid on an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4]) node = helper.make_node("Sigmoid", ["X"], ["Y"]) graph = helper.make_graph([node], "sigmoid_4d", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "sigmoid/4d", "sigmoid_4d.onnx") def sigmoid_after_gemm(): """Gemm followed by Sigmoid.""" B, K, N = 4, 64, 32 rng = np.random.default_rng(63) W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) sigmoid = helper.make_node("Sigmoid", ["G"], ["Y"]) graph = helper.make_graph([gemm, sigmoid], "sigmoid_after_gemm", [A], [Y], initializer=[W, C]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx") # --------------------------------------------------------------------------- # Softmax tests # --------------------------------------------------------------------------- def softmax_basic(): """Softmax over the last dimension of a 2D tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 5]) Y = helper.make_tensor_value_info("Y", 
TensorProto.FLOAT, [3, 5]) node = helper.make_node("Softmax", ["X"], ["Y"], axis=1) graph = helper.make_graph([node], "softmax_basic", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "softmax/basic", "softmax_basic.onnx") def softmax_3d_last_axis(): """Softmax over the last axis of a 3D tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4]) node = helper.make_node("Softmax", ["X"], ["Y"], axis=2) graph = helper.make_graph([node], "softmax_3d_last_axis", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "softmax/3d_last_axis", "softmax_3d_last_axis.onnx") def softmax_channel_axis(): """Softmax over the channel axis of an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 2, 2]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 2, 2]) node = helper.make_node("Softmax", ["X"], ["Y"], axis=1) graph = helper.make_graph([node], "softmax_channel_axis", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "softmax/channel_axis", "softmax_channel_axis.onnx") def softmax_negative_axis(): """Softmax using a negative axis.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4]) node = helper.make_node("Softmax", ["X"], ["Y"], axis=-1) graph = helper.make_graph([node], "softmax_negative_axis", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "softmax/negative_axis", "softmax_negative_axis.onnx") def softmax_large_dimension_1024(): """Softmax across a large last dimension.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1024]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024]) node = 
helper.make_node("Softmax", ["X"], ["Y"], axis=1) graph = helper.make_graph([node], "softmax_large_dimension_1024", [X], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "softmax/large_dimension_1024", "softmax_large_dimension_1024.onnx") # --------------------------------------------------------------------------- # Resize tests # --------------------------------------------------------------------------- def resize_nearest_2x(): """Resize an NCHW tensor with nearest-neighbor upsampling by a factor of 2.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 4, 6]) roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi") scales = numpy_helper.from_array(np.asarray([1.0, 1.0, 2.0, 2.0], dtype=np.float32), name="scales") node = helper.make_node( "Resize", ["X", "roi", "scales"], ["Y"], mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor") graph = helper.make_graph([node], "resize_nearest_2x", [X], [Y], initializer=[roi, scales]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "resize/nearest_2x", "resize_nearest_2x.onnx") def resize_nearest_non_uniform(): """Resize an NCHW tensor with non-uniform nearest-neighbor scales.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 6, 6]) roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi") scales = numpy_helper.from_array(np.asarray([1.0, 1.0, 3.0, 2.0], dtype=np.float32), name="scales") node = helper.make_node( "Resize", ["X", "roi", "scales"], ["Y"], mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor") graph = helper.make_graph([node], "resize_nearest_non_uniform", [X], [Y], initializer=[roi, scales]) model = helper.make_model(graph, 
opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "resize/non_uniform", "resize_non_uniform.onnx") def resize_with_sizes(): """Resize an NCHW tensor to explicit output sizes.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 3, 5]) roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi") sizes = make_int64_initializer("sizes", [1, 1, 3, 5]) node = helper.make_node( "Resize", ["X", "roi", "", "sizes"], ["Y"], mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor") graph = helper.make_graph([node], "resize_with_sizes", [X], [Y], initializer=[roi, sizes]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "resize/with_sizes", "resize_with_sizes.onnx") def resize_nearest_downsample(): """Resize an NCHW tensor with nearest-neighbor downsampling.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 4, 6]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 2, 3]) roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi") scales = numpy_helper.from_array(np.asarray([1.0, 1.0, 0.5, 0.5], dtype=np.float32), name="scales") node = helper.make_node( "Resize", ["X", "roi", "scales"], ["Y"], mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor") graph = helper.make_graph([node], "resize_nearest_downsample", [X], [Y], initializer=[roi, scales]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "resize/nearest_downsample", "resize_nearest_downsample.onnx") def resize_height_only(): """Resize only the height dimension of an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 4, 3]) roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi") scales = 
numpy_helper.from_array(np.asarray([1.0, 1.0, 2.0, 1.0], dtype=np.float32), name="scales") node = helper.make_node( "Resize", ["X", "roi", "scales"], ["Y"], mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor") graph = helper.make_graph([node], "resize_height_only", [X], [Y], initializer=[roi, scales]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "resize/height_only", "resize_height_only.onnx") def resize_width_only(): """Resize only the width dimension of an NCHW tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 2, 6]) roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi") scales = numpy_helper.from_array(np.asarray([1.0, 1.0, 1.0, 2.0], dtype=np.float32), name="scales") node = helper.make_node( "Resize", ["X", "roi", "scales"], ["Y"], mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor") graph = helper.make_graph([node], "resize_width_only", [X], [Y], initializer=[roi, scales]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "resize/width_only", "resize_width_only.onnx") # --------------------------------------------------------------------------- # Split tests # --------------------------------------------------------------------------- def split_basic(): """Split a 2D tensor into two outputs along the feature axis.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 6]) Y0 = helper.make_tensor_value_info("Y0", TensorProto.FLOAT, [2, 2]) Y1 = helper.make_tensor_value_info("Y1", TensorProto.FLOAT, [2, 4]) split = make_int64_initializer("split", [2, 4]) node = helper.make_node("Split", ["X", "split"], ["Y0", "Y1"], axis=1) graph = helper.make_graph([node], "split_basic", [X], [Y0, Y1], initializer=[split]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 
13)]) save_model(model, "split/basic", "split_basic.onnx") def split_equal_three_way(): """Split a 2D tensor evenly into three outputs.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 6]) Y0 = helper.make_tensor_value_info("Y0", TensorProto.FLOAT, [2, 2]) Y1 = helper.make_tensor_value_info("Y1", TensorProto.FLOAT, [2, 2]) Y2 = helper.make_tensor_value_info("Y2", TensorProto.FLOAT, [2, 2]) node = helper.make_node("Split", ["X"], ["Y0", "Y1", "Y2"], axis=1) graph = helper.make_graph([node], "split_equal_three_way", [X], [Y0, Y1, Y2]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "split/equal_three_way", "split_equal_three_way.onnx") def split_negative_axis(): """Split a tensor using a negative axis.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 6]) Y0 = helper.make_tensor_value_info("Y0", TensorProto.FLOAT, [2, 3, 2]) Y1 = helper.make_tensor_value_info("Y1", TensorProto.FLOAT, [2, 3, 4]) split = make_int64_initializer("split", [2, 4]) node = helper.make_node("Split", ["X", "split"], ["Y0", "Y1"], axis=-1) graph = helper.make_graph([node], "split_negative_axis", [X], [Y0, Y1], initializer=[split]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "split/negative_axis", "split_negative_axis.onnx") def split_uneven_channel_axis_4d(): """Split an NCHW tensor unevenly along the channel axis.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 7, 2, 2]) Y0 = helper.make_tensor_value_info("Y0", TensorProto.FLOAT, [1, 2, 2, 2]) Y1 = helper.make_tensor_value_info("Y1", TensorProto.FLOAT, [1, 5, 2, 2]) split = make_int64_initializer("split", [2, 5]) node = helper.make_node("Split", ["X", "split"], ["Y0", "Y1"], axis=1) graph = helper.make_graph([node], "split_uneven_channel_axis_4d", [X], [Y0, Y1], initializer=[split]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, 
"split/uneven_channel_axis_4d", "split_uneven_channel_axis_4d.onnx") # --------------------------------------------------------------------------- # Gather tests # --------------------------------------------------------------------------- def gather_axis1(): """Gather selected columns from a 2D tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 2]) indices = make_int64_initializer("indices", [0, 2]) node = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=1) graph = helper.make_graph([node], "gather_axis1", [X], [Y], initializer=[indices]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "gather/axis1", "gather_axis1.onnx") def gather_axis0_matrix_indices(): """Gather rows using a 2D indices tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 3]) indices = make_int64_initializer("indices", [[0, 2], [3, 1]]) node = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=0) graph = helper.make_graph([node], "gather_axis0_matrix_indices", [X], [Y], initializer=[indices]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "gather/axis0_matrix_indices", "gather_axis0_matrix_indices.onnx") def gather_negative_indices(): """Gather with negative indices along axis 0.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3]) indices = make_int64_initializer("indices", [-1, -3]) node = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=0) graph = helper.make_graph([node], "gather_negative_indices", [X], [Y], initializer=[indices]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "gather/negative_indices", "gather_negative_indices.onnx") def gather_negative_axis(): """Gather 
using a negative axis.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 2]) indices = make_int64_initializer("indices", [0, 2]) node = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=-1) graph = helper.make_graph([node], "gather_negative_axis", [X], [Y], initializer=[indices]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "gather/negative_axis", "gather_negative_axis.onnx") def gather_3d_input_axis1(): """Gather along axis 1 of a 3D input tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 4, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 3]) indices = make_int64_initializer("indices", [1, 3]) node = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=1) graph = helper.make_graph([node], "gather_3d_input_axis1", [X], [Y], initializer=[indices]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "gather/3d_input_axis1", "gather_3d_input_axis1.onnx") # --------------------------------------------------------------------------- # Concat tests # --------------------------------------------------------------------------- def concat_channel_axis(): """Concat two runtime NCHW tensors along the channel axis.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 2, 2]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 2, 2, 2]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 2, 2]) node = helper.make_node("Concat", ["A", "B"], ["Y"], axis=1) graph = helper.make_graph([node], "concat_channel_axis", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "concat/channel_axis", "concat_channel_axis.onnx") def concat_negative_axis(): """Concat along a negative axis.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3, 2]) B = 
helper.make_tensor_value_info("B", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 6]) node = helper.make_node("Concat", ["A", "B"], ["Y"], axis=-1) graph = helper.make_graph([node], "concat_negative_axis", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "concat/negative_axis", "concat_negative_axis.onnx") def concat_three_inputs_channel_axis(): """Concat three runtime NCHW tensors along the channel axis.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1, 2, 2]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 2, 2, 2]) C = helper.make_tensor_value_info("C", TensorProto.FLOAT, [1, 3, 2, 2]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 6, 2, 2]) node = helper.make_node("Concat", ["A", "B", "C"], ["Y"], axis=1) graph = helper.make_graph([node], "concat_three_inputs_channel_axis", [A, B, C], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "concat/three_inputs_channel_axis", "concat_three_inputs_channel_axis.onnx") # --------------------------------------------------------------------------- # Reshape tests # --------------------------------------------------------------------------- def reshape_same_rank(): """Runtime tensor Reshape with a static shape initializer and unchanged rank.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 2]) shape = make_int64_initializer("shape", [3, 2]) node = helper.make_node("Reshape", ["X", "shape"], ["Y"]) graph = helper.make_graph([node], "reshape_same_rank", [X], [Y], initializer=[shape]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reshape/same_rank", "reshape_same_rank.onnx") def reshape_infer_dim_minus_one(): """Reshape using -1 to infer one output dimension.""" X = 
helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 12]) shape = make_int64_initializer("shape", [2, -1]) node = helper.make_node("Reshape", ["X", "shape"], ["Y"]) graph = helper.make_graph([node], "reshape_infer_dim_minus_one", [X], [Y], initializer=[shape]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reshape/infer_dim_minus_one", "reshape_infer_dim_minus_one.onnx") def reshape_zero_copies_input_dim(): """Reshape using 0 to copy an input dimension.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 12]) shape = make_int64_initializer("shape", [0, -1]) node = helper.make_node("Reshape", ["X", "shape"], ["Y"]) graph = helper.make_graph([node], "reshape_zero_copies_input_dim", [X], [Y], initializer=[shape]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reshape/zero_copies_input_dim", "reshape_zero_copies_input_dim.onnx") def reshape_4d_to_2d_flatten(): """Reshape a 4D tensor to a 2D flattened view.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 60]) shape = make_int64_initializer("shape", [1, 60]) node = helper.make_node("Reshape", ["X", "shape"], ["Y"]) graph = helper.make_graph([node], "reshape_4d_to_2d_flatten", [X], [Y], initializer=[shape]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "reshape/4d_to_2d_flatten", "reshape_4d_to_2d_flatten.onnx") # --------------------------------------------------------------------------- # Add tests # --------------------------------------------------------------------------- def add_basic(): """Elementwise Add on two inputs with identical shapes.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) B = 
helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) node = helper.make_node("Add", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "add_basic", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "add/basic", "add_basic.onnx") def add_broadcast_row(): """Elementwise Add with row-vector broadcasting.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) B = numpy_helper.from_array(np.random.default_rng(64).uniform(-1, 1, (8,)).astype(np.float32), name="B") node = helper.make_node("Add", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "add_broadcast_row", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "add/broadcast_row", "add_broadcast_row.onnx") def add_after_gemm(): """Gemm followed by Add with a broadcast bias vector.""" B, K, N = 4, 64, 32 rng = np.random.default_rng(65) W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") D = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="D") A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) add = helper.make_node("Add", ["G", "D"], ["Y"]) graph = helper.make_graph([gemm, add], "add_after_gemm", [A], [Y], initializer=[W, C, D]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "add/after_gemm", "add_after_gemm.onnx") def add_channel_broadcast_1024(): """Elementwise Add with NCHW per-channel broadcasting over 1024 channels.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1024, 1, 1]) B = 
helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1024, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) node = helper.make_node("Add", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "add_channel_broadcast_1024", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "add/channel_broadcast_1024", "add_channel_broadcast_1024.onnx") def add_scalar_runtime(): """Elementwise Add with a runtime scalar RHS.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1024, 1, 1]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) node = helper.make_node("Add", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "add_scalar_runtime", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "add/scalar_runtime", "add_scalar_runtime.onnx") def add_leading_dimension_broadcast(): """Elementwise Add with trailing-dimension broadcasting.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4]) B = numpy_helper.from_array(np.random.default_rng(99).uniform(-1, 1, (4,)).astype(np.float32), name="B") node = helper.make_node("Add", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "add_leading_dimension_broadcast", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "add/leading_dimension_broadcast", "add_leading_dimension_broadcast.onnx") # --------------------------------------------------------------------------- # Mul tests # --------------------------------------------------------------------------- def mul_basic(): """Elementwise Mul on two inputs with identical shapes.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) B = 
helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) node = helper.make_node("Mul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "mul_basic", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "mul/basic", "mul_basic.onnx") def mul_scalar_constant(): """Elementwise Mul with scalar broadcasting.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) S = numpy_helper.from_array(np.asarray([1.5], dtype=np.float32), name="S") node = helper.make_node("Mul", ["X", "S"], ["Y"]) graph = helper.make_graph([node], "mul_scalar_constant", [X], [Y], initializer=[S]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "mul/scalar_constant", "mul_scalar_constant.onnx") def mul_after_conv(): """Conv followed by Mul with per-channel scaling.""" rng = np.random.default_rng(66) X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3]) W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") S = numpy_helper.from_array(rng.uniform(0.5, 1.5, (1, 2, 1, 1)).astype(np.float32), name="S") conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) mul = helper.make_node("Mul", ["C", "S"], ["Y"]) graph = helper.make_graph([conv, mul], "mul_after_conv", [X], [Y], initializer=[W, B, S]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "mul/after_conv", "mul_after_conv.onnx") def mul_channel_broadcast_1024(): """Elementwise Mul with NCHW per-channel broadcasting over 1024 channels.""" A = helper.make_tensor_value_info("A", 
TensorProto.FLOAT, [1, 1024, 1, 1]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1024, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) node = helper.make_node("Mul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "mul_channel_broadcast_1024", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "mul/channel_broadcast_1024", "mul_channel_broadcast_1024.onnx") def mul_scalar_runtime(): """Elementwise Mul with a runtime scalar RHS.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1024, 1, 1]) B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) node = helper.make_node("Mul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "mul_scalar_runtime", [A, B], [Y]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "mul/scalar_runtime", "mul_scalar_runtime.onnx") def mul_leading_dimension_broadcast(): """Elementwise Mul with trailing-dimension broadcasting.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4]) B = numpy_helper.from_array(np.random.default_rng(100).uniform(-1, 1, (4,)).astype(np.float32), name="B") node = helper.make_node("Mul", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "mul_leading_dimension_broadcast", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "mul/leading_dimension_broadcast", "mul_leading_dimension_broadcast.onnx") # --------------------------------------------------------------------------- # Div tests # --------------------------------------------------------------------------- def div_basic(): """Elementwise Div by a same-shape constant tensor.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 
8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) D = numpy_helper.from_array(np.random.default_rng(67).uniform(0.5, 2.0, (4, 8)).astype(np.float32), name="D") node = helper.make_node("Div", ["X", "D"], ["Y"]) graph = helper.make_graph([node], "div_basic", [X], [Y], initializer=[D]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/basic", "div_basic.onnx") def div_scalar_constant(): """Elementwise Div with scalar broadcasting.""" X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) S = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="S") node = helper.make_node("Div", ["X", "S"], ["Y"]) graph = helper.make_graph([node], "div_scalar_constant", [X], [Y], initializer=[S]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/scalar_constant", "div_scalar_constant.onnx") def div_after_gemm(): """Gemm followed by Div with a broadcast divisor vector.""" B, K, N = 4, 64, 32 rng = np.random.default_rng(68) W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") D = numpy_helper.from_array(rng.uniform(0.5, 2.0, (N,)).astype(np.float32), name="D") A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) div = helper.make_node("Div", ["G", "D"], ["Y"]) graph = helper.make_graph([gemm, div], "div_after_gemm", [A], [Y], initializer=[W, C, D]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/after_gemm", "div_after_gemm.onnx") def div_channel_broadcast_1024(): """Elementwise Div with NCHW per-channel broadcasting over 1024 channels.""" A = helper.make_tensor_value_info("A", 
TensorProto.FLOAT, [1, 1024, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) B = numpy_helper.from_array(np.random.default_rng(102).uniform(0.5, 2.0, (1, 1024, 1, 1)).astype(np.float32), name="B") node = helper.make_node("Div", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "div_channel_broadcast_1024", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/channel_broadcast_1024", "div_channel_broadcast_1024.onnx") def div_runtime_scalar_rhs(): """Elementwise Div by a scalar constant.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [1, 1024, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) B = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="B") node = helper.make_node("Div", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "div_runtime_scalar_rhs", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/runtime_scalar_rhs", "div_runtime_scalar_rhs.onnx") def div_runtime_scalar_lhs(): """Elementwise Div with a scalar constant numerator.""" B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [1, 1024, 1, 1]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1024, 1, 1]) A = numpy_helper.from_array(np.asarray([[[[2.0]]]], dtype=np.float32), name="A") node = helper.make_node("Div", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "div_runtime_scalar_lhs", [B], [Y], initializer=[A]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/runtime_scalar_lhs", "div_runtime_scalar_lhs.onnx") def div_leading_dimension_broadcast(): """Elementwise Div with trailing-dimension broadcasting.""" A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [2, 3, 4]) Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4]) B = 
numpy_helper.from_array(np.random.default_rng(101).uniform(0.5, 2.0, (4,)).astype(np.float32), name="B") node = helper.make_node("Div", ["A", "B"], ["Y"]) graph = helper.make_graph([node], "div_leading_dimension_broadcast", [A], [Y], initializer=[B]) model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) save_model(model, "div/leading_dimension_broadcast", "div_leading_dimension_broadcast.onnx") # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- if __name__ == "__main__": print("Generating GEMM tests:") gemm_simple() gemm_non_square() gemm_with_bias() gemm_transB() gemm_alpha_beta() gemm_small() gemm_large() gemm_transB_with_bias() gemm_dynamic() gemm_dynamic_transB() gemm_dynamic_bias() gemm_dynamic_alpha() gemm_dynamic_beta() gemm_dynamic_bias_alpha_beta() gemm_huge_1024() gemm_large_k_small_n() gemm_small_k_large_n() gemm_transA() gemm_transA_transB() gemm_bias_rank2_broadcast() gemm_scalar_bias() print("\nGenerating Conv tests:") conv_3x3_kernel() conv_stride2() conv_multi_channel() conv_1x1() conv_same_padding_3x3() conv_explicit_padding() conv_with_bias_3x3() conv_batch_2() conv_large_spatial() conv_grouped_two_groups() conv_depthwise_grouped() conv_dynamic() conv_huge_pointwise_1024() conv_huge_pointwise_1024_dynamic() conv_large_output_channels_1x1() conv_large_input_channels_1x1() conv_depthwise_1024_channels() conv_grouped_many_groups() conv_non_square_kernel_1x3() conv_non_square_kernel_3x1() conv_non_uniform_stride() conv_dilated_3x3() conv_real_asymmetric_padding() conv_same_lower_3x3() conv_kernel_equals_input_spatial() conv_batch_4_pointwise() conv_without_kernel_shape_attr() print("\nGenerating MatMul tests:") matmul_basic() matmul_left_constant() matmul_dynamic() matmul_batched_3d() matmul_batched_3d_dynamic() matmul_batched_left_constant() matmul_batched_rhs_broadcast() matmul_batched_lhs_broadcast() 
matmul_huge_1024() matmul_vector_matrix() matmul_matrix_vector() matmul_vector_vector_dot() matmul_batched_4d_broadcast() print("\nGenerating Pooling tests:") maxpool_basic() maxpool_stride2_multichannel() maxpool_same_upper() avgpool_basic() avgpool_explicit_padding() avgpool_include_pad() maxpool_after_conv() maxpool_ceil_mode() avgpool_ceil_mode() maxpool_real_asymmetric_padding() avgpool_real_asymmetric_padding() maxpool_non_square_kernel() avgpool_non_uniform_stride() maxpool_global_style_kernel_equals_input() avgpool_large_channels() print("\nGenerating ReduceMean tests:") reducemean_basic() reducemean_keepdims_0() reducemean_4d_spatial() reducemean_after_conv() reducemean_negative_axis() reducemean_all_axes_keepdims_1() reducemean_all_axes_keepdims_0() reducemean_4d_spatial_keepdims_0() reducemean_channel_axis_nchw() reducemean_large_dimension_1024() print("\nGenerating Relu tests:") relu_basic() relu_4d() relu_after_conv() relu_after_gemm() print("\nGenerating Sigmoid tests:") sigmoid_basic() sigmoid_4d() sigmoid_after_gemm() print("\nGenerating Split tests:") split_basic() split_equal_three_way() split_negative_axis() split_uneven_channel_axis_4d() print("\nGenerating Softmax tests:") softmax_basic() softmax_3d_last_axis() softmax_channel_axis() softmax_negative_axis() softmax_large_dimension_1024() print("\nGenerating Resize tests:") resize_nearest_2x() resize_nearest_non_uniform() resize_with_sizes() resize_nearest_downsample() resize_height_only() resize_width_only() print("\nGenerating Gather tests:") gather_axis1() gather_axis0_matrix_indices() gather_negative_indices() gather_negative_axis() gather_3d_input_axis1() print("\nGenerating Concat tests:") concat_channel_axis() concat_negative_axis() concat_three_inputs_channel_axis() print("\nGenerating Reshape tests:") reshape_same_rank() reshape_infer_dim_minus_one() reshape_zero_copies_input_dim() reshape_4d_to_2d_flatten() print("\nGenerating Add tests:") add_basic() add_broadcast_row() 
add_after_gemm() add_channel_broadcast_1024() add_scalar_runtime() add_leading_dimension_broadcast() print("\nGenerating Mul tests:") mul_basic() mul_scalar_constant() mul_after_conv() mul_channel_broadcast_1024() mul_scalar_runtime() mul_leading_dimension_broadcast() print("\nGenerating Div tests:") div_basic() div_scalar_constant() div_after_gemm() div_channel_broadcast_1024() div_runtime_scalar_rhs() div_runtime_scalar_lhs() div_leading_dimension_broadcast() print("\nDone.")