diff --git a/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs b/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs index 068e639..f2885b8 100644 --- a/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs +++ b/backend-simulators/pim/pim-simulator/src/lib/json_to_instruction/json_isa.rs @@ -1,4 +1,4 @@ -use anyhow::{Context, Result}; +use anyhow::{Context, Result, ensure}; use paste::paste; use std::{collections::HashMap, mem::offset_of, sync::LazyLock}; @@ -36,6 +36,7 @@ static SIMD: LazyLock> = LazyLock::new(|| { add_to_json_map!(storage, vvmax); add_to_json_map!(storage, vvsll); add_to_json_map!(storage, vvsra); + add_to_json_map!(storage, vavg); add_to_json_map!(storage, vrelu); add_to_json_map!(storage, vtanh); add_to_json_map!(storage, vsigm); @@ -339,6 +340,7 @@ fn json_to_vavg( let rd = json_i64!(json, "rd") as i32; let rs1 = json_i64!(json, "rs1") as i32; let rs2 = json_i64!(json, "rs2") as i32; + ensure!(rs2 == 1, "vavg only supports stride 1"); let len = json_i64!(json, "len") as i32; let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap()); inst_data_builder diff --git a/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs b/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs index 7eedea7..151209e 100644 --- a/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs +++ b/backend-simulators/pim/pim-simulator/src/lib/memory_manager/type_traits.rs @@ -55,19 +55,15 @@ pub trait HasSigm { impl HasSigm for f32 { fn sigm(self) -> Self { - let x = self; - let e = std::f32::consts::E; - let ex = x.powf(x); - (ex) / (1.0+ex) + let ex = self.exp(); + ex / (1.0 + ex) } } impl HasSigm for f64 { fn sigm(self) -> Self { - let x = self; - let e = std::f64::consts::E; - let ex = x.powf(x); - (ex) / (1.0+ex) + let ex = self.exp(); + ex / (1.0 + ex) } } diff --git a/src/PIM/Compiler/PimCodeGen.cpp 
b/src/PIM/Compiler/PimCodeGen.cpp index 7c1f6e9..d799c46 100644 --- a/src/PIM/Compiler/PimCodeGen.cpp +++ b/src/PIM/Compiler/PimCodeGen.cpp @@ -121,6 +121,13 @@ json::Object PimCodeGen::createEmptyOffset() { return offset; } +static json::Object createRs1OnlyOffset() { + json::Object offset; + offset["offset_select"] = 1; + offset["offset_value"] = 0; + return offset; +} + void PimCodeGen::emitInstruction(json::Object instruction) const { coreFileStream << json::Value(std::move(instruction)) << ','; } @@ -331,7 +338,8 @@ void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp) const { json["op"] = "vavg"; json["rd"] = 0; json["rs1"] = 1; - json["offset"] = createEmptyOffset(); + json["rs2"] = 1; + json["offset"] = createRs1OnlyOffset(); json["len"] = getValueSizeInBytes(vavgOp.getInput()); emitInstruction(std::move(json)); } diff --git a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt index 245ff6f..1e3b3ae 100644 --- a/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt +++ b/src/PIM/Conversion/ONNXToSpatial/CMakeLists.txt @@ -4,10 +4,13 @@ add_public_tablegen_target(ONNXToSpatialIncGen) add_pim_library(OMONNXToSpatial Patterns/Math/Conv.cpp + Patterns/Math/Elementwise.cpp Patterns/Math/Gemm.cpp Patterns/Math/MatMul.cpp + Patterns/Math/ReduceMean.cpp Patterns/NN/Pool.cpp Patterns/NN/Relu.cpp + Patterns/NN/Sigmoid.cpp Patterns/Tensor/Concat.cpp Patterns/Tensor/Reshape.cpp ONNXToSpatialPass.cpp diff --git a/src/PIM/Conversion/ONNXToSpatial/Common.hpp b/src/PIM/Conversion/ONNXToSpatial/Common.hpp index 864148d..56a5920 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common.hpp @@ -14,8 +14,6 @@ #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" -#define DEFINE_MAP_OP(opname) opname, - namespace onnx_mlir { template diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp 
b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp index a6dc98f..759f4e7 100644 --- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp @@ -72,11 +72,15 @@ void ONNXToSpatialPass::runOnOperation() { target.addLegalDialect(); target.addDynamicallyLegalOp( [](ONNXMatMulOp op) { return cast(op.getY().getType()).getRank() != 2; }); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); + target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); target.addIllegalOp(); @@ -86,10 +90,13 @@ void ONNXToSpatialPass::runOnOperation() { RewritePatternSet patterns(ctx); patterns.add(ctx); + populateElementwisePatterns(patterns, ctx); populateGemmPatterns(patterns, ctx); populateConvPatterns(patterns, ctx); populatePoolPatterns(patterns, ctx); + populateReduceMeanPatterns(patterns, ctx); populateReluPatterns(patterns, ctx); + populateSigmoidPatterns(patterns, ctx); populateConcatPatterns(patterns, ctx); populateReshapePatterns(patterns, ctx); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp index 58f9a10..38232ba 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.hpp @@ -7,14 +7,20 @@ namespace onnx_mlir { void populateConvPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); +void populateElementwisePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateGemmPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); void populateMatMulRewritePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); void populatePoolPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); +void populateReduceMeanPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void 
populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); +void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); + void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp new file mode 100644 index 0000000..21221e5 --- /dev/null +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp @@ -0,0 +1,204 @@ +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "llvm/ADT/SmallVector.h" + +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" +#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" +#include "src/Dialect/ONNX/ONNXOps.hpp" + +using namespace mlir; + +namespace onnx_mlir { +namespace { + +static SmallVector computeRowMajorStrides(ArrayRef shape) { + SmallVector strides(shape.size(), 1); + for (int64_t i = static_cast(shape.size()) - 2; i >= 0; --i) + strides[i] = strides[i + 1] * shape[i + 1]; + return strides; +} + +static DenseElementsAttr getDenseConstantAttr(Value value) { + if (auto constantOp = value.getDefiningOp()) + return dyn_cast(constantOp.getValue()); + + if (auto constantOp = value.getDefiningOp()) + return dyn_cast_or_null(constantOp.getValueAttr()); + + return nullptr; +} + +static FailureOr materializeBroadcastedConstantTensor(Value value, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + auto denseAttr = getDenseConstantAttr(value); + if (!denseAttr) + return failure(); + + auto sourceType = dyn_cast(denseAttr.getType()); + if (!sourceType || 
!sourceType.hasStaticShape() || !resultType.hasStaticShape()) + return failure(); + + if (sourceType == resultType) + return value; + + ArrayRef sourceShape = sourceType.getShape(); + ArrayRef resultShape = resultType.getShape(); + if (sourceShape.size() > resultShape.size()) + return failure(); + + const int64_t rankOffset = static_cast(resultShape.size() - sourceShape.size()); + for (int64_t i = 0; i < static_cast(resultShape.size()); ++i) { + const int64_t sourceIndex = i - rankOffset; + const int64_t sourceDim = sourceIndex < 0 ? 1 : sourceShape[sourceIndex]; + const int64_t resultDim = resultShape[i]; + if (sourceDim != 1 && sourceDim != resultDim) + return failure(); + } + + SmallVector sourceValues(denseAttr.getValues()); + SmallVector sourceStrides = computeRowMajorStrides(sourceShape); + SmallVector resultStrides = computeRowMajorStrides(resultShape); + + SmallVector resultValues; + resultValues.reserve(resultType.getNumElements()); + + for (int64_t flatIndex = 0; flatIndex < resultType.getNumElements(); ++flatIndex) { + int64_t remaining = flatIndex; + int64_t sourceFlatIndex = 0; + + for (int64_t i = 0; i < static_cast(resultShape.size()); ++i) { + const int64_t resultIndex = resultStrides.empty() ? 0 : remaining / resultStrides[i]; + remaining = resultStrides.empty() ? 0 : remaining % resultStrides[i]; + + const int64_t sourceIndex = i - rankOffset; + if (sourceIndex < 0) + continue; + + const int64_t sourceDim = sourceShape[sourceIndex]; + const int64_t mappedIndex = sourceDim == 1 ? 
0 : resultIndex; + sourceFlatIndex += mappedIndex * sourceStrides[sourceIndex]; + } + + resultValues.push_back(sourceValues[sourceFlatIndex]); + } + + auto broadcastedAttr = DenseElementsAttr::get(resultType, resultValues); + return arith::ConstantOp::create(rewriter, loc, resultType, broadcastedAttr).getResult(); +} + +static FailureOr prepareElementwiseOperand(Value value, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + auto valueType = dyn_cast(value.getType()); + if (!valueType || !valueType.hasStaticShape()) + return failure(); + + if (valueType == resultType) + return value; + + return materializeBroadcastedConstantTensor(value, resultType, rewriter, loc); +} + +static FailureOr materializeReciprocalTensor(Value value, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + auto broadcastedValue = materializeBroadcastedConstantTensor(value, resultType, rewriter, loc); + if (failed(broadcastedValue)) + return failure(); + + auto denseAttr = dyn_cast(getDenseConstantAttr(*broadcastedValue)); + if (!denseAttr) + return failure(); + + SmallVector reciprocalValues; + reciprocalValues.reserve(denseAttr.getNumElements()); + for (const APFloat& valueAttr : denseAttr.getValues()) { + APFloat reciprocal(valueAttr.getSemantics(), 1); + auto status = reciprocal.divide(valueAttr, APFloat::rmNearestTiesToEven); + if (status & APFloat::opInvalidOp) + return failure(); + reciprocalValues.push_back(std::move(reciprocal)); + } + + auto reciprocalAttr = DenseFPElementsAttr::get(resultType, reciprocalValues); + return arith::ConstantOp::create(rewriter, loc, resultType, reciprocalAttr).getResult(); +} + +template +struct BinaryElementwiseToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + using Adaptor = typename OnnxOp::Adaptor; + + LogicalResult matchAndRewrite(OnnxOp op, Adaptor adaptor, ConversionPatternRewriter& rewriter) const override { + auto resultType = 
dyn_cast(op->getResult(0).getType()); + if (!resultType || !resultType.hasStaticShape()) + return failure(); + + Location loc = op.getLoc(); + auto lhs = prepareElementwiseOperand(adaptor.getOperands()[0], resultType, rewriter, loc); + if (failed(lhs)) + return failure(); + + auto rhs = prepareElementwiseOperand(adaptor.getOperands()[1], resultType, rewriter, loc); + if (failed(rhs)) + return failure(); + + constexpr size_t numInputs = 2; + auto computeOp = + createSpatCompute(rewriter, loc, resultType, {}, ValueRange {*lhs, *rhs}, [&](Value x, Value y) { + auto loweredOp = SpatialOp::create(rewriter, loc, resultType, x, y); + spatial::SpatYieldOp::create(rewriter, loc, loweredOp.getResult()); + }); + + rewriter.replaceOp(op, computeOp); + return success(); + } +}; + +struct DivToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ONNXDivOp op, ONNXDivOpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override { + auto resultType = dyn_cast(op.getResult().getType()); + if (!resultType || !resultType.hasStaticShape()) + return failure(); + + Location loc = op.getLoc(); + auto lhs = prepareElementwiseOperand(adaptor.getA(), resultType, rewriter, loc); + if (failed(lhs)) + return failure(); + + auto reciprocalRhs = materializeReciprocalTensor(adaptor.getB(), resultType, rewriter, loc); + if (failed(reciprocalRhs)) + return failure(); + + constexpr size_t numInputs = 2; + auto computeOp = createSpatCompute( + rewriter, loc, resultType, {}, ValueRange {*lhs, *reciprocalRhs}, [&](Value x, Value reciprocal) { + auto mulOp = spatial::SpatVMulOp::create(rewriter, loc, resultType, x, reciprocal); + spatial::SpatYieldOp::create(rewriter, loc, mulOp.getResult()); + }); + + rewriter.replaceOp(op, computeOp); + return success(); + } +}; + +} // namespace + +void populateElementwisePatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + patterns.add>(ctx); + patterns.add>(ctx); + 
patterns.add(ctx); +} + +} // namespace onnx_mlir diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp new file mode 100644 index 0000000..c78e078 --- /dev/null +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp @@ -0,0 +1,163 @@ +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Transforms/DialectConversion.h" + +#include "llvm/ADT/SmallVector.h" + +#include + +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" +#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" +#include "src/Dialect/ONNX/ONNXOps.hpp" + +using namespace mlir; + +namespace onnx_mlir { +namespace { + +static SmallVector normalizeAxes(ArrayAttr axesAttr, int64_t rank) { + SmallVector normalizedAxes; + if (!axesAttr) { + normalizedAxes.reserve(rank); + for (int64_t axis = 0; axis < rank; axis++) + normalizedAxes.push_back(axis); + return normalizedAxes; + } + + normalizedAxes.reserve(axesAttr.size()); + for (Attribute attr : axesAttr) { + int64_t axis = cast(attr).getInt(); + normalizedAxes.push_back(axis >= 0 ? 
axis : rank + axis); + } + + llvm::sort(normalizedAxes); + normalizedAxes.erase(std::unique(normalizedAxes.begin(), normalizedAxes.end()), normalizedAxes.end()); + return normalizedAxes; +} + +static SmallVector buildReducedAxesMask(ArrayRef axes, int64_t rank) { + SmallVector reducedAxes(rank, false); + for (int64_t axis : axes) { + if (axis < 0 || axis >= rank) + return {}; + reducedAxes[axis] = true; + } + return reducedAxes; +} + +static RankedTensorType getAllOnesType(RankedTensorType inputType, Type elementType) { + return RankedTensorType::get(SmallVector(inputType.getRank(), 1), elementType); +} + +static SmallVector buildCollapseReassociation(ArrayRef reducedAxes) { + SmallVector reassociation; + ReassociationIndices currentGroup; + + for (auto [axis, isReduced] : llvm::enumerate(reducedAxes)) { + currentGroup.push_back(axis); + if (!isReduced) { + reassociation.push_back(currentGroup); + currentGroup.clear(); + } + } + + if (!currentGroup.empty()) { + if (reassociation.empty()) + reassociation.push_back(std::move(currentGroup)); + else + reassociation.back().append(currentGroup.begin(), currentGroup.end()); + } + + return reassociation; +} + +static Value createAverageCompute(Value input, + RankedTensorType resultType, + ConversionPatternRewriter& rewriter, + Location loc) { + constexpr size_t numInputs = 1; + auto computeOp = createSpatCompute(rewriter, loc, resultType, {}, ValueRange {input}, [&](Value x) { + auto avgOp = spatial::SpatVAvgOp::create(rewriter, loc, resultType, x); + spatial::SpatYieldOp::create(rewriter, loc, avgOp.getResult()); + }); + return computeOp.getResult(0); +} + +static Value buildReduceMeanKeepdims(Value input, + ArrayRef reducedAxes, + int64_t axis, + RankedTensorType leafType, + ConversionPatternRewriter& rewriter, + Location loc) { + int64_t rank = cast(input.getType()).getRank(); + if (axis == rank) + return createAverageCompute(input, leafType, rewriter, loc); + + if (reducedAxes[axis]) + return 
buildReduceMeanKeepdims(input, reducedAxes, axis + 1, leafType, rewriter, loc); + + SmallVector slices = sliceTensor(input, axis, /*sliceSize=*/1, rewriter, loc); + SmallVector reducedSlices; + reducedSlices.reserve(slices.size()); + for (Value slice : slices) + reducedSlices.push_back(buildReduceMeanKeepdims(slice, reducedAxes, axis + 1, leafType, rewriter, loc)); + + return reducedSlices.size() == 1 ? reducedSlices.front() + : tensor::ConcatOp::create(rewriter, loc, axis, reducedSlices).getResult(); +} + +static Value squeezeReducedAxes(Value keepdimsValue, + RankedTensorType resultType, + ArrayRef reducedAxes, + ConversionPatternRewriter& rewriter, + Location loc) { + if (resultType.getRank() == 0) { + SmallVector indices(cast(keepdimsValue.getType()).getRank(), + arith::ConstantIndexOp::create(rewriter, loc, 0)); + Value element = tensor::ExtractOp::create(rewriter, loc, keepdimsValue, indices); + return tensor::FromElementsOp::create(rewriter, loc, resultType, ValueRange {element}); + } + + return tensor::CollapseShapeOp::create( + rewriter, loc, resultType, keepdimsValue, buildCollapseReassociation(reducedAxes)) + .getResult(); +} + +struct ReduceMeanToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp, + ONNXReduceMeanV13OpAdaptor adaptor, + ConversionPatternRewriter& rewriter) const override { + auto inputType = dyn_cast(adaptor.getData().getType()); + auto resultType = dyn_cast(reduceMeanOp.getReduced().getType()); + if (!inputType || !resultType || !inputType.hasStaticShape() || !resultType.hasStaticShape()) + return failure(); + + SmallVector axes = normalizeAxes(reduceMeanOp.getAxesAttr(), inputType.getRank()); + SmallVector reducedAxes = buildReducedAxesMask(axes, inputType.getRank()); + if (reducedAxes.empty() && inputType.getRank() != 0) + return failure(); + + Location loc = reduceMeanOp.getLoc(); + RankedTensorType leafType = 
getAllOnesType(inputType, resultType.getElementType()); + Value reducedKeepdims = buildReduceMeanKeepdims(adaptor.getData(), reducedAxes, /*axis=*/0, leafType, rewriter, loc); + + if (reduceMeanOp.getKeepdims() != 0) { + rewriter.replaceOp(reduceMeanOp, reducedKeepdims); + return success(); + } + + Value reduced = squeezeReducedAxes(reducedKeepdims, resultType, reducedAxes, rewriter, loc); + rewriter.replaceOp(reduceMeanOp, reduced); + return success(); + } +}; + +} // namespace + +void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + patterns.add(ctx); +} + +} // namespace onnx_mlir diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp new file mode 100644 index 0000000..1fc13e8 --- /dev/null +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp @@ -0,0 +1,36 @@ +#include "mlir/Transforms/DialectConversion.h" + +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp" +#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" +#include "src/Dialect/ONNX/ONNXOps.hpp" + +using namespace mlir; + +namespace onnx_mlir { +namespace { + +struct SigmoidToSpatialCompute : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult matchAndRewrite(ONNXSigmoidOp sigmoidOp, + ONNXSigmoidOpAdaptor adaptor, + ConversionPatternRewriter& rewriter) const override { + Location loc = sigmoidOp.getLoc(); + Type resultType = sigmoidOp.getResult().getType(); + constexpr size_t numInputs = 1; + auto computeOp = createSpatCompute(rewriter, loc, resultType, {}, adaptor.getX(), [&](Value x) { + auto spatSigmoidOp = spatial::SpatSigmoidOp::create(rewriter, loc, resultType, x); + spatial::SpatYieldOp::create(rewriter, loc, spatSigmoidOp.getResult()); + }); + rewriter.replaceOp(sigmoidOp, computeOp); + return success(); + } +}; + +} // namespace + +void populateSigmoidPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { + 
patterns.add(ctx); +} + +} // namespace onnx_mlir diff --git a/src/PIM/Conversion/SpatialToPim/SpatialToPim.td b/src/PIM/Conversion/SpatialToPim/SpatialToPim.td index de37037..ce94a90 100644 --- a/src/PIM/Conversion/SpatialToPim/SpatialToPim.td +++ b/src/PIM/Conversion/SpatialToPim/SpatialToPim.td @@ -39,6 +39,12 @@ def spatToPimVVMul : Pat< (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) >; +def spatToPimVAvg : Pat< + (SpatVAvgOp:$srcOpRes $input), + (PimVAvgOp $input, + (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) +>; + def spatToPimVVMax : Pat< (SpatVMaxOp:$srcOpRes $a, $b), (PimVVMaxOp $a, $b, @@ -51,4 +57,10 @@ def spatToPimVRelu : Pat< (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) >; +def spatToPimVSigm : Pat< + (SpatSigmoidOp:$srcOpRes $input), + (PimVSigmOp $input, + (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes)) +>; + #endif // SPATIAL_TO_PIM diff --git a/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp b/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp index fedb72d..b527f6d 100644 --- a/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp +++ b/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp @@ -161,26 +161,41 @@ void SpatialToPimPass::runOnOperation() { } for (auto receiveOp : funcOp.getOps()) { - operationsToRemove.push_back(receiveOp); + markOpToRemove(receiveOp); runOnReceiveOp(receiveOp, rewriter); } for (auto computeOp : funcOp.getOps()) { - operationsToRemove.push_back(computeOp); + markOpToRemove(computeOp); runOnComputeOp(computeOp, rewriter); } enlargeVMMOutTensorsToCrossbarSize(funcOp, rewriter); replaceReturnOpOperands(returnOp, rewriter); - // Remove all ComputeOps - for (auto opToRemove : llvm::reverse(operationsToRemove)) { - if (!opToRemove->use_empty()) { 
+ SmallVector pendingRemovals(operationsToRemove.begin(), operationsToRemove.end()); + while (!pendingRemovals.empty()) { + bool erasedAnyOp = false; + for (auto it = pendingRemovals.begin(); it != pendingRemovals.end();) { + Operation* opToRemove = *it; + if (!opToRemove->use_empty()) { + ++it; + continue; + } + + rewriter.eraseOp(opToRemove); + it = pendingRemovals.erase(it); + erasedAnyOp = true; + } + + if (erasedAnyOp) + continue; + + for (auto opToRemove : pendingRemovals) { opToRemove->dump(); for (auto user : opToRemove->getUsers()) user->dump(); - assert(false && "opToRemove should be unused at this point"); } - rewriter.eraseOp(opToRemove); + assert(false && "tracked op removal reached a cycle or missed dependency"); } // Dump to file for debug @@ -284,10 +299,19 @@ void SpatialToPimPass::runOnComputeOp(spatial::SpatWeightedCompute computeOp, IR auto concatUses = concatValue.getUses(); auto numConcatUses = rangeLength(concatUses); if (numConcatUses == 1) { - OpOperand& concatUse = *concatUses.begin(); - Operation* concatUser = concatUse.getOwner(); + Value chainedValue = concatValue; + Operation* concatUser = concatUses.begin()->getOwner(); + + while (isChannelUseChainOp(concatUser)) { + auto chainUses = concatUser->getResult(0).getUses(); + if (rangeLength(chainUses) != 1) + break; + chainedValue = concatUser->getResult(0); + concatUser = chainUses.begin()->getOwner(); + } + if (isa(concatUser)) { - size_t concatIndexInReturn = concatUse.getOperandNumber(); + size_t concatIndexInReturn = chainedValue.getUses().begin()->getOperandNumber(); size_t resultIndexInConcat = resultUses.begin()->getOperandNumber(); size_t offset = 0; for (auto operand : concatOp->getOperands().take_front(resultIndexInConcat)) @@ -602,10 +626,22 @@ void SpatialToPimPass::replaceReturnOpOperands(func::ReturnOp& returnOp, IRRewri rewriter.modifyOpInPlace(returnOp, [&] { returnOp.setOperand(orderWithinReturn, outputTensors[orderWithinReturn]); }); - if (isa(returnOperand)) { - auto 
returnOperandUses = it.value().getUses(); - if (rangeLength(returnOperandUses) == 0) - rewriter.eraseOp(returnOperand); + Operation* opToErase = returnOperand; + while (opToErase) { + bool isExclusivelyOwnedByReturnChain = opToErase->use_empty() || opToErase->hasOneUse(); + if (!isExclusivelyOwnedByReturnChain) + break; + + if (isChannelUseChainOp(opToErase)) { + Value source = opToErase->getOperand(0); + markOpToRemove(opToErase); + opToErase = source.getDefiningOp(); + continue; + } + + if (isa(opToErase)) + markOpToRemove(opToErase); + break; } } } diff --git a/src/PIM/Dialect/Spatial/Spatial.td b/src/PIM/Dialect/Spatial/Spatial.td index 84de4a2..a970bdf 100644 --- a/src/PIM/Dialect/Spatial/Spatial.td +++ b/src/PIM/Dialect/Spatial/Spatial.td @@ -239,6 +239,22 @@ def SpatSumOp : SpatOp<"sum", []> { }]; } +def SpatVAvgOp : SpatOp<"vavg", []> { + let summary = "Average all elements of the input tensor to a single scalar wrapped in a tensor"; + + let arguments = (ins + SpatTensor:$input + ); + + let results = (outs + SpatTensor:$output + ); + + let assemblyFormat = [{ + `(` $input `)` attr-dict `:` type($input) `->` type($output) + }]; +} + def SpatSigmoidOp : SpatOp<"sigmoid", []> { let summary = "Element-wise sigmoid activation"; diff --git a/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp b/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp index 5d0869f..d81df66 100644 --- a/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/SpatialBufferizableOpInterface.cpp @@ -361,7 +361,7 @@ struct ChannelBroadcastReceiveOpInterface } /* - * Turn the channel receive to pim.load using by creating a new global buffer + * Turn the broadcast receive into a regular pim.receive from the broadcaster. 
*/ LogicalResult bufferize(Operation* op, RewriterBase& rewriter, @@ -370,8 +370,21 @@ struct ChannelBroadcastReceiveOpInterface auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter); - auto outputType = cast(outputTensor.getType()); - auto outputSize = outputType.getNumElements() * outputType.getElementTypeBitWidth() / 8; + auto numElements = cast(outputTensor.getType()).getNumElements(); + auto elementSize = cast(outputTensor.getType()).getElementTypeBitWidth() / 8; + + auto precomputedOtherCoreId = op->getAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME); + if (precomputedOtherCoreId) { + Value newValue = pim::PimReceiveOp::create(rewriter, + op->getLoc(), + outputTensor.getType(), + outputTensor, + rewriter.getI32IntegerAttr(numElements * elementSize), + cast(precomputedOtherCoreId)) + .getOutput(); + replaceOpWithBufferizedValues(rewriter, op, newValue); + return success(); + } auto channelNewOp = op->getOperand(0).getDefiningOp(); if (!channelNewOp) { @@ -379,31 +392,30 @@ struct ChannelBroadcastReceiveOpInterface return failure(); } - // The first 'broadcast' operation creates the buffer just after the - // channelNewOp, while the other 'broadcast' operation need to find this - // buffer allocation just after the channelNewOp - Value bufferAllocation; - if (auto allocOpAfterChannel = dyn_cast(channelNewOp->getNextNode())) { - // Buffer already allocated, load from this buffer - bufferAllocation = allocOpAfterChannel; - } - else { - // Buffer was not allocated previously, allocate it after channelNewOp - rewriter.setInsertionPointAfter(channelNewOp); - bufferAllocation = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter); - } + auto srcCoreId = [&]() -> FailureOr { + for (Operation* user : channelNewOp->getUsers()) { + auto sendOp = dyn_cast(user); + if (!sendOp) + continue; + auto sendCoreIdAttr = cast(sendOp->getParentOp()).getCoreIdAttr(); + op->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, sendCoreIdAttr); 
+ return cast(sendOp->getParentOp()).getCoreId(); + } + op->emitError("ChannelBroadcastReceiveOp has no matching ChannelBroadcastSendOp"); + return failure(); + }(); + if (failed(srcCoreId)) + return failure(); - rewriter.setInsertionPoint(op); - auto memCopyHostToDevOp = pim::PimMemCopyHostToDevOp::create(rewriter, - op->getLoc(), - outputTensor.getType(), - outputTensor, - bufferAllocation, - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(outputSize)); + Value newValue = pim::PimReceiveOp::create(rewriter, + op->getLoc(), + outputTensor.getType(), + outputTensor, + rewriter.getI32IntegerAttr(numElements * elementSize), + rewriter.getI32IntegerAttr(srcCoreId.value())) + .getOutput(); - replaceOpWithBufferizedValues(rewriter, op, memCopyHostToDevOp.getOutput()); + replaceOpWithBufferizedValues(rewriter, op, newValue); return success(); } @@ -428,8 +440,7 @@ struct ChannelBroadcastSendOpInterface } /* - * Turn the channel send into a device-to-host copy into the shared - * broadcast buffer that receive ops load from later. + * Turn the broadcast send into one pim.send per broadcast receiver. 
*/ LogicalResult bufferize(Operation* op, RewriterBase& rewriter, @@ -448,32 +459,32 @@ struct ChannelBroadcastSendOpInterface return failure(); } - // The first 'broadcast' operation creates the buffer just after the - // channelNewOp, while the other 'broadcast' operation need to find this - // buffer allocation just after the channelNewOp - Value bufferAllocation; - if (auto allocOpAfterChannel = dyn_cast(channelNewOp->getNextNode())) { - // Buffer already allocated, load from this buffer - bufferAllocation = allocOpAfterChannel; - } - else { - // Buffer was not allocated previously, allocate it after channelNewOp - rewriter.setInsertionPointAfter(channelNewOp); - bufferAllocation = createEmptyFromType(srcTensor.getType(), op->getLoc(), rewriter); - } - auto srcType = cast(srcTensor.getType()); auto sizeInBytes = srcType.getNumElements() * srcType.getElementTypeBitWidth() / 8; + auto srcCoreIdAttr = cast(op->getParentOp()).getCoreIdAttr(); rewriter.setInsertionPoint(op); - pim::PimMemCopyDevToHostOp::create(rewriter, - op->getLoc(), - bufferAllocation.getType(), - bufferAllocation, - srcMemRef, - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(0), - rewriter.getI32IntegerAttr(sizeInBytes)); + bool foundReceiver = false; + for (Operation* user : channelNewOp->getUsers()) { + auto receiveOp = dyn_cast(user); + if (!receiveOp) + continue; + + foundReceiver = true; + auto dstCoreId = cast(receiveOp->getParentOp()).getCoreId(); + receiveOp->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, srcCoreIdAttr); + pim::PimSendOp::create(rewriter, + op->getLoc(), + srcMemRef, + rewriter.getI32IntegerAttr(sizeInBytes), + rewriter.getI32IntegerAttr(dstCoreId)); + } + + if (!foundReceiver) { + op->emitError("SpatChannelBroadcastSendOp has no matching ChannelBroadcastReceiveOp"); + return failure(); + } + rewriter.eraseOp(op); return success(); } diff --git a/validation/operations/README.md b/validation/operations/README.md index 6594703..4870159 100644 --- 
a/validation/operations/README.md +++ b/validation/operations/README.md @@ -3,66 +3,108 @@ ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline. Generated tests can be regenerated with: + ``` python3 validation/operations/gen_tests.py ``` ## Conv -| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes | -|------|-----------|-------|--------|--------|--------|---------|------|-------| -| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted | -| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias | -| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input | -| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel | -| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution | -| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels | -| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing | -| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved | -| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads | -| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias | -| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input | - -## Pool - -| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes | -|------|-----------|-------|--------|--------|--------|---------|-------| -| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling | -| Max stride 2 multi-channel | 
`pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool | -| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path | -| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling | -| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` | -| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` | -| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` | - -## Relu - -| Test | Directory | Input | Output | Notes | -|------|-----------|-------|--------|-------| -| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu | -| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu | -| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu | -| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu | +| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes | +|------------------|-------------------------|-----------|-----------|--------|--------|------------|------|------------------------------------| +| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted | +| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias | +| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input | +| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel | +| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution | +| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels | +| Pointwise 
1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing | +| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved | +| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads | +| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias | +| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input | ## Gemm -| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes | -|------|-----------|-----------|------------|--------|--------|-------|------|------|-------| -| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights | -| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N | -| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector | -| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight | -| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias | -| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices | -| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices | -| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined | +| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes | +|---------------|-------------------------|-----------|------------|----------|--------|-------|------|-------|------------------------------| +| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights | +| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N | 
+| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector | +| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight | +| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias | +| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices | +| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices | +| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined | ## Gemv -| Test | Directory | Input | W (weight) | Output | Bias | Notes | -|------|-----------|-------|------------|--------|------|-------| -| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul | -| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant | -| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape | -| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern | -| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast | +| Test | Directory | Input | W (weight) | Output | Bias | Notes | +|---------------------|------------------------------------|----------|------------|---------|---------|----------------------------| +| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul | +| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant | +| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape | +| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern | +| Scalar const | `gemv/with_scalar_constant` | [1,132] | 
[132,132] | [1,132] | [1,1] | Scalar bias, broadcast | + +## Pool + +| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes | +|----------------------------|---------------------------------|-----------|-----------|------------------------|--------|------------|----------------------------------| +| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling | +| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool | +| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path | +| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling | +| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` | +| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` | +| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` | + +## ReduceMean + +| Test | Directory | Input | Output | Axes | Keepdims | Notes | +|------------|--------------------------|-----------|-----------|-------|----------|-------------------------------------------------| +| Basic | `reduce_mean/basic` | [4,8] | [4,1] | [1] | 1 | Reduce feature dimension, preserving rank | +| Keepdims 0 | `reduce_mean/keepdims_0` | [4,8] | [4] | [1] | 0 | Reduce feature dimension, dropping reduced axis | +| 4D spatial | `reduce_mean/4d_spatial` | [1,3,4,4] | [1,3,1,1] | [2,3] | 1 | Reduce H and W on NCHW input | +| After Conv | `reduce_mean/after_conv` | [1,3,5,5] | [1,2,1,1] | [2,3] | 1 | Conv 3x3 + bias, then spatial ReduceMean | + +## Relu + +| Test | Directory | Input | Output | Notes | +|------------|-------------------|-----------|-----------|----------------------------| +| Basic | `relu/basic` | [4,8] | 
[4,8] | Standalone 2D Relu | +| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu | +| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu | +| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu | + +## Sigmoid + +| Test | Directory | Input | Output | Notes | +|------------|----------------------|-----------|-----------|---------------------------| +| Basic | `sigmoid/basic` | [4,8] | [4,8] | Standalone 2D Sigmoid | +| 4D | `sigmoid/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Sigmoid | +| After Gemm | `sigmoid/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Sigmoid | + +## Add + +| Test | Directory | Input(s) | Output | Notes | +|---------------|---------------------|------------------|--------|---------------------------------------------| +| Basic | `add/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise add, same-shape inputs | +| Broadcast row | `add/broadcast_row` | A:[4,8], B:[8] | [4,8] | Row-vector broadcasting via initializer | +| After Gemm | `add/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Add with broadcast vector | + +## Mul + +| Test | Directory | Input(s) | Output | Notes | +|-----------------|-----------------------|--------------------------|-----------|-------------------------------------------| +| Basic | `mul/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise multiply, same-shape inputs | +| Scalar constant | `mul/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer | +| After Conv | `mul/after_conv` | X:[1,3,5,5], S:[1,2,1,1] | [1,2,3,3] | Conv 3x3 + bias, then per-channel scaling | + +## Div + +| Test | Directory | Input(s) | Output | Notes | +|-----------------|-----------------------|------------------|--------|------------------------------------------------------| +| Basic | `div/basic` | X:[4,8], D:[4,8] | [4,8] | Elementwise divide by same-shape constant tensor | +| Scalar constant | `div/scalar_constant` | X:[4,8], S:[1] | [4,8] 
| Scalar broadcasting via initializer | +| After Gemm | `div/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Div with positive broadcast vector | diff --git a/validation/operations/add/after_gemm/add_after_gemm.onnx b/validation/operations/add/after_gemm/add_after_gemm.onnx new file mode 100644 index 0000000..f88f43d Binary files /dev/null and b/validation/operations/add/after_gemm/add_after_gemm.onnx differ diff --git a/validation/operations/add/basic/add_basic.onnx b/validation/operations/add/basic/add_basic.onnx new file mode 100644 index 0000000..c7699c1 Binary files /dev/null and b/validation/operations/add/basic/add_basic.onnx differ diff --git a/validation/operations/add/broadcast_row/add_broadcast_row.onnx b/validation/operations/add/broadcast_row/add_broadcast_row.onnx new file mode 100644 index 0000000..abdfccb Binary files /dev/null and b/validation/operations/add/broadcast_row/add_broadcast_row.onnx differ diff --git a/validation/operations/div/after_gemm/div_after_gemm.onnx b/validation/operations/div/after_gemm/div_after_gemm.onnx new file mode 100644 index 0000000..ae8770d Binary files /dev/null and b/validation/operations/div/after_gemm/div_after_gemm.onnx differ diff --git a/validation/operations/div/basic/div_basic.onnx b/validation/operations/div/basic/div_basic.onnx new file mode 100644 index 0000000..d2dd4f5 Binary files /dev/null and b/validation/operations/div/basic/div_basic.onnx differ diff --git a/validation/operations/div/scalar_constant/div_scalar_constant.onnx b/validation/operations/div/scalar_constant/div_scalar_constant.onnx new file mode 100644 index 0000000..b61f587 Binary files /dev/null and b/validation/operations/div/scalar_constant/div_scalar_constant.onnx differ diff --git a/validation/operations/gen_tests.py b/validation/operations/gen_tests.py index 777725b..c146f8d 100644 --- a/validation/operations/gen_tests.py +++ b/validation/operations/gen_tests.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Generate ONNX 
test models for validating GEMM, Conv, Pooling, and Relu implementations.""" +"""Generate ONNX test models for validating GEMM, Conv, Pooling, Relu, ReduceMean, Sigmoid, Add, Mul, and Div implementations.""" import numpy as np import onnx @@ -19,102 +19,8 @@ def save_model(model, directory, filename): print(f" {path.relative_to(OPERATIONS_DIR)}") -# --------------------------------------------------------------------------- -# GEMM tests -# --------------------------------------------------------------------------- - -def gemm_non_square(): - """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N.""" - B, K, N = 4, 128, 64 - W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"]) - graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/non_square", "gemm_non_square.onnx") - - -def gemm_with_bias(): - """GEMM with bias: Y = A @ W + C.""" - B, K, N = 4, 128, 128 - rng = np.random.default_rng(43) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"]) - graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/with_bias", "gemm_with_bias.onnx") - - -def gemm_transB(): - """GEMM with transB=1: Y = A @ W^T.""" - B, K, N = 4, 128, 64 - rng = np.random.default_rng(44) - # W stored as [N, K],
transposed during computation - W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1) - graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/transB", "gemm_transB.onnx") - - -def gemm_alpha_beta(): - """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C.""" - B, K, N = 4, 64, 64 - rng = np.random.default_rng(45) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25) - graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx") - - -def gemm_small(): - """Small GEMM: [2, 8] @ [8, 4].""" - B, K, N = 2, 8, 4 - rng = np.random.default_rng(46) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"]) - graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/small", "gemm_small.onnx") - - -def gemm_large(): - """Larger GEMM: [8, 256] @ [256, 128].""" - B, K, N = 8, 256, 128 - rng = 
np.random.default_rng(47) - W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W"], ["Y"]) - graph = helper.make_graph([node], "gemm_large", [A], [Y], initializer=[W]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/large", "gemm_large.onnx") - - -def gemm_transB_with_bias(): - """GEMM with transB and bias: Y = A @ W^T + C.""" - B, K, N = 4, 128, 64 - rng = np.random.default_rng(48) - W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") - C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") - A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) - Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) - node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1) - graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C]) - model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) - save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx") +def make_int64_initializer(name, values): + return numpy_helper.from_array(np.asarray(values, dtype=np.int64), name=name) # --------------------------------------------------------------------------- @@ -248,6 +154,104 @@ def conv_large_spatial(): save_model(model, "conv/large_spatial", "conv_large_spatial.onnx") +# --------------------------------------------------------------------------- +# GEMM tests +# --------------------------------------------------------------------------- + +def gemm_non_square(): + """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N.""" + B, K, N = 4, 128, 64 + W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W") + A = 
helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"]) + graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/non_square", "gemm_non_square.onnx") + + +def gemm_with_bias(): + """GEMM with bias: Y = A @ W + C.""" + B, K, N = 4, 128, 128 + rng = np.random.default_rng(43) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"]) + graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/with_bias", "gemm_with_bias.onnx") + + +def gemm_transB(): + """GEMM with transB=1: Y = A @ W^T.""" + B, K, N = 4, 128, 64 + rng = np.random.default_rng(44) + # W stored as [N, K], transposed during computation + W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1) + graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/transB", "gemm_transB.onnx") + + +def gemm_alpha_beta(): + """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C.""" + B, K, N = 4, 64, 64 + rng = np.random.default_rng(45) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, 
N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25) + graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx") + + +def gemm_small(): + """Small GEMM: [2, 8] @ [8, 4].""" + B, K, N = 2, 8, 4 + rng = np.random.default_rng(46) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"]) + graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/small", "gemm_small.onnx") + + +def gemm_large(): + """Larger GEMM: [8, 256] @ [256, 128].""" + B, K, N = 8, 256, 128 + rng = np.random.default_rng(47) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W"], ["Y"]) + graph = helper.make_graph([node], "gemm_large", [A], [Y], initializer=[W]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/large", "gemm_large.onnx") + + +def gemm_transB_with_bias(): + """GEMM with transB and bias: Y = A @ W^T + C.""" + B, K, N = 4, 128, 64 + rng = np.random.default_rng(48) + W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), 
name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1) + graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx") + + # --------------------------------------------------------------------------- # Pooling tests # --------------------------------------------------------------------------- @@ -327,6 +331,55 @@ def maxpool_after_conv(): save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx") +# --------------------------------------------------------------------------- +# ReduceMean tests +# --------------------------------------------------------------------------- + +def reducemean_basic(): + """ReduceMean over the feature dimension, preserving rank.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) + graph = helper.make_graph([node], "reducemean_basic", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx") + + +def reducemean_keepdims_0(): + """ReduceMean over the feature dimension, dropping the reduced axis.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0) + graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + 
save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx") + + +def reducemean_4d_spatial(): + """ReduceMean over H and W on an NCHW tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx") + + +def reducemean_after_conv(): + """Conv followed by ReduceMean over the spatial dimensions.""" + rng = np.random.default_rng(62) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1]) + W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") + B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") + conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], + kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) + reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx") + + # --------------------------------------------------------------------------- # Relu tests # --------------------------------------------------------------------------- @@ -381,6 +434,220 @@ def relu_after_gemm(): save_model(model, "relu/after_gemm", "relu_after_gemm.onnx") +# --------------------------------------------------------------------------- +# Sigmoid tests +# --------------------------------------------------------------------------- + +def 
sigmoid_basic(): + """Standalone Sigmoid on a simple 2D tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + node = helper.make_node("Sigmoid", ["X"], ["Y"]) + graph = helper.make_graph([node], "sigmoid_basic", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "sigmoid/basic", "sigmoid_basic.onnx") + + +def sigmoid_4d(): + """Standalone Sigmoid on an NCHW tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4]) + node = helper.make_node("Sigmoid", ["X"], ["Y"]) + graph = helper.make_graph([node], "sigmoid_4d", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "sigmoid/4d", "sigmoid_4d.onnx") + + +def sigmoid_after_gemm(): + """Gemm followed by Sigmoid.""" + B, K, N = 4, 64, 32 + rng = np.random.default_rng(63) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) + sigmoid = helper.make_node("Sigmoid", ["G"], ["Y"]) + graph = helper.make_graph([gemm, sigmoid], "sigmoid_after_gemm", [A], [Y], initializer=[W, C]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx") + + +# --------------------------------------------------------------------------- +# Add tests +# --------------------------------------------------------------------------- + +def add_basic(): + """Elementwise Add on two inputs with identical shapes.""" + A = 
helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + node = helper.make_node("Add", ["A", "B"], ["Y"]) + graph = helper.make_graph([node], "add_basic", [A, B], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "add/basic", "add_basic.onnx") + + +def add_broadcast_row(): + """Elementwise Add with row-vector broadcasting.""" + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + B = numpy_helper.from_array(np.random.default_rng(64).uniform(-1, 1, (8,)).astype(np.float32), name="B") + node = helper.make_node("Add", ["A", "B"], ["Y"]) + graph = helper.make_graph([node], "add_broadcast_row", [A], [Y], initializer=[B]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "add/broadcast_row", "add_broadcast_row.onnx") + + +def add_after_gemm(): + """Gemm followed by Add with a broadcast bias vector.""" + B, K, N = 4, 64, 32 + rng = np.random.default_rng(65) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + D = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="D") + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) + add = helper.make_node("Add", ["G", "D"], ["Y"]) + graph = helper.make_graph([gemm, add], "add_after_gemm", [A], [Y], initializer=[W, C, D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "add/after_gemm", "add_after_gemm.onnx") + + +# 
--------------------------------------------------------------------------- +# Mul tests +# --------------------------------------------------------------------------- + +def mul_basic(): + """Elementwise Mul on two inputs with identical shapes.""" + A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8]) + B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + node = helper.make_node("Mul", ["A", "B"], ["Y"]) + graph = helper.make_graph([node], "mul_basic", [A, B], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "mul/basic", "mul_basic.onnx") + + +def mul_scalar_constant(): + """Elementwise Mul with scalar broadcasting.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + S = numpy_helper.from_array(np.asarray([1.5], dtype=np.float32), name="S") + node = helper.make_node("Mul", ["X", "S"], ["Y"]) + graph = helper.make_graph([node], "mul_scalar_constant", [X], [Y], initializer=[S]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "mul/scalar_constant", "mul_scalar_constant.onnx") + + +def mul_after_conv(): + """Conv followed by Mul with per-channel scaling.""" + rng = np.random.default_rng(66) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3]) + W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") + B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") + S = numpy_helper.from_array(rng.uniform(0.5, 1.5, (1, 2, 1, 1)).astype(np.float32), name="S") + conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], + kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) + mul = helper.make_node("Mul", ["C", "S"], ["Y"]) + graph = 
helper.make_graph([conv, mul], "mul_after_conv", [X], [Y], initializer=[W, B, S]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "mul/after_conv", "mul_after_conv.onnx") + + +# --------------------------------------------------------------------------- +# Div tests +# --------------------------------------------------------------------------- + +def div_basic(): + """Elementwise Div by a same-shape constant tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + D = numpy_helper.from_array(np.random.default_rng(67).uniform(0.5, 2.0, (4, 8)).astype(np.float32), name="D") + node = helper.make_node("Div", ["X", "D"], ["Y"]) + graph = helper.make_graph([node], "div_basic", [X], [Y], initializer=[D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "div/basic", "div_basic.onnx") + + +def div_scalar_constant(): + """Elementwise Div with scalar broadcasting.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8]) + S = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="S") + node = helper.make_node("Div", ["X", "S"], ["Y"]) + graph = helper.make_graph([node], "div_scalar_constant", [X], [Y], initializer=[S]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "div/scalar_constant", "div_scalar_constant.onnx") + + +def div_after_gemm(): + """Gemm followed by Div with a broadcast divisor vector.""" + B, K, N = 4, 64, 32 + rng = np.random.default_rng(68) + W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W") + C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C") + D = numpy_helper.from_array(rng.uniform(0.5, 2.0, (N,)).astype(np.float32), name="D") + A = 
helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N]) + gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"]) + div = helper.make_node("Div", ["G", "D"], ["Y"]) + graph = helper.make_graph([gemm, div], "div_after_gemm", [A], [Y], initializer=[W, C, D]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "div/after_gemm", "div_after_gemm.onnx") + + +# --------------------------------------------------------------------------- +# ReduceMean tests +# --------------------------------------------------------------------------- + +def reducemean_basic(): + """ReduceMean over the feature dimension, preserving rank.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1) + graph = helper.make_graph([node], "reducemean_basic", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx") + + +def reducemean_keepdims_0(): + """ReduceMean over the feature dimension, dropping the reduced axis.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4]) + node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0) + graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx") + + +def reducemean_4d_spatial(): + """ReduceMean over H and W on an NCHW tensor.""" + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1]) + node = helper.make_node("ReduceMean", ["X"], 
["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx") + + +def reducemean_after_conv(): + """Conv followed by ReduceMean over the spatial dimensions.""" + rng = np.random.default_rng(62) + X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5]) + Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1]) + W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W") + B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B") + conv = helper.make_node("Conv", ["X", "W", "B"], ["C"], + kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0]) + reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1) + graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B]) + model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]) + save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx") + + # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- @@ -415,10 +682,36 @@ if __name__ == "__main__": avgpool_include_pad() maxpool_after_conv() + print("\nGenerating ReduceMean tests:") + reducemean_basic() + reducemean_keepdims_0() + reducemean_4d_spatial() + reducemean_after_conv() + print("\nGenerating Relu tests:") relu_basic() relu_4d() relu_after_conv() relu_after_gemm() + print("\nGenerating Sigmoid tests:") + sigmoid_basic() + sigmoid_4d() + sigmoid_after_gemm() + + print("\nGenerating Add tests:") + add_basic() + add_broadcast_row() + add_after_gemm() + + print("\nGenerating Mul tests:") + mul_basic() + mul_scalar_constant() + mul_after_conv() + + print("\nGenerating Div tests:") + div_basic() 
+ div_scalar_constant() + div_after_gemm() + print("\nDone.") diff --git a/validation/operations/mul/after_conv/mul_after_conv.onnx b/validation/operations/mul/after_conv/mul_after_conv.onnx new file mode 100644 index 0000000..8c88597 Binary files /dev/null and b/validation/operations/mul/after_conv/mul_after_conv.onnx differ diff --git a/validation/operations/mul/basic/mul_basic.onnx b/validation/operations/mul/basic/mul_basic.onnx new file mode 100644 index 0000000..589c035 Binary files /dev/null and b/validation/operations/mul/basic/mul_basic.onnx differ diff --git a/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx b/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx new file mode 100644 index 0000000..600cf8b Binary files /dev/null and b/validation/operations/mul/scalar_constant/mul_scalar_constant.onnx differ diff --git a/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx b/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx new file mode 100644 index 0000000..4658251 Binary files /dev/null and b/validation/operations/reduce_mean/4d_spatial/reduce_mean_4d_spatial.onnx differ diff --git a/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx b/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx new file mode 100644 index 0000000..deac445 Binary files /dev/null and b/validation/operations/reduce_mean/after_conv/reduce_mean_after_conv.onnx differ diff --git a/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx b/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx new file mode 100644 index 0000000..5760eb6 Binary files /dev/null and b/validation/operations/reduce_mean/basic/reduce_mean_basic.onnx differ diff --git a/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx b/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx new file mode 100644 index 0000000..53626bc Binary files /dev/null 
and b/validation/operations/reduce_mean/keepdims_0/reduce_mean_keepdims_0.onnx differ diff --git a/validation/operations/sigmoid/4d/sigmoid_4d.onnx b/validation/operations/sigmoid/4d/sigmoid_4d.onnx new file mode 100644 index 0000000..b0fb523 Binary files /dev/null and b/validation/operations/sigmoid/4d/sigmoid_4d.onnx differ diff --git a/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx b/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx new file mode 100644 index 0000000..58245f1 Binary files /dev/null and b/validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx differ diff --git a/validation/operations/sigmoid/basic/sigmoid_basic.onnx b/validation/operations/sigmoid/basic/sigmoid_basic.onnx new file mode 100644 index 0000000..977731e Binary files /dev/null and b/validation/operations/sigmoid/basic/sigmoid_basic.onnx differ