add support for operations: reduceMean, add, mul, div, sigmoid
Some checks failed
Validate Operations / validate-operations (push) Failing after 51m52s

This commit is contained in:
NiccoloN
2026-03-30 15:41:12 +02:00
parent 5e7114f517
commit 39830be888
32 changed files with 1057 additions and 224 deletions

View File

@@ -1,4 +1,4 @@
use anyhow::{Context, Result};
use anyhow::{Context, Result, ensure};
use paste::paste;
use std::{collections::HashMap, mem::offset_of, sync::LazyLock};
@@ -36,6 +36,7 @@ static SIMD: LazyLock<HashMap<String, FunctorType>> = LazyLock::new(|| {
add_to_json_map!(storage, vvmax);
add_to_json_map!(storage, vvsll);
add_to_json_map!(storage, vvsra);
add_to_json_map!(storage, vavg);
add_to_json_map!(storage, vrelu);
add_to_json_map!(storage, vtanh);
add_to_json_map!(storage, vsigm);
@@ -339,6 +340,7 @@ fn json_to_vavg(
let rd = json_i64!(json, "rd") as i32;
let rs1 = json_i64!(json, "rs1") as i32;
let rs2 = json_i64!(json, "rs2") as i32;
ensure!(rs2 == 1, "vavg only supports stride 1");
let len = json_i64!(json, "len") as i32;
let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap());
inst_data_builder

View File

@@ -55,19 +55,15 @@ pub trait HasSigm {
impl HasSigm for f32 {
fn sigm(self) -> Self {
let x = self;
let e = std::f32::consts::E;
let ex = x.powf(x);
(ex) / (1.0+ex)
let ex = self.exp();
ex / (1.0 + ex)
}
}
impl HasSigm for f64 {
fn sigm(self) -> Self {
let x = self;
let e = std::f64::consts::E;
let ex = x.powf(x);
(ex) / (1.0+ex)
let ex = self.exp();
ex / (1.0 + ex)
}
}

View File

@@ -121,6 +121,13 @@ json::Object PimCodeGen::createEmptyOffset() {
return offset;
}
// Build the JSON "offset" object with offset_select=1 and a zero offset
// value. offset_select=1 presumably selects rs1-relative addressing —
// TODO confirm against the json_to_offset decoder on the simulator side.
static json::Object createRs1OnlyOffset() {
json::Object offset;
offset["offset_select"] = 1;
offset["offset_value"] = 0;
return offset;
}
// Serialize one instruction object to the per-core output stream.
// Each instruction is comma-terminated; presumably a later step wraps the
// stream contents into a JSON array — TODO confirm.
void PimCodeGen::emitInstruction(json::Object instruction) const {
coreFileStream << json::Value(std::move(instruction)) << ',';
}
@@ -331,7 +338,8 @@ void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp) const {
json["op"] = "vavg";
json["rd"] = 0;
json["rs1"] = 1;
json["offset"] = createEmptyOffset();
json["rs2"] = 1;
json["offset"] = createRs1OnlyOffset();
json["len"] = getValueSizeInBytes(vavgOp.getInput());
emitInstruction(std::move(json));
}

View File

@@ -4,10 +4,13 @@ add_public_tablegen_target(ONNXToSpatialIncGen)
add_pim_library(OMONNXToSpatial
Patterns/Math/Conv.cpp
Patterns/Math/Elementwise.cpp
Patterns/Math/Gemm.cpp
Patterns/Math/MatMul.cpp
Patterns/Math/ReduceMean.cpp
Patterns/NN/Pool.cpp
Patterns/NN/Relu.cpp
Patterns/NN/Sigmoid.cpp
Patterns/Tensor/Concat.cpp
Patterns/Tensor/Reshape.cpp
ONNXToSpatialPass.cpp

View File

@@ -14,8 +14,6 @@
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
#define DEFINE_MAP_OP(opname) opname,
namespace onnx_mlir {
template <class ShapedType>

View File

@@ -72,11 +72,15 @@ void ONNXToSpatialPass::runOnOperation() {
target.addLegalDialect<spatial::SpatialDialect, ONNXDialect, tensor::TensorDialect, arith::ArithDialect>();
target.addDynamicallyLegalOp<ONNXMatMulOp>(
[](ONNXMatMulOp op) { return cast<ShapedType>(op.getY().getType()).getRank() != 2; });
target.addIllegalOp<ONNXAddOp>();
target.addIllegalOp<ONNXDivOp>();
target.addIllegalOp<ONNXMulOp>();
target.addIllegalOp<ONNXGemmOp>();
target.addIllegalOp<ONNXConvOp>();
target.addIllegalOp<ONNXMaxPoolSingleOutOp>();
target.addIllegalOp<ONNXAveragePoolOp>();
target.addIllegalOp<ONNXReluOp>();
target.addIllegalOp<ONNXSigmoidOp>();
target.addIllegalOp<ONNXSoftmaxOp>();
target.addIllegalOp<ONNXConcatOp>();
target.addIllegalOp<ONNXReshapeOp>();
@@ -86,10 +90,13 @@ void ONNXToSpatialPass::runOnOperation() {
RewritePatternSet patterns(ctx);
patterns.add<removeLRN>(ctx);
populateElementwisePatterns(patterns, ctx);
populateGemmPatterns(patterns, ctx);
populateConvPatterns(patterns, ctx);
populatePoolPatterns(patterns, ctx);
populateReduceMeanPatterns(patterns, ctx);
populateReluPatterns(patterns, ctx);
populateSigmoidPatterns(patterns, ctx);
populateConcatPatterns(patterns, ctx);
populateReshapePatterns(patterns, ctx);

View File

@@ -7,14 +7,20 @@ namespace onnx_mlir {
void populateConvPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateElementwisePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateGemmPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateMatMulRewritePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populatePoolPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReduceMeanPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);

View File

@@ -0,0 +1,204 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// Compute row-major (C-order) strides for `shape`: the innermost dimension
// has stride 1 and each outer stride is the running product of the inner
// extents. Empty and rank-1 shapes yield all-ones strides.
static SmallVector<int64_t> computeRowMajorStrides(ArrayRef<int64_t> shape) {
  SmallVector<int64_t> strides(shape.size(), 1);
  int64_t running = 1;
  for (size_t i = shape.size(); i > 1; --i) {
    running *= shape[i - 1];
    strides[i - 2] = running;
  }
  return strides;
}
// Return the dense-elements payload of `value` when it is produced by an
// arith.constant or onnx.Constant op; nullptr otherwise (including when the
// defining op's attribute is not a DenseElementsAttr).
static DenseElementsAttr getDenseConstantAttr(Value value) {
if (auto constantOp = value.getDefiningOp<arith::ConstantOp>())
return dyn_cast<DenseElementsAttr>(constantOp.getValue());
if (auto constantOp = value.getDefiningOp<ONNXConstantOp>())
// onnx.Constant's value attr is optional, hence the _or_null variant.
return dyn_cast_or_null<DenseElementsAttr>(constantOp.getValueAttr());
return nullptr;
}
// Materialize a compile-time broadcast of a constant tensor to `resultType`
// as a new arith.constant, following right-aligned (NumPy/ONNX-style)
// broadcasting: missing leading dims act as 1, and a size-1 source dim
// repeats along the corresponding result dim. Fails when `value` is not a
// static dense constant or the shapes are not broadcast-compatible.
static FailureOr<Value> materializeBroadcastedConstantTensor(Value value,
RankedTensorType resultType,
ConversionPatternRewriter& rewriter,
Location loc) {
auto denseAttr = getDenseConstantAttr(value);
if (!denseAttr)
return failure();
auto sourceType = dyn_cast<RankedTensorType>(denseAttr.getType());
if (!sourceType || !sourceType.hasStaticShape() || !resultType.hasStaticShape())
return failure();
// Already the right type: nothing to materialize.
if (sourceType == resultType)
return value;
ArrayRef<int64_t> sourceShape = sourceType.getShape();
ArrayRef<int64_t> resultShape = resultType.getShape();
if (sourceShape.size() > resultShape.size())
return failure();
// Dims are aligned from the right: result axis i maps to source axis
// i - rankOffset (negative => implicit leading dim of extent 1).
const int64_t rankOffset = static_cast<int64_t>(resultShape.size() - sourceShape.size());
for (int64_t i = 0; i < static_cast<int64_t>(resultShape.size()); ++i) {
const int64_t sourceIndex = i - rankOffset;
const int64_t sourceDim = sourceIndex < 0 ? 1 : sourceShape[sourceIndex];
const int64_t resultDim = resultShape[i];
// Each source dim must be 1 (broadcast) or equal to the result dim.
if (sourceDim != 1 && sourceDim != resultDim)
return failure();
}
SmallVector<Attribute> sourceValues(denseAttr.getValues<Attribute>());
SmallVector<int64_t> sourceStrides = computeRowMajorStrides(sourceShape);
SmallVector<int64_t> resultStrides = computeRowMajorStrides(resultShape);
SmallVector<Attribute> resultValues;
resultValues.reserve(resultType.getNumElements());
// For every flat result index: decode its multi-index via the result
// strides, then re-encode the matching source flat index, pinning
// broadcast (size-1) source dims to index 0.
for (int64_t flatIndex = 0; flatIndex < resultType.getNumElements(); ++flatIndex) {
int64_t remaining = flatIndex;
int64_t sourceFlatIndex = 0;
for (int64_t i = 0; i < static_cast<int64_t>(resultShape.size()); ++i) {
const int64_t resultIndex = resultStrides.empty() ? 0 : remaining / resultStrides[i];
remaining = resultStrides.empty() ? 0 : remaining % resultStrides[i];
const int64_t sourceIndex = i - rankOffset;
if (sourceIndex < 0)
continue;
const int64_t sourceDim = sourceShape[sourceIndex];
const int64_t mappedIndex = sourceDim == 1 ? 0 : resultIndex;
sourceFlatIndex += mappedIndex * sourceStrides[sourceIndex];
}
resultValues.push_back(sourceValues[sourceFlatIndex]);
}
auto broadcastedAttr = DenseElementsAttr::get(resultType, resultValues);
return arith::ConstantOp::create(rewriter, loc, resultType, broadcastedAttr).getResult();
}
// Adapt an elementwise operand to `resultType`: pass it through unchanged
// when the types already match; otherwise attempt a constant-folded
// broadcast. Non-static or non-constant mismatching operands fail, which
// makes the enclosing pattern bail out.
static FailureOr<Value> prepareElementwiseOperand(Value value,
RankedTensorType resultType,
ConversionPatternRewriter& rewriter,
Location loc) {
auto valueType = dyn_cast<RankedTensorType>(value.getType());
if (!valueType || !valueType.hasStaticShape())
return failure();
if (valueType == resultType)
return value;
// Shapes differ: only compile-time constants can be broadcast here.
return materializeBroadcastedConstantTensor(value, resultType, rewriter, loc);
}
// Build a constant tensor of element-wise reciprocals (1/x) of a constant
// operand, broadcast to `resultType`. Used to lower division as a multiply.
// Fails for non-constant or non-floating-point operands, or when any
// reciprocal is an invalid operation. NOTE(review): APFloat's opDivByZero
// is not rejected here, so 1/0 materializes as +/-inf (IEEE semantics) —
// confirm that is the intended divide-by-zero behavior.
static FailureOr<Value> materializeReciprocalTensor(Value value,
RankedTensorType resultType,
ConversionPatternRewriter& rewriter,
Location loc) {
auto broadcastedValue = materializeBroadcastedConstantTensor(value, resultType, rewriter, loc);
if (failed(broadcastedValue))
return failure();
auto denseAttr = dyn_cast<DenseFPElementsAttr>(getDenseConstantAttr(*broadcastedValue));
if (!denseAttr)
return failure();
SmallVector<APFloat> reciprocalValues;
reciprocalValues.reserve(denseAttr.getNumElements());
for (const APFloat& valueAttr : denseAttr.getValues<APFloat>()) {
// Start from 1 in the element's own float semantics, then divide in place.
APFloat reciprocal(valueAttr.getSemantics(), 1);
auto status = reciprocal.divide(valueAttr, APFloat::rmNearestTiesToEven);
if (status & APFloat::opInvalidOp)
return failure();
reciprocalValues.push_back(std::move(reciprocal));
}
auto reciprocalAttr = DenseFPElementsAttr::get(resultType, reciprocalValues);
return arith::ConstantOp::create(rewriter, loc, resultType, reciprocalAttr).getResult();
}
// Generic conversion for binary elementwise ONNX ops (Add, Mul) into a
// spatial compute region wrapping the corresponding Spatial op.
// Requires a statically shaped ranked result; mismatching operand shapes
// are handled only for constants (via prepareElementwiseOperand).
template <typename OnnxOp, typename SpatialOp>
struct BinaryElementwiseToSpatialCompute : OpConversionPattern<OnnxOp> {
using OpConversionPattern<OnnxOp>::OpConversionPattern;
using Adaptor = typename OnnxOp::Adaptor;
LogicalResult matchAndRewrite(OnnxOp op, Adaptor adaptor, ConversionPatternRewriter& rewriter) const override {
auto resultType = dyn_cast<RankedTensorType>(op->getResult(0).getType());
if (!resultType || !resultType.hasStaticShape())
return failure();
Location loc = op.getLoc();
// Both operands must end up exactly at resultType (broadcast constants
// are materialized; anything else with a differing shape fails).
auto lhs = prepareElementwiseOperand(adaptor.getOperands()[0], resultType, rewriter, loc);
if (failed(lhs))
return failure();
auto rhs = prepareElementwiseOperand(adaptor.getOperands()[1], resultType, rewriter, loc);
if (failed(rhs))
return failure();
constexpr size_t numInputs = 2;
// Emit the Spatial op inside a compute region and yield its result.
auto computeOp =
createSpatCompute<numInputs>(rewriter, loc, resultType, {}, ValueRange {*lhs, *rhs}, [&](Value x, Value y) {
auto loweredOp = SpatialOp::create(rewriter, loc, resultType, x, y);
spatial::SpatYieldOp::create(rewriter, loc, loweredOp.getResult());
});
rewriter.replaceOp(op, computeOp);
return success();
}
};
// Lower onnx.Div as a multiply by the precomputed reciprocal of the divisor:
// a / b  ==>  a * (1 / b). This only matches when the divisor is a
// compile-time floating-point constant (materializeReciprocalTensor fails
// otherwise). NOTE(review): rounding a/b as a*(1/b) can differ from exact
// division by ~1 ulp per element — confirm that tolerance is acceptable.
struct DivToSpatialCompute : OpConversionPattern<ONNXDivOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(ONNXDivOp op, ONNXDivOpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override {
auto resultType = dyn_cast<RankedTensorType>(op.getResult().getType());
if (!resultType || !resultType.hasStaticShape())
return failure();
Location loc = op.getLoc();
auto lhs = prepareElementwiseOperand(adaptor.getA(), resultType, rewriter, loc);
if (failed(lhs))
return failure();
// Constant-fold 1/B (broadcast to the result shape) ahead of time.
auto reciprocalRhs = materializeReciprocalTensor(adaptor.getB(), resultType, rewriter, loc);
if (failed(reciprocalRhs))
return failure();
constexpr size_t numInputs = 2;
auto computeOp = createSpatCompute<numInputs>(
rewriter, loc, resultType, {}, ValueRange {*lhs, *reciprocalRhs}, [&](Value x, Value reciprocal) {
auto mulOp = spatial::SpatVMulOp::create(rewriter, loc, resultType, x, reciprocal);
spatial::SpatYieldOp::create(rewriter, loc, mulOp.getResult());
});
rewriter.replaceOp(op, computeOp);
return success();
}
};
} // namespace
// Register the elementwise ONNX->Spatial lowerings: Add and Mul share the
// generic binary template; Div has its own reciprocal-multiply pattern.
void populateElementwisePatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
  patterns.add<BinaryElementwiseToSpatialCompute<ONNXAddOp, spatial::SpatVAddOp>,
      BinaryElementwiseToSpatialCompute<ONNXMulOp, spatial::SpatVMulOp>,
      DivToSpatialCompute>(ctx);
}
} // namespace onnx_mlir

View File

@@ -0,0 +1,163 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// Resolve the ONNX ReduceMean `axes` attribute into a sorted, deduplicated
// list of non-negative axis indices. A missing attribute means "reduce over
// every axis" (the ONNX default); negative entries count from the back.
static SmallVector<int64_t> normalizeAxes(ArrayAttr axesAttr, int64_t rank) {
  SmallVector<int64_t> axes;
  if (!axesAttr) {
    // Default: all axes are reduced.
    axes.reserve(rank);
    for (int64_t axis = 0; axis != rank; ++axis)
      axes.push_back(axis);
    return axes;
  }
  axes.reserve(axesAttr.size());
  for (Attribute attr : axesAttr) {
    const int64_t rawAxis = cast<IntegerAttr>(attr).getInt();
    axes.push_back(rawAxis < 0 ? rawAxis + rank : rawAxis);
  }
  llvm::sort(axes);
  axes.erase(std::unique(axes.begin(), axes.end()), axes.end());
  return axes;
}
// Turn a list of axis indices into a per-axis boolean mask of length `rank`.
// Returns an empty vector when any axis is out of [0, rank), which the
// caller treats as a match failure.
static SmallVector<bool> buildReducedAxesMask(ArrayRef<int64_t> axes, int64_t rank) {
  SmallVector<bool> mask(rank, false);
  for (int64_t axis : axes) {
    const bool inBounds = axis >= 0 && axis < rank;
    if (!inBounds)
      return {};
    mask[axis] = true;
  }
  return mask;
}
// Rank-preserving all-ones shape with the given element type: the keepdims
// result type of reducing every axis of `inputType`.
static RankedTensorType getAllOnesType(RankedTensorType inputType, Type elementType) {
  SmallVector<int64_t> onesShape(inputType.getRank(), 1);
  return RankedTensorType::get(onesShape, elementType);
}
// Build the tensor.collapse_shape reassociation that squeezes away reduced
// (size-1) dims: each run of reduced axes is grouped together with the next
// kept axis. Trailing reduced axes are appended to the last group, or form
// the only group when every axis is reduced.
static SmallVector<ReassociationIndices> buildCollapseReassociation(ArrayRef<bool> reducedAxes) {
SmallVector<ReassociationIndices> reassociation;
ReassociationIndices currentGroup;
for (auto [axis, isReduced] : llvm::enumerate(reducedAxes)) {
currentGroup.push_back(axis);
// A kept axis closes the current group (reduced prefix + this axis).
if (!isReduced) {
reassociation.push_back(currentGroup);
currentGroup.clear();
}
}
// Leftover reduced axes at the end have no following kept axis.
if (!currentGroup.empty()) {
if (reassociation.empty())
reassociation.push_back(std::move(currentGroup));
else
reassociation.back().append(currentGroup.begin(), currentGroup.end());
}
return reassociation;
}
// Wrap a spatial.vavg over `input` in a single-input spatial compute region
// and return the region's result. spatial.vavg reduces all elements of its
// operand to one scalar wrapped in a tensor (see the SpatVAvgOp definition).
static Value createAverageCompute(Value input,
RankedTensorType resultType,
ConversionPatternRewriter& rewriter,
Location loc) {
constexpr size_t numInputs = 1;
auto computeOp = createSpatCompute<numInputs>(rewriter, loc, resultType, {}, ValueRange {input}, [&](Value x) {
auto avgOp = spatial::SpatVAvgOp::create(rewriter, loc, resultType, x);
spatial::SpatYieldOp::create(rewriter, loc, avgOp.getResult());
});
return computeOp.getResult(0);
}
// Recursively build a keepdims reduce-mean over `input`. Walks axes from
// `axis` to the rank: reduced axes are skipped (the leaf vavg averages over
// them), while each kept axis is sliced into size-1 pieces that are reduced
// independently and concatenated back along that axis. At the leaf, every
// kept axis has extent 1, so averaging all elements equals the mean over
// the reduced axes. `leafType` is the all-ones keepdims tensor type.
static Value buildReduceMeanKeepdims(Value input,
ArrayRef<bool> reducedAxes,
int64_t axis,
RankedTensorType leafType,
ConversionPatternRewriter& rewriter,
Location loc) {
int64_t rank = cast<RankedTensorType>(input.getType()).getRank();
if (axis == rank)
return createAverageCompute(input, leafType, rewriter, loc);
if (reducedAxes[axis])
return buildReduceMeanKeepdims(input, reducedAxes, axis + 1, leafType, rewriter, loc);
// Kept axis: reduce each unit slice separately, then stitch them back.
SmallVector<Value> slices = sliceTensor(input, axis, /*sliceSize=*/1, rewriter, loc);
SmallVector<Value> reducedSlices;
reducedSlices.reserve(slices.size());
for (Value slice : slices)
reducedSlices.push_back(buildReduceMeanKeepdims(slice, reducedAxes, axis + 1, leafType, rewriter, loc));
return reducedSlices.size() == 1 ? reducedSlices.front()
: tensor::ConcatOp::create(rewriter, loc, axis, reducedSlices).getResult();
}
// Drop the size-1 reduced dims from a keepdims result to match the
// keepdims=0 result type. The rank-0 case cannot use collapse_shape, so the
// single element is extracted and rebuilt as a 0-d tensor; otherwise the
// reduced dims are folded into neighbors via a collapse reassociation.
static Value squeezeReducedAxes(Value keepdimsValue,
RankedTensorType resultType,
ArrayRef<bool> reducedAxes,
ConversionPatternRewriter& rewriter,
Location loc) {
if (resultType.getRank() == 0) {
SmallVector<Value> indices(cast<RankedTensorType>(keepdimsValue.getType()).getRank(),
arith::ConstantIndexOp::create(rewriter, loc, 0));
Value element = tensor::ExtractOp::create(rewriter, loc, keepdimsValue, indices);
return tensor::FromElementsOp::create(rewriter, loc, resultType, ValueRange {element});
}
return tensor::CollapseShapeOp::create(
rewriter, loc, resultType, keepdimsValue, buildCollapseReassociation(reducedAxes))
.getResult();
}
// Convert onnx.ReduceMeanV13 into nested slice/vavg/concat spatial ops.
// Requires statically shaped ranked input and result. Honors keepdims:
// the keepdims form is built first and only squeezed when keepdims == 0.
struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
using OpConversionPattern::OpConversionPattern;
LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp,
ONNXReduceMeanV13OpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto inputType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
auto resultType = dyn_cast<RankedTensorType>(reduceMeanOp.getReduced().getType());
if (!inputType || !resultType || !inputType.hasStaticShape() || !resultType.hasStaticShape())
return failure();
SmallVector<int64_t> axes = normalizeAxes(reduceMeanOp.getAxesAttr(), inputType.getRank());
SmallVector<bool> reducedAxes = buildReducedAxesMask(axes, inputType.getRank());
// An empty mask signals an out-of-range axis — except for rank-0 input,
// where the mask is legitimately empty.
if (reducedAxes.empty() && inputType.getRank() != 0)
return failure();
Location loc = reduceMeanOp.getLoc();
RankedTensorType leafType = getAllOnesType(inputType, resultType.getElementType());
Value reducedKeepdims = buildReduceMeanKeepdims(adaptor.getData(), reducedAxes, /*axis=*/0, leafType, rewriter, loc);
if (reduceMeanOp.getKeepdims() != 0) {
rewriter.replaceOp(reduceMeanOp, reducedKeepdims);
return success();
}
Value reduced = squeezeReducedAxes(reducedKeepdims, resultType, reducedAxes, rewriter, loc);
rewriter.replaceOp(reduceMeanOp, reduced);
return success();
}
};
} // namespace
// Register the ReduceMean lowering with the ONNX->Spatial pattern set.
void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
patterns.add<ReduceMeanToSpatialCompute>(ctx);
}
} // namespace onnx_mlir

View File

@@ -0,0 +1,36 @@
#include "mlir/Transforms/DialectConversion.h"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// Convert onnx.Sigmoid into a spatial compute region wrapping
// spatial.sigmoid. NOTE(review): unlike the elementwise/ReduceMean
// patterns, there is no ranked/static-shape guard here — confirm
// SpatSigmoidOp and createSpatCompute tolerate any tensor type.
struct SigmoidToSpatialCompute : OpConversionPattern<ONNXSigmoidOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult matchAndRewrite(ONNXSigmoidOp sigmoidOp,
ONNXSigmoidOpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
Location loc = sigmoidOp.getLoc();
Type resultType = sigmoidOp.getResult().getType();
constexpr size_t numInputs = 1;
auto computeOp = createSpatCompute<numInputs>(rewriter, loc, resultType, {}, adaptor.getX(), [&](Value x) {
auto spatSigmoidOp = spatial::SpatSigmoidOp::create(rewriter, loc, resultType, x);
spatial::SpatYieldOp::create(rewriter, loc, spatSigmoidOp.getResult());
});
rewriter.replaceOp(sigmoidOp, computeOp);
return success();
}
};
} // namespace
// Register the Sigmoid lowering with the ONNX->Spatial pattern set.
void populateSigmoidPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
patterns.add<SigmoidToSpatialCompute>(ctx);
}
} // namespace onnx_mlir

View File

@@ -39,6 +39,12 @@ def spatToPimVVMul : Pat<
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
// Rewrite spatial.vavg into pim.vavg; the output tensor is either reused
// from the source op's operands or freshly allocated by the helper.
def spatToPimVAvg : Pat<
(SpatVAvgOp:$srcOpRes $input),
(PimVAvgOp $input,
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
def spatToPimVVMax : Pat<
(SpatVMaxOp:$srcOpRes $a, $b),
(PimVVMaxOp $a, $b,
@@ -51,4 +57,10 @@ def spatToPimVRelu : Pat<
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
// Rewrite spatial.sigmoid into pim.vsigm; the output tensor is either
// reused from the source op's operands or freshly allocated by the helper.
def spatToPimVSigm : Pat<
(SpatSigmoidOp:$srcOpRes $input),
(PimVSigmOp $input,
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
#endif // SPATIAL_TO_PIM

View File

@@ -161,26 +161,41 @@ void SpatialToPimPass::runOnOperation() {
}
for (auto receiveOp : funcOp.getOps<spatial::SpatChannelReceiveOp>()) {
operationsToRemove.push_back(receiveOp);
markOpToRemove(receiveOp);
runOnReceiveOp(receiveOp, rewriter);
}
for (auto computeOp : funcOp.getOps<spatial::SpatWeightedCompute>()) {
operationsToRemove.push_back(computeOp);
markOpToRemove(computeOp);
runOnComputeOp(computeOp, rewriter);
}
enlargeVMMOutTensorsToCrossbarSize(funcOp, rewriter);
replaceReturnOpOperands(returnOp, rewriter);
// Remove all ComputeOps
for (auto opToRemove : llvm::reverse(operationsToRemove)) {
if (!opToRemove->use_empty()) {
SmallVector<Operation*> pendingRemovals(operationsToRemove.begin(), operationsToRemove.end());
while (!pendingRemovals.empty()) {
bool erasedAnyOp = false;
for (auto it = pendingRemovals.begin(); it != pendingRemovals.end();) {
Operation* opToRemove = *it;
if (!opToRemove->use_empty()) {
++it;
continue;
}
rewriter.eraseOp(opToRemove);
it = pendingRemovals.erase(it);
erasedAnyOp = true;
}
if (erasedAnyOp)
continue;
for (auto opToRemove : pendingRemovals) {
opToRemove->dump();
for (auto user : opToRemove->getUsers())
user->dump();
assert(false && "opToRemove should be unused at this point");
}
rewriter.eraseOp(opToRemove);
assert(false && "tracked op removal reached a cycle or missed dependency");
}
// Dump to file for debug
@@ -284,10 +299,19 @@ void SpatialToPimPass::runOnComputeOp(spatial::SpatWeightedCompute computeOp, IR
auto concatUses = concatValue.getUses();
auto numConcatUses = rangeLength(concatUses);
if (numConcatUses == 1) {
OpOperand& concatUse = *concatUses.begin();
Operation* concatUser = concatUse.getOwner();
Value chainedValue = concatValue;
Operation* concatUser = concatUses.begin()->getOwner();
while (isChannelUseChainOp(concatUser)) {
auto chainUses = concatUser->getResult(0).getUses();
if (rangeLength(chainUses) != 1)
break;
chainedValue = concatUser->getResult(0);
concatUser = chainUses.begin()->getOwner();
}
if (isa<func::ReturnOp>(concatUser)) {
size_t concatIndexInReturn = concatUse.getOperandNumber();
size_t concatIndexInReturn = chainedValue.getUses().begin()->getOperandNumber();
size_t resultIndexInConcat = resultUses.begin()->getOperandNumber();
size_t offset = 0;
for (auto operand : concatOp->getOperands().take_front(resultIndexInConcat))
@@ -602,10 +626,22 @@ void SpatialToPimPass::replaceReturnOpOperands(func::ReturnOp& returnOp, IRRewri
rewriter.modifyOpInPlace(returnOp,
[&] { returnOp.setOperand(orderWithinReturn, outputTensors[orderWithinReturn]); });
if (isa<tensor::ConcatOp>(returnOperand)) {
auto returnOperandUses = it.value().getUses();
if (rangeLength(returnOperandUses) == 0)
rewriter.eraseOp(returnOperand);
Operation* opToErase = returnOperand;
while (opToErase) {
bool isExclusivelyOwnedByReturnChain = opToErase->use_empty() || opToErase->hasOneUse();
if (!isExclusivelyOwnedByReturnChain)
break;
if (isChannelUseChainOp(opToErase)) {
Value source = opToErase->getOperand(0);
markOpToRemove(opToErase);
opToErase = source.getDefiningOp();
continue;
}
if (isa<tensor::ConcatOp>(opToErase))
markOpToRemove(opToErase);
break;
}
}
}

View File

@@ -239,6 +239,22 @@ def SpatSumOp : SpatOp<"sum", []> {
}];
}
// Whole-tensor average reduction: one scalar result kept in tensor form.
// Lowered to pim.vavg by the spatToPimVAvg pattern.
def SpatVAvgOp : SpatOp<"vavg", []> {
let summary = "Average all elements of the input tensor to a single scalar wrapped in a tensor";
let arguments = (ins
SpatTensor:$input
);
let results = (outs
SpatTensor:$output
);
let assemblyFormat = [{
`(` $input `)` attr-dict `:` type($input) `->` type($output)
}];
}
def SpatSigmoidOp : SpatOp<"sigmoid", []> {
let summary = "Element-wise sigmoid activation";

View File

@@ -361,7 +361,7 @@ struct ChannelBroadcastReceiveOpInterface
}
/*
* Turn the channel receive to pim.load using by creating a new global buffer
* Turn the broadcast receive into a regular pim.receive from the broadcaster.
*/
LogicalResult bufferize(Operation* op,
RewriterBase& rewriter,
@@ -370,8 +370,21 @@ struct ChannelBroadcastReceiveOpInterface
auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
auto outputType = cast<ShapedType>(outputTensor.getType());
auto outputSize = outputType.getNumElements() * outputType.getElementTypeBitWidth() / 8;
auto numElements = cast<ShapedType>(outputTensor.getType()).getNumElements();
auto elementSize = cast<ShapedType>(outputTensor.getType()).getElementTypeBitWidth() / 8;
auto precomputedOtherCoreId = op->getAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME);
if (precomputedOtherCoreId) {
Value newValue = pim::PimReceiveOp::create(rewriter,
op->getLoc(),
outputTensor.getType(),
outputTensor,
rewriter.getI32IntegerAttr(numElements * elementSize),
cast<IntegerAttr>(precomputedOtherCoreId))
.getOutput();
replaceOpWithBufferizedValues(rewriter, op, newValue);
return success();
}
auto channelNewOp = op->getOperand(0).getDefiningOp<SpatChannelNewOp>();
if (!channelNewOp) {
@@ -379,31 +392,30 @@ struct ChannelBroadcastReceiveOpInterface
return failure();
}
// The first 'broadcast' operation creates the buffer just after the
// channelNewOp, while the other 'broadcast' operation need to find this
// buffer allocation just after the channelNewOp
Value bufferAllocation;
if (auto allocOpAfterChannel = dyn_cast<memref::AllocOp>(channelNewOp->getNextNode())) {
// Buffer already allocated, load from this buffer
bufferAllocation = allocOpAfterChannel;
}
else {
// Buffer was not allocated previously, allocate it after channelNewOp
rewriter.setInsertionPointAfter(channelNewOp);
bufferAllocation = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
}
auto srcCoreId = [&]() -> FailureOr<uint32_t> {
for (Operation* user : channelNewOp->getUsers()) {
auto sendOp = dyn_cast<SpatChannelBroadcastSendOp>(user);
if (!sendOp)
continue;
auto sendCoreIdAttr = cast<pim::PimCoreOp>(sendOp->getParentOp()).getCoreIdAttr();
op->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, sendCoreIdAttr);
return cast<pim::PimCoreOp>(sendOp->getParentOp()).getCoreId();
}
op->emitError("ChannelBroadcastReceiveOp has no matching ChannelBroadcastSendOp");
return failure();
}();
if (failed(srcCoreId))
return failure();
rewriter.setInsertionPoint(op);
auto memCopyHostToDevOp = pim::PimMemCopyHostToDevOp::create(rewriter,
op->getLoc(),
outputTensor.getType(),
outputTensor,
bufferAllocation,
rewriter.getI32IntegerAttr(0),
rewriter.getI32IntegerAttr(0),
rewriter.getI32IntegerAttr(outputSize));
Value newValue = pim::PimReceiveOp::create(rewriter,
op->getLoc(),
outputTensor.getType(),
outputTensor,
rewriter.getI32IntegerAttr(numElements * elementSize),
rewriter.getI32IntegerAttr(srcCoreId.value()))
.getOutput();
replaceOpWithBufferizedValues(rewriter, op, memCopyHostToDevOp.getOutput());
replaceOpWithBufferizedValues(rewriter, op, newValue);
return success();
}
@@ -428,8 +440,7 @@ struct ChannelBroadcastSendOpInterface
}
/*
* Turn the channel send into a device-to-host copy into the shared
* broadcast buffer that receive ops load from later.
* Turn the broadcast send into one pim.send per broadcast receiver.
*/
LogicalResult bufferize(Operation* op,
RewriterBase& rewriter,
@@ -448,32 +459,32 @@ struct ChannelBroadcastSendOpInterface
return failure();
}
// The first 'broadcast' operation creates the buffer just after the
// channelNewOp, while the other 'broadcast' operation need to find this
// buffer allocation just after the channelNewOp
Value bufferAllocation;
if (auto allocOpAfterChannel = dyn_cast<memref::AllocOp>(channelNewOp->getNextNode())) {
// Buffer already allocated, load from this buffer
bufferAllocation = allocOpAfterChannel;
}
else {
// Buffer was not allocated previously, allocate it after channelNewOp
rewriter.setInsertionPointAfter(channelNewOp);
bufferAllocation = createEmptyFromType(srcTensor.getType(), op->getLoc(), rewriter);
}
auto srcType = cast<ShapedType>(srcTensor.getType());
auto sizeInBytes = srcType.getNumElements() * srcType.getElementTypeBitWidth() / 8;
auto srcCoreIdAttr = cast<pim::PimCoreOp>(op->getParentOp()).getCoreIdAttr();
rewriter.setInsertionPoint(op);
pim::PimMemCopyDevToHostOp::create(rewriter,
op->getLoc(),
bufferAllocation.getType(),
bufferAllocation,
srcMemRef,
rewriter.getI32IntegerAttr(0),
rewriter.getI32IntegerAttr(0),
rewriter.getI32IntegerAttr(sizeInBytes));
bool foundReceiver = false;
for (Operation* user : channelNewOp->getUsers()) {
auto receiveOp = dyn_cast<SpatChannelBroadcastReceiveOp>(user);
if (!receiveOp)
continue;
foundReceiver = true;
auto dstCoreId = cast<pim::PimCoreOp>(receiveOp->getParentOp()).getCoreId();
receiveOp->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, srcCoreIdAttr);
pim::PimSendOp::create(rewriter,
op->getLoc(),
srcMemRef,
rewriter.getI32IntegerAttr(sizeInBytes),
rewriter.getI32IntegerAttr(dstCoreId));
}
if (!foundReceiver) {
op->emitError("SpatChannelBroadcastSendOp has no matching ChannelBroadcastReceiveOp");
return failure();
}
rewriter.eraseOp(op);
return success();
}

View File

@@ -3,66 +3,108 @@
ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline.
Generated tests can be regenerated with:
```
python3 validation/operations/gen_tests.py
```
## Conv
| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
|------|-----------|-------|--------|--------|--------|---------|------|-------|
| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
## Pool
| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes |
|------|-----------|-------|--------|--------|--------|---------|-------|
| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling |
| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool |
| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path |
| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling |
| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` |
| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` |
| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` |
## Relu
| Test | Directory | Input | Output | Notes |
|------|-----------|-------|--------|-------|
| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu |
| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu |
| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu |
| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu |
| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
|------------------|-------------------------|-----------|-----------|--------|--------|------------|------|------------------------------------|
| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
## Gemm
| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
|------|-----------|-----------|------------|--------|--------|-------|------|------|-------|
| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
|---------------|-------------------------|-----------|------------|----------|--------|-------|------|-------|------------------------------|
| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
## Gemv
| Test | Directory | Input | W (weight) | Output | Bias | Notes |
|------|-----------|-------|------------|--------|------|-------|
| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |
| Test | Directory | Input | W (weight) | Output | Bias | Notes |
|---------------------|------------------------------------|----------|------------|---------|---------|----------------------------|
| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |
## Pool
| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes |
|----------------------------|---------------------------------|-----------|-----------|------------------------|--------|------------|----------------------------------|
| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling |
| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool |
| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path |
| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling |
| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` |
| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` |
| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` |
## ReduceMean
| Test | Directory | Input | Output | Axes | Keepdims | Notes |
|------------|--------------------------|-----------|-----------|-------|----------|-------------------------------------------------|
| Basic | `reduce_mean/basic` | [4,8] | [4,1] | [1] | 1 | Reduce feature dimension, preserving rank |
| Keepdims 0 | `reduce_mean/keepdims_0` | [4,8] | [4] | [1] | 0 | Reduce feature dimension, dropping reduced axis |
| 4D spatial | `reduce_mean/4d_spatial` | [1,3,4,4] | [1,3,1,1] | [2,3] | 1 | Reduce H and W on NCHW input |
| After Conv | `reduce_mean/after_conv` | [1,3,5,5] | [1,2,1,1] | [2,3] | 1 | Conv 3x3 + bias, then spatial ReduceMean |
## Relu
| Test | Directory | Input | Output | Notes |
|------------|-------------------|-----------|-----------|----------------------------|
| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu |
| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu |
| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu |
| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu |
## Sigmoid
| Test | Directory | Input | Output | Notes |
|------------|----------------------|-----------|-----------|---------------------------|
| Basic | `sigmoid/basic` | [4,8] | [4,8] | Standalone 2D Sigmoid |
| 4D | `sigmoid/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Sigmoid |
| After Gemm | `sigmoid/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Sigmoid |
## Add
| Test | Directory | Input(s) | Output | Notes |
|---------------|---------------------|------------------|--------|---------------------------------------------|
| Basic | `add/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise add, same-shape inputs |
| Broadcast row | `add/broadcast_row` | A:[4,8], B:[8] | [4,8] | Row-vector broadcasting via initializer |
| After Gemm | `add/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Add with broadcast vector |
## Mul
| Test | Directory | Input(s) | Output | Notes |
|-----------------|-----------------------|--------------------------|-----------|-------------------------------------------|
| Basic | `mul/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise multiply, same-shape inputs |
| Scalar constant | `mul/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer |
| After Conv | `mul/after_conv` | X:[1,3,5,5], S:[1,2,1,1] | [1,2,3,3] | Conv 3x3 + bias, then per-channel scaling |
## Div
| Test | Directory | Input(s) | Output | Notes |
|-----------------|-----------------------|------------------|--------|------------------------------------------------------|
| Basic | `div/basic` | X:[4,8], D:[4,8] | [4,8] | Elementwise divide by same-shape constant tensor |
| Scalar constant | `div/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer |
| After Gemm | `div/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Div with positive broadcast vector |

Binary file not shown.

Binary file not shown.

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Generate ONNX test models for validating GEMM, Conv, Pooling, and Relu implementations."""
"""Generate ONNX test models for validating GEMM, Conv, Pooling, Relu, and ReduceMean implementations."""
import numpy as np
import onnx
@@ -19,102 +19,8 @@ def save_model(model, directory, filename):
print(f" {path.relative_to(OPERATIONS_DIR)}")
# ---------------------------------------------------------------------------
# GEMM tests
# ---------------------------------------------------------------------------
def gemm_non_square():
    """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
    batch, k, n = 4, 128, 64
    rng = np.random.default_rng(42)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_non_square", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/non_square", "gemm_non_square.onnx")


def gemm_with_bias():
    """GEMM with bias: Y = A @ W + C."""
    batch, k, n = 4, 128, 128
    rng = np.random.default_rng(43)
    # Weight matrix and bias vector are baked into the model as initializers.
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    C = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_with_bias", [A], [Y], initializer=[W, C])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/with_bias", "gemm_with_bias.onnx")


def gemm_transB():
    """GEMM with transB=1: Y = A @ W^T."""
    batch, k, n = 4, 128, 64
    rng = np.random.default_rng(44)
    # W is stored as [N, K]; the transB attribute transposes it at compute time.
    W = numpy_helper.from_array(rng.uniform(-1, 1, (n, k)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
    graph = helper.make_graph([gemm], "gemm_transB", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/transB", "gemm_transB.onnx")


def gemm_alpha_beta():
    """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
    batch, k, n = 4, 64, 64
    rng = np.random.default_rng(45)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    C = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25)
    graph = helper.make_graph([gemm], "gemm_alpha_beta", [A], [Y], initializer=[W, C])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx")


def gemm_small():
    """Small GEMM: [2, 8] @ [8, 4]."""
    batch, k, n = 2, 8, 4
    rng = np.random.default_rng(46)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_small", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/small", "gemm_small.onnx")


def gemm_large():
    """Larger GEMM: [8, 256] @ [256, 128]."""
    batch, k, n = 8, 256, 128
    rng = np.random.default_rng(47)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_large", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/large", "gemm_large.onnx")


def gemm_transB_with_bias():
    """GEMM with transB and bias: Y = A @ W^T + C."""
    batch, k, n = 4, 128, 64
    rng = np.random.default_rng(48)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (n, k)).astype(np.float32), name="W")
    C = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
    graph = helper.make_graph([gemm], "gemm_transB_with_bias", [A], [Y], initializer=[W, C])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx")
def make_int64_initializer(name, values):
    """Build a named int64 ONNX initializer tensor from a Python sequence."""
    data = np.asarray(values, dtype=np.int64)
    return numpy_helper.from_array(data, name=name)
# ---------------------------------------------------------------------------
@@ -248,6 +154,104 @@ def conv_large_spatial():
save_model(model, "conv/large_spatial", "conv_large_spatial.onnx")
# ---------------------------------------------------------------------------
# GEMM tests
# ---------------------------------------------------------------------------
def gemm_non_square():
    """GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
    batch, k, n = 4, 128, 64
    rng = np.random.default_rng(42)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_non_square", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/non_square", "gemm_non_square.onnx")


def gemm_with_bias():
    """GEMM with bias: Y = A @ W + C."""
    batch, k, n = 4, 128, 128
    rng = np.random.default_rng(43)
    # Weight matrix and bias vector are baked into the model as initializers.
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    C = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_with_bias", [A], [Y], initializer=[W, C])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/with_bias", "gemm_with_bias.onnx")


def gemm_transB():
    """GEMM with transB=1: Y = A @ W^T."""
    batch, k, n = 4, 128, 64
    rng = np.random.default_rng(44)
    # W is stored as [N, K]; the transB attribute transposes it at compute time.
    W = numpy_helper.from_array(rng.uniform(-1, 1, (n, k)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
    graph = helper.make_graph([gemm], "gemm_transB", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/transB", "gemm_transB.onnx")


def gemm_alpha_beta():
    """GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
    batch, k, n = 4, 64, 64
    rng = np.random.default_rng(45)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    C = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25)
    graph = helper.make_graph([gemm], "gemm_alpha_beta", [A], [Y], initializer=[W, C])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx")


def gemm_small():
    """Small GEMM: [2, 8] @ [8, 4]."""
    batch, k, n = 2, 8, 4
    rng = np.random.default_rng(46)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_small", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/small", "gemm_small.onnx")


def gemm_large():
    """Larger GEMM: [8, 256] @ [256, 128]."""
    batch, k, n = 8, 256, 128
    rng = np.random.default_rng(47)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W"], ["Y"])
    graph = helper.make_graph([gemm], "gemm_large", [A], [Y], initializer=[W])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/large", "gemm_large.onnx")


def gemm_transB_with_bias():
    """GEMM with transB and bias: Y = A @ W^T + C."""
    batch, k, n = 4, 128, 64
    rng = np.random.default_rng(48)
    W = numpy_helper.from_array(rng.uniform(-1, 1, (n, k)).astype(np.float32), name="W")
    C = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
    graph = helper.make_graph([gemm], "gemm_transB_with_bias", [A], [Y], initializer=[W, C])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx")
# ---------------------------------------------------------------------------
# Pooling tests
# ---------------------------------------------------------------------------
@@ -327,6 +331,55 @@ def maxpool_after_conv():
save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx")
# ---------------------------------------------------------------------------
# ReduceMean tests
# ---------------------------------------------------------------------------
def reducemean_basic():
    """ReduceMean over the feature dimension, preserving rank."""
    reduce = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
    inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])]
    outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])]
    graph = helper.make_graph([reduce], "reducemean_basic", inputs, outputs)
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset),
               "reduce_mean/basic", "reduce_mean_basic.onnx")
def reducemean_keepdims_0():
    """ReduceMean over the feature dimension, dropping the reduced axis."""
    reduce = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
    inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])]
    # keepdims=0 collapses the reduced axis, so the output is rank-1.
    outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])]
    graph = helper.make_graph([reduce], "reducemean_keepdims_0", inputs, outputs)
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset),
               "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")
def reducemean_4d_spatial():
    """ReduceMean over H and W on an NCHW tensor."""
    reduce = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
    inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])]
    outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])]
    graph = helper.make_graph([reduce], "reducemean_4d_spatial", inputs, outputs)
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset),
               "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")
def reducemean_after_conv():
    """Conv followed by ReduceMean over the spatial dimensions."""
    rng = np.random.default_rng(62)
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
    # Conv weight and bias are drawn in this order to keep rng state identical.
    weight = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
    bias = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
    conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
                            kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
    reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
    graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y],
                              initializer=[weight, bias])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
# ---------------------------------------------------------------------------
# Relu tests
# ---------------------------------------------------------------------------
@@ -381,6 +434,220 @@ def relu_after_gemm():
save_model(model, "relu/after_gemm", "relu_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Sigmoid tests
# ---------------------------------------------------------------------------
def sigmoid_basic():
    """Standalone Sigmoid on a simple 2D tensor."""
    sigmoid = helper.make_node("Sigmoid", ["X"], ["Y"])
    inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])]
    outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])]
    graph = helper.make_graph([sigmoid], "sigmoid_basic", inputs, outputs)
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset),
               "sigmoid/basic", "sigmoid_basic.onnx")
def sigmoid_4d():
    """Standalone Sigmoid on an NCHW tensor."""
    sigmoid = helper.make_node("Sigmoid", ["X"], ["Y"])
    inputs = [helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4])]
    outputs = [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4])]
    graph = helper.make_graph([sigmoid], "sigmoid_4d", inputs, outputs)
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset),
               "sigmoid/4d", "sigmoid_4d.onnx")
def sigmoid_after_gemm():
    """Gemm followed by Sigmoid."""
    batch, k, n = 4, 64, 32
    rng = np.random.default_rng(63)
    weight = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    bias = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    # Intermediate tensor "G" connects the Gemm output to the Sigmoid input.
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
    sigmoid = helper.make_node("Sigmoid", ["G"], ["Y"])
    graph = helper.make_graph([gemm, sigmoid], "sigmoid_after_gemm", [A], [Y],
                              initializer=[weight, bias])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Add tests
# ---------------------------------------------------------------------------
def add_basic():
    """Elementwise Add on two inputs with identical shapes."""
    add = helper.make_node("Add", ["A", "B"], ["Y"])
    lhs = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
    rhs = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8])
    out = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
    graph = helper.make_graph([add], "add_basic", [lhs, rhs], [out])
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset), "add/basic", "add_basic.onnx")
def add_broadcast_row():
    """Elementwise Add with row-vector broadcasting."""
    rng = np.random.default_rng(64)
    # The [8] initializer broadcasts across the four rows of A.
    row = numpy_helper.from_array(rng.uniform(-1, 1, (8,)).astype(np.float32), name="B")
    lhs = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
    out = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
    add = helper.make_node("Add", ["A", "B"], ["Y"])
    graph = helper.make_graph([add], "add_broadcast_row", [lhs], [out], initializer=[row])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "add/broadcast_row", "add_broadcast_row.onnx")
def add_after_gemm():
    """Gemm followed by Add with a broadcast bias vector."""
    batch, k, n = 4, 64, 32
    rng = np.random.default_rng(65)
    # Draw order (W, C, D) must stay fixed so the generated data is stable.
    weight = numpy_helper.from_array(rng.uniform(-1, 1, (k, n)).astype(np.float32), name="W")
    gemm_bias = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="C")
    addend = numpy_helper.from_array(rng.uniform(-1, 1, (n,)).astype(np.float32), name="D")
    A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, k])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, n])
    gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
    add = helper.make_node("Add", ["G", "D"], ["Y"])
    graph = helper.make_graph([gemm, add], "add_after_gemm", [A], [Y],
                              initializer=[weight, gemm_bias, addend])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "add/after_gemm", "add_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Mul tests
# ---------------------------------------------------------------------------
def mul_basic():
    """Elementwise Mul on two inputs with identical shapes."""
    mul = helper.make_node("Mul", ["A", "B"], ["Y"])
    lhs = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
    rhs = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8])
    out = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
    graph = helper.make_graph([mul], "mul_basic", [lhs, rhs], [out])
    opset = [helper.make_opsetid("", 13)]
    save_model(helper.make_model(graph, opset_imports=opset), "mul/basic", "mul_basic.onnx")
def mul_scalar_constant():
    """Elementwise Mul with scalar broadcasting."""
    # Single-element initializer broadcasts over the whole [4, 8] input.
    scale = numpy_helper.from_array(np.asarray([1.5], dtype=np.float32), name="S")
    x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
    mul = helper.make_node("Mul", ["X", "S"], ["Y"])
    graph = helper.make_graph([mul], "mul_scalar_constant", [x], [y], initializer=[scale])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "mul/scalar_constant", "mul_scalar_constant.onnx")
def mul_after_conv():
    """Conv followed by Mul with per-channel scaling."""
    rng = np.random.default_rng(66)
    x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
    y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3])
    # Draw order (W, B, S) must stay fixed so the generated data is stable.
    weight = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
    bias = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
    scale = numpy_helper.from_array(rng.uniform(0.5, 1.5, (1, 2, 1, 1)).astype(np.float32), name="S")
    conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
                            kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
    mul = helper.make_node("Mul", ["C", "S"], ["Y"])
    graph = helper.make_graph([conv, mul], "mul_after_conv", [x], [y],
                              initializer=[weight, bias, scale])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "mul/after_conv", "mul_after_conv.onnx")
# ---------------------------------------------------------------------------
# Div tests
# ---------------------------------------------------------------------------
def div_basic():
    """Elementwise Div by a same-shape constant tensor."""
    rng = np.random.default_rng(67)
    # Divisor values drawn in [0.5, 2.0] so no element is near zero.
    divisor = numpy_helper.from_array(rng.uniform(0.5, 2.0, (4, 8)).astype(np.float32), name="D")
    x = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
    div = helper.make_node("Div", ["X", "D"], ["Y"])
    graph = helper.make_graph([div], "div_basic", [x], [y], initializer=[divisor])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "div/basic", "div_basic.onnx")
def div_scalar_constant():
    """Elementwise Div with scalar broadcasting."""
    # A single-element divisor broadcasts over the whole [4, 8] input.
    scalar = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="S")
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
    div_node = helper.make_node("Div", ["X", "S"], ["Y"])
    graph = helper.make_graph([div_node], "div_scalar_constant", [x_info], [y_info], initializer=[scalar])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "div/scalar_constant", "div_scalar_constant.onnx")
def div_after_gemm():
    """Gemm followed by Div with a broadcast divisor vector."""
    batch, in_feat, out_feat = 4, 64, 32
    rng = np.random.default_rng(68)
    # Draw order (W, C, D) is fixed so generated values stay reproducible.
    weights = numpy_helper.from_array(rng.uniform(-1, 1, (in_feat, out_feat)).astype(np.float32), name="W")
    bias = numpy_helper.from_array(rng.uniform(-1, 1, (out_feat,)).astype(np.float32), name="C")
    # Divisor values stay in [0.5, 2.0] to avoid division by values near zero.
    divisor = numpy_helper.from_array(rng.uniform(0.5, 2.0, (out_feat,)).astype(np.float32), name="D")
    a_info = helper.make_tensor_value_info("A", TensorProto.FLOAT, [batch, in_feat])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [batch, out_feat])
    gemm_node = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
    # The (N,) divisor broadcasts across the batch dimension of the Gemm output.
    div_node = helper.make_node("Div", ["G", "D"], ["Y"])
    graph = helper.make_graph([gemm_node, div_node], "div_after_gemm", [a_info], [y_info],
                              initializer=[weights, bias, divisor])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "div/after_gemm", "div_after_gemm.onnx")
# ---------------------------------------------------------------------------
# ReduceMean tests
# ---------------------------------------------------------------------------
def reducemean_basic():
    """ReduceMean over the feature dimension, preserving rank."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    # keepdims=1 keeps the reduced axis as a size-1 dimension: [4, 8] -> [4, 1].
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])
    reduce_node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
    graph = helper.make_graph([reduce_node], "reducemean_basic", [x_info], [y_info])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx")
def reducemean_keepdims_0():
    """ReduceMean over the feature dimension, dropping the reduced axis."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    # keepdims=0 removes the reduced axis entirely: [4, 8] -> [4].
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])
    reduce_node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
    graph = helper.make_graph([reduce_node], "reducemean_keepdims_0", [x_info], [y_info])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")
def reducemean_4d_spatial():
    """ReduceMean over H and W on an NCHW tensor."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])
    # Reducing axes 2 and 3 with keepdims=1 yields one mean per channel.
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])
    reduce_node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
    graph = helper.make_graph([reduce_node], "reducemean_4d_spatial", [x_info], [y_info])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")
def reducemean_after_conv():
    """Conv followed by ReduceMean over the spatial dimensions."""
    rng = np.random.default_rng(62)
    # Valid 3x3 conv over a 5x5 input gives a [1, 2, 3, 3] intermediate.
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
    # Draw order (W, B) is fixed so generated values stay reproducible.
    kernel = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
    bias = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
    conv_node = helper.make_node("Conv", ["X", "W", "B"], ["C"],
                                 kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
    # Collapse H and W of the conv output to a single mean per channel.
    mean_node = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
    graph = helper.make_graph([conv_node, mean_node], "reducemean_after_conv", [x_info], [y_info],
                              initializer=[kernel, bias])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
@@ -415,10 +682,36 @@ if __name__ == "__main__":
# Driver: generate every ONNX test model, grouped per operator.
# Each generator writes its model under a per-op subdirectory via save_model.
avgpool_include_pad()
maxpool_after_conv()
print("\nGenerating ReduceMean tests:")
reducemean_basic()
reducemean_keepdims_0()
reducemean_4d_spatial()
reducemean_after_conv()
print("\nGenerating Relu tests:")
relu_basic()
relu_4d()
relu_after_conv()
relu_after_gemm()
print("\nGenerating Sigmoid tests:")
sigmoid_basic()
sigmoid_4d()
sigmoid_after_gemm()
print("\nGenerating Add tests:")
add_basic()
add_broadcast_row()
add_after_gemm()
print("\nGenerating Mul tests:")
mul_basic()
mul_scalar_constant()
mul_after_conv()
print("\nGenerating Div tests:")
div_basic()
div_scalar_constant()
div_after_gemm()
print("\nDone.")

Binary file not shown.

Binary file not shown.

Binary file not shown.