add support for operations: reduceMean, add, mul, div, sigmoid
Some checks failed
Validate Operations / validate-operations (push) Failing after 51m52s
Some checks failed
Validate Operations / validate-operations (push) Failing after 51m52s
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
use anyhow::{Context, Result};
|
use anyhow::{Context, Result, ensure};
|
||||||
use paste::paste;
|
use paste::paste;
|
||||||
use std::{collections::HashMap, mem::offset_of, sync::LazyLock};
|
use std::{collections::HashMap, mem::offset_of, sync::LazyLock};
|
||||||
|
|
||||||
@@ -36,6 +36,7 @@ static SIMD: LazyLock<HashMap<String, FunctorType>> = LazyLock::new(|| {
|
|||||||
add_to_json_map!(storage, vvmax);
|
add_to_json_map!(storage, vvmax);
|
||||||
add_to_json_map!(storage, vvsll);
|
add_to_json_map!(storage, vvsll);
|
||||||
add_to_json_map!(storage, vvsra);
|
add_to_json_map!(storage, vvsra);
|
||||||
|
add_to_json_map!(storage, vavg);
|
||||||
add_to_json_map!(storage, vrelu);
|
add_to_json_map!(storage, vrelu);
|
||||||
add_to_json_map!(storage, vtanh);
|
add_to_json_map!(storage, vtanh);
|
||||||
add_to_json_map!(storage, vsigm);
|
add_to_json_map!(storage, vsigm);
|
||||||
@@ -339,6 +340,7 @@ fn json_to_vavg(
|
|||||||
let rd = json_i64!(json, "rd") as i32;
|
let rd = json_i64!(json, "rd") as i32;
|
||||||
let rs1 = json_i64!(json, "rs1") as i32;
|
let rs1 = json_i64!(json, "rs1") as i32;
|
||||||
let rs2 = json_i64!(json, "rs2") as i32;
|
let rs2 = json_i64!(json, "rs2") as i32;
|
||||||
|
ensure!(rs2 == 1, "vavg only supports stride 1");
|
||||||
let len = json_i64!(json, "len") as i32;
|
let len = json_i64!(json, "len") as i32;
|
||||||
let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap());
|
let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap());
|
||||||
inst_data_builder
|
inst_data_builder
|
||||||
|
|||||||
@@ -55,19 +55,15 @@ pub trait HasSigm {
|
|||||||
|
|
||||||
impl HasSigm for f32 {
|
impl HasSigm for f32 {
|
||||||
fn sigm(self) -> Self {
|
fn sigm(self) -> Self {
|
||||||
let x = self;
|
let ex = self.exp();
|
||||||
let e = std::f32::consts::E;
|
ex / (1.0 + ex)
|
||||||
let ex = x.powf(x);
|
|
||||||
(ex) / (1.0+ex)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HasSigm for f64 {
|
impl HasSigm for f64 {
|
||||||
fn sigm(self) -> Self {
|
fn sigm(self) -> Self {
|
||||||
let x = self;
|
let ex = self.exp();
|
||||||
let e = std::f64::consts::E;
|
ex / (1.0 + ex)
|
||||||
let ex = x.powf(x);
|
|
||||||
(ex) / (1.0+ex)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -121,6 +121,13 @@ json::Object PimCodeGen::createEmptyOffset() {
|
|||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the "offset" JSON object with offset_select = 1 and offset_value = 0.
// NOTE(review): judging by the function name, select value 1 presumably means
// "apply the offset to rs1 only" — confirm against the simulator's offset decoding.
static json::Object createRs1OnlyOffset() {
  return json::Object {
    {"offset_select", 1},
    {"offset_value",  0}
  };
}
|
||||||
|
|
||||||
void PimCodeGen::emitInstruction(json::Object instruction) const {
|
void PimCodeGen::emitInstruction(json::Object instruction) const {
|
||||||
coreFileStream << json::Value(std::move(instruction)) << ',';
|
coreFileStream << json::Value(std::move(instruction)) << ',';
|
||||||
}
|
}
|
||||||
@@ -331,7 +338,8 @@ void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp) const {
|
|||||||
json["op"] = "vavg";
|
json["op"] = "vavg";
|
||||||
json["rd"] = 0;
|
json["rd"] = 0;
|
||||||
json["rs1"] = 1;
|
json["rs1"] = 1;
|
||||||
json["offset"] = createEmptyOffset();
|
json["rs2"] = 1;
|
||||||
|
json["offset"] = createRs1OnlyOffset();
|
||||||
json["len"] = getValueSizeInBytes(vavgOp.getInput());
|
json["len"] = getValueSizeInBytes(vavgOp.getInput());
|
||||||
emitInstruction(std::move(json));
|
emitInstruction(std::move(json));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,10 +4,13 @@ add_public_tablegen_target(ONNXToSpatialIncGen)
|
|||||||
|
|
||||||
add_pim_library(OMONNXToSpatial
|
add_pim_library(OMONNXToSpatial
|
||||||
Patterns/Math/Conv.cpp
|
Patterns/Math/Conv.cpp
|
||||||
|
Patterns/Math/Elementwise.cpp
|
||||||
Patterns/Math/Gemm.cpp
|
Patterns/Math/Gemm.cpp
|
||||||
Patterns/Math/MatMul.cpp
|
Patterns/Math/MatMul.cpp
|
||||||
|
Patterns/Math/ReduceMean.cpp
|
||||||
Patterns/NN/Pool.cpp
|
Patterns/NN/Pool.cpp
|
||||||
Patterns/NN/Relu.cpp
|
Patterns/NN/Relu.cpp
|
||||||
|
Patterns/NN/Sigmoid.cpp
|
||||||
Patterns/Tensor/Concat.cpp
|
Patterns/Tensor/Concat.cpp
|
||||||
Patterns/Tensor/Reshape.cpp
|
Patterns/Tensor/Reshape.cpp
|
||||||
ONNXToSpatialPass.cpp
|
ONNXToSpatialPass.cpp
|
||||||
|
|||||||
@@ -14,8 +14,6 @@
|
|||||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||||
|
|
||||||
#define DEFINE_MAP_OP(opname) opname,
|
|
||||||
|
|
||||||
namespace onnx_mlir {
|
namespace onnx_mlir {
|
||||||
|
|
||||||
template <class ShapedType>
|
template <class ShapedType>
|
||||||
|
|||||||
@@ -72,11 +72,15 @@ void ONNXToSpatialPass::runOnOperation() {
|
|||||||
target.addLegalDialect<spatial::SpatialDialect, ONNXDialect, tensor::TensorDialect, arith::ArithDialect>();
|
target.addLegalDialect<spatial::SpatialDialect, ONNXDialect, tensor::TensorDialect, arith::ArithDialect>();
|
||||||
target.addDynamicallyLegalOp<ONNXMatMulOp>(
|
target.addDynamicallyLegalOp<ONNXMatMulOp>(
|
||||||
[](ONNXMatMulOp op) { return cast<ShapedType>(op.getY().getType()).getRank() != 2; });
|
[](ONNXMatMulOp op) { return cast<ShapedType>(op.getY().getType()).getRank() != 2; });
|
||||||
|
target.addIllegalOp<ONNXAddOp>();
|
||||||
|
target.addIllegalOp<ONNXDivOp>();
|
||||||
|
target.addIllegalOp<ONNXMulOp>();
|
||||||
target.addIllegalOp<ONNXGemmOp>();
|
target.addIllegalOp<ONNXGemmOp>();
|
||||||
target.addIllegalOp<ONNXConvOp>();
|
target.addIllegalOp<ONNXConvOp>();
|
||||||
target.addIllegalOp<ONNXMaxPoolSingleOutOp>();
|
target.addIllegalOp<ONNXMaxPoolSingleOutOp>();
|
||||||
target.addIllegalOp<ONNXAveragePoolOp>();
|
target.addIllegalOp<ONNXAveragePoolOp>();
|
||||||
target.addIllegalOp<ONNXReluOp>();
|
target.addIllegalOp<ONNXReluOp>();
|
||||||
|
target.addIllegalOp<ONNXSigmoidOp>();
|
||||||
target.addIllegalOp<ONNXSoftmaxOp>();
|
target.addIllegalOp<ONNXSoftmaxOp>();
|
||||||
target.addIllegalOp<ONNXConcatOp>();
|
target.addIllegalOp<ONNXConcatOp>();
|
||||||
target.addIllegalOp<ONNXReshapeOp>();
|
target.addIllegalOp<ONNXReshapeOp>();
|
||||||
@@ -86,10 +90,13 @@ void ONNXToSpatialPass::runOnOperation() {
|
|||||||
RewritePatternSet patterns(ctx);
|
RewritePatternSet patterns(ctx);
|
||||||
patterns.add<removeLRN>(ctx);
|
patterns.add<removeLRN>(ctx);
|
||||||
|
|
||||||
|
populateElementwisePatterns(patterns, ctx);
|
||||||
populateGemmPatterns(patterns, ctx);
|
populateGemmPatterns(patterns, ctx);
|
||||||
populateConvPatterns(patterns, ctx);
|
populateConvPatterns(patterns, ctx);
|
||||||
populatePoolPatterns(patterns, ctx);
|
populatePoolPatterns(patterns, ctx);
|
||||||
|
populateReduceMeanPatterns(patterns, ctx);
|
||||||
populateReluPatterns(patterns, ctx);
|
populateReluPatterns(patterns, ctx);
|
||||||
|
populateSigmoidPatterns(patterns, ctx);
|
||||||
populateConcatPatterns(patterns, ctx);
|
populateConcatPatterns(patterns, ctx);
|
||||||
populateReshapePatterns(patterns, ctx);
|
populateReshapePatterns(patterns, ctx);
|
||||||
|
|
||||||
|
|||||||
@@ -7,14 +7,20 @@ namespace onnx_mlir {
|
|||||||
|
|
||||||
void populateConvPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populateConvPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
|
void populateElementwisePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
void populateGemmPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populateGemmPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
void populateMatMulRewritePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populateMatMulRewritePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
void populatePoolPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populatePoolPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
|
void populateReduceMeanPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
|
void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|
||||||
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
|
||||||
|
|||||||
204
src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp
Normal file
204
src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Elementwise.cpp
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
#include "mlir/Dialect/Arith/IR/Arith.h"
|
||||||
|
#include "mlir/IR/BuiltinAttributes.h"
|
||||||
|
#include "mlir/IR/BuiltinTypes.h"
|
||||||
|
#include "mlir/Transforms/DialectConversion.h"
|
||||||
|
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
|
||||||
|
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
|
||||||
|
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
|
||||||
|
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||||
|
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||||
|
|
||||||
|
using namespace mlir;
|
||||||
|
|
||||||
|
namespace onnx_mlir {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Returns the row-major (innermost dimension contiguous) element strides of
// `shape`: the last stride is 1 and every other stride is the product of all
// dimensions to its right. An empty shape yields an empty vector.
static SmallVector<int64_t> computeRowMajorStrides(ArrayRef<int64_t> shape) {
  SmallVector<int64_t> strides(shape.size(), 1);
  // Walk from the second-to-last dimension outwards, accumulating the product.
  for (size_t dim = shape.size(); dim > 1; --dim)
    strides[dim - 2] = strides[dim - 1] * shape[dim - 1];
  return strides;
}
|
||||||
|
|
||||||
|
// Returns the DenseElementsAttr carried by the op defining `value` when that op
// is an arith.constant or an onnx.Constant. Returns a null attribute when
// `value` has no such producer or the constant payload is not dense.
static DenseElementsAttr getDenseConstantAttr(Value value) {
  Operation* producer = value.getDefiningOp();
  if (!producer)
    return nullptr;

  if (auto arithConstant = dyn_cast<arith::ConstantOp>(producer))
    return dyn_cast<DenseElementsAttr>(arithConstant.getValue());

  // The onnx.Constant value attribute may be absent, hence the null-tolerant cast.
  if (auto onnxConstant = dyn_cast<ONNXConstantOp>(producer))
    return dyn_cast_or_null<DenseElementsAttr>(onnxConstant.getValueAttr());

  return nullptr;
}
|
||||||
|
|
||||||
|
// Materializes the constant `value` as a tensor of type `resultType`, expanding
// it with trailing-dimension-aligned broadcasting (a source dimension must be 1
// or equal to the matching result dimension; missing leading dimensions count
// as 1).
//
// Fails when:
//   * `value` is not produced by a constant op carrying a DenseElementsAttr,
//   * the constant or `resultType` is not a statically shaped ranked tensor,
//   * the element types differ,
//   * the source rank exceeds the result rank or the shapes are not
//     broadcast-compatible.
//
// When the constant already has type `resultType`, `value` itself is returned
// and no op is created; otherwise a new arith.constant is created at `loc`.
static FailureOr<Value> materializeBroadcastedConstantTensor(Value value,
                                                             RankedTensorType resultType,
                                                             ConversionPatternRewriter& rewriter,
                                                             Location loc) {
  auto denseAttr = getDenseConstantAttr(value);
  if (!denseAttr)
    return failure();

  auto sourceType = dyn_cast<RankedTensorType>(denseAttr.getType());
  if (!sourceType || !sourceType.hasStaticShape() || !resultType.hasStaticShape())
    return failure();

  if (sourceType == resultType)
    return value;

  // The source element attributes are reused verbatim for the result, so the
  // element types must agree; building the attribute with mismatching element
  // types would trip an assertion instead of failing the match gracefully.
  if (sourceType.getElementType() != resultType.getElementType())
    return failure();

  ArrayRef<int64_t> sourceShape = sourceType.getShape();
  ArrayRef<int64_t> resultShape = resultType.getShape();
  if (sourceShape.size() > resultShape.size())
    return failure();

  const int64_t resultRank = static_cast<int64_t>(resultShape.size());
  // Number of leading result dimensions that have no counterpart in the source.
  const int64_t rankOffset = static_cast<int64_t>(resultShape.size() - sourceShape.size());

  // Validate broadcast compatibility dimension by dimension.
  for (int64_t i = 0; i < resultRank; ++i) {
    const int64_t sourceIndex = i - rankOffset;
    const int64_t sourceDim = sourceIndex < 0 ? 1 : sourceShape[sourceIndex];
    if (sourceDim != 1 && sourceDim != resultShape[i])
      return failure();
  }

  SmallVector<Attribute> sourceValues(denseAttr.getValues<Attribute>());
  SmallVector<int64_t> sourceStrides = computeRowMajorStrides(sourceShape);
  SmallVector<int64_t> resultStrides = computeRowMajorStrides(resultShape);

  const int64_t numResultElements = resultType.getNumElements();
  SmallVector<Attribute> resultValues;
  resultValues.reserve(numResultElements);

  for (int64_t flatIndex = 0; flatIndex < numResultElements; ++flatIndex) {
    int64_t remaining = flatIndex;
    int64_t sourceFlatIndex = 0;

    // Decompose the flat result index into per-dimension indices and map each
    // one onto the source; broadcast dimensions (size 1) always read index 0.
    for (int64_t i = 0; i < resultRank; ++i) {
      const int64_t resultIndex = remaining / resultStrides[i];
      remaining %= resultStrides[i];

      const int64_t sourceIndex = i - rankOffset;
      if (sourceIndex < 0)
        continue;

      const int64_t mappedIndex = sourceShape[sourceIndex] == 1 ? 0 : resultIndex;
      sourceFlatIndex += mappedIndex * sourceStrides[sourceIndex];
    }

    resultValues.push_back(sourceValues[sourceFlatIndex]);
  }

  auto broadcastedAttr = DenseElementsAttr::get(resultType, resultValues);
  return arith::ConstantOp::create(rewriter, loc, resultType, broadcastedAttr).getResult();
}
|
||||||
|
|
||||||
|
// Returns `value` in a form usable as an operand of an elementwise op whose
// result has type `resultType`.
//
// Operands that already have the result type are passed through untouched. Any
// other operand must be a broadcastable constant, which is expanded to the
// result type; non-constant operands of a different type make this fail.
// Operands that are not statically shaped ranked tensors are rejected.
static FailureOr<Value> prepareElementwiseOperand(Value value,
                                                  RankedTensorType resultType,
                                                  ConversionPatternRewriter& rewriter,
                                                  Location loc) {
  auto operandType = dyn_cast<RankedTensorType>(value.getType());
  const bool isStaticRankedTensor = operandType && operandType.hasStaticShape();
  if (!isStaticRankedTensor)
    return failure();

  if (operandType != resultType)
    return materializeBroadcastedConstantTensor(value, resultType, rewriter, loc);

  return value;
}
|
||||||
|
|
||||||
|
// Builds an arith.constant of type `resultType` holding the elementwise
// reciprocal (1 / x) of the constant `value`, after broadcasting it to
// `resultType`.
//
// Fails when `value` cannot be broadcast as a constant, when the constant is
// not a floating-point dense attribute, or when a division reports an invalid
// operation.
static FailureOr<Value> materializeReciprocalTensor(Value value,
                                                    RankedTensorType resultType,
                                                    ConversionPatternRewriter& rewriter,
                                                    Location loc) {
  FailureOr<Value> broadcasted = materializeBroadcastedConstantTensor(value, resultType, rewriter, loc);
  if (failed(broadcasted))
    return failure();

  auto divisors = dyn_cast<DenseFPElementsAttr>(getDenseConstantAttr(*broadcasted));
  if (!divisors)
    return failure();

  SmallVector<APFloat> reciprocals;
  reciprocals.reserve(divisors.getNumElements());
  for (const APFloat& divisor : divisors.getValues<APFloat>()) {
    // Start from 1 in the divisor's own float semantics, then divide in place.
    APFloat quotient(divisor.getSemantics(), 1);
    const auto status = quotient.divide(divisor, APFloat::rmNearestTiesToEven);
    if ((status & APFloat::opInvalidOp) != 0)
      return failure();
    reciprocals.push_back(std::move(quotient));
  }

  auto reciprocalAttr = DenseFPElementsAttr::get(resultType, reciprocals);
  return arith::ConstantOp::create(rewriter, loc, resultType, reciprocalAttr).getResult();
}
|
||||||
|
|
||||||
|
template <typename OnnxOp, typename SpatialOp>
|
||||||
|
struct BinaryElementwiseToSpatialCompute : OpConversionPattern<OnnxOp> {
|
||||||
|
using OpConversionPattern<OnnxOp>::OpConversionPattern;
|
||||||
|
using Adaptor = typename OnnxOp::Adaptor;
|
||||||
|
|
||||||
|
LogicalResult matchAndRewrite(OnnxOp op, Adaptor adaptor, ConversionPatternRewriter& rewriter) const override {
|
||||||
|
auto resultType = dyn_cast<RankedTensorType>(op->getResult(0).getType());
|
||||||
|
if (!resultType || !resultType.hasStaticShape())
|
||||||
|
return failure();
|
||||||
|
|
||||||
|
Location loc = op.getLoc();
|
||||||
|
auto lhs = prepareElementwiseOperand(adaptor.getOperands()[0], resultType, rewriter, loc);
|
||||||
|
if (failed(lhs))
|
||||||
|
return failure();
|
||||||
|
|
||||||
|
auto rhs = prepareElementwiseOperand(adaptor.getOperands()[1], resultType, rewriter, loc);
|
||||||
|
if (failed(rhs))
|
||||||
|
return failure();
|
||||||
|
|
||||||
|
constexpr size_t numInputs = 2;
|
||||||
|
auto computeOp =
|
||||||
|
createSpatCompute<numInputs>(rewriter, loc, resultType, {}, ValueRange {*lhs, *rhs}, [&](Value x, Value y) {
|
||||||
|
auto loweredOp = SpatialOp::create(rewriter, loc, resultType, x, y);
|
||||||
|
spatial::SpatYieldOp::create(rewriter, loc, loweredOp.getResult());
|
||||||
|
});
|
||||||
|
|
||||||
|
rewriter.replaceOp(op, computeOp);
|
||||||
|
return success();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Lowers onnx.Div to a spatial compute region that multiplies the dividend by
// the precomputed reciprocal of the divisor.
//
// The divisor must therefore be a floating-point constant (broadcastable to the
// result type); the dividend must already have the result type or be a
// broadcastable constant. Otherwise the match fails.
struct DivToSpatialCompute : OpConversionPattern<ONNXDivOp> {
  using OpConversionPattern::OpConversionPattern;

  LogicalResult
  matchAndRewrite(ONNXDivOp op, ONNXDivOpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override {
    auto outputType = dyn_cast<RankedTensorType>(op.getResult().getType());
    if (!(outputType && outputType.hasStaticShape()))
      return failure();

    const Location loc = op.getLoc();

    FailureOr<Value> dividend = prepareElementwiseOperand(adaptor.getA(), outputType, rewriter, loc);
    if (failed(dividend))
      return failure();

    FailureOr<Value> divisorReciprocal = materializeReciprocalTensor(adaptor.getB(), outputType, rewriter, loc);
    if (failed(divisorReciprocal))
      return failure();

    constexpr size_t numInputs = 2;
    auto computeOp = createSpatCompute<numInputs>(
        rewriter, loc, outputType, {}, ValueRange {*dividend, *divisorReciprocal}, [&](Value a, Value inverseB) {
          auto product = spatial::SpatVMulOp::create(rewriter, loc, outputType, a, inverseB);
          spatial::SpatYieldOp::create(rewriter, loc, product.getResult());
        });

    rewriter.replaceOp(op, computeOp);
    return success();
  }
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Registers the ONNX -> Spatial lowerings for the elementwise ops handled in
// this file: Add -> SpatVAddOp, Mul -> SpatVMulOp, and Div (as a multiplication
// by a constant reciprocal).
void populateElementwisePatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
  patterns.add<BinaryElementwiseToSpatialCompute<ONNXAddOp, spatial::SpatVAddOp>,
               BinaryElementwiseToSpatialCompute<ONNXMulOp, spatial::SpatVMulOp>,
               DivToSpatialCompute>(ctx);
}
|
||||||
|
|
||||||
|
} // namespace onnx_mlir
|
||||||
163
src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp
Normal file
163
src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
||||||
|
#include "mlir/Transforms/DialectConversion.h"
|
||||||
|
|
||||||
|
#include "llvm/ADT/SmallVector.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
|
||||||
|
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
|
||||||
|
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||||
|
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||||
|
|
||||||
|
using namespace mlir;
|
||||||
|
|
||||||
|
namespace onnx_mlir {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Converts the optional `axes` attribute of a reduction into a list of axis
// indices for a tensor of the given `rank`.
//
// A missing attribute selects every axis (0 .. rank-1). Otherwise negative
// entries are offset by `rank`, and the list is sorted and de-duplicated.
// No range validation is performed here.
static SmallVector<int64_t> normalizeAxes(ArrayAttr axesAttr, int64_t rank) {
  SmallVector<int64_t> axes;

  if (axesAttr) {
    axes.reserve(axesAttr.size());
    for (Attribute axisAttr : axesAttr) {
      int64_t axis = cast<IntegerAttr>(axisAttr).getInt();
      if (axis < 0)
        axis += rank;
      axes.push_back(axis);
    }

    llvm::sort(axes);
    auto firstDuplicate = std::unique(axes.begin(), axes.end());
    axes.erase(firstDuplicate, axes.end());
    return axes;
  }

  // No attribute: reduce over all axes.
  axes.reserve(rank);
  for (int64_t axis = 0; axis < rank; ++axis)
    axes.push_back(axis);
  return axes;
}
|
||||||
|
|
||||||
|
// Returns a mask of length `rank` whose entry i is true when axis i appears in
// `axes`. If any axis lies outside [0, rank) an empty vector is returned
// instead, which callers use as the error signal.
static SmallVector<bool> buildReducedAxesMask(ArrayRef<int64_t> axes, int64_t rank) {
  SmallVector<bool> mask(rank, false);
  for (int64_t axis : axes) {
    const bool inRange = 0 <= axis && axis < rank;
    if (!inRange)
      return {};
    mask[axis] = true;
  }
  return mask;
}
|
||||||
|
|
||||||
|
// Returns the tensor type with the same rank as `inputType`, every dimension
// equal to 1, and element type `elementType`.
static RankedTensorType getAllOnesType(RankedTensorType inputType, Type elementType) {
  SmallVector<int64_t> unitShape(inputType.getRank(), 1);
  return RankedTensorType::get(unitShape, elementType);
}
|
||||||
|
|
||||||
|
// Builds the reassociation groups for collapsing away the reduced axes of a
// keepdims-shaped tensor.
//
// Axes are accumulated into a group that is closed by each non-reduced axis.
// Reduced axes trailing the last non-reduced axis are appended to the final
// group; if every axis is reduced, a single group containing all axes results.
static SmallVector<ReassociationIndices> buildCollapseReassociation(ArrayRef<bool> reducedAxes) {
  SmallVector<ReassociationIndices> groups;
  ReassociationIndices pending;

  for (size_t axis = 0, numAxes = reducedAxes.size(); axis < numAxes; ++axis) {
    pending.push_back(static_cast<int64_t>(axis));
    if (reducedAxes[axis])
      continue;
    // A kept axis terminates the current group.
    groups.push_back(pending);
    pending.clear();
  }

  if (pending.empty())
    return groups;

  // Leftover reduced axes: attach them to the last group, or make them the
  // only group when no axis was kept.
  if (groups.empty())
    groups.push_back(std::move(pending));
  else
    groups.back().append(pending.begin(), pending.end());

  return groups;
}
|
||||||
|
|
||||||
|
// Wraps a single SpatVAvgOp over `input` in a one-input spatial compute region
// producing `resultType`, and returns the region's first result.
static Value createAverageCompute(Value input,
                                  RankedTensorType resultType,
                                  ConversionPatternRewriter& rewriter,
                                  Location loc) {
  constexpr size_t numInputs = 1;
  auto averageCompute =
      createSpatCompute<numInputs>(rewriter, loc, resultType, {}, ValueRange {input}, [&](Value operand) {
        auto average = spatial::SpatVAvgOp::create(rewriter, loc, resultType, operand);
        spatial::SpatYieldOp::create(rewriter, loc, average.getResult());
      });
  return averageCompute.getResult(0);
}
|
||||||
|
|
||||||
|
// Recursively builds the keepdims form of a mean reduction of `input`, starting
// at dimension `axis`.
//
// Reduced axes are skipped. Each non-reduced axis is split into size-1 slices,
// every slice is reduced recursively, and the partial results are concatenated
// back along that axis. Once all axes have been visited, the remaining tensor
// is averaged into a `leafType` tensor by a single average compute.
static Value buildReduceMeanKeepdims(Value input,
                                     ArrayRef<bool> reducedAxes,
                                     int64_t axis,
                                     RankedTensorType leafType,
                                     ConversionPatternRewriter& rewriter,
                                     Location loc) {
  const int64_t rank = cast<RankedTensorType>(input.getType()).getRank();

  // Reduced axes need no slicing; advance to the next kept axis (or the end).
  while (axis != rank && reducedAxes[axis])
    ++axis;

  if (axis == rank)
    return createAverageCompute(input, leafType, rewriter, loc);

  SmallVector<Value> slices = sliceTensor(input, axis, /*sliceSize=*/1, rewriter, loc);

  SmallVector<Value> partialResults;
  partialResults.reserve(slices.size());
  for (Value slice : slices) {
    Value reducedSlice = buildReduceMeanKeepdims(slice, reducedAxes, axis + 1, leafType, rewriter, loc);
    partialResults.push_back(reducedSlice);
  }

  // A single slice needs no concatenation.
  if (partialResults.size() == 1)
    return partialResults.front();

  return tensor::ConcatOp::create(rewriter, loc, axis, partialResults).getResult();
}
|
||||||
|
|
||||||
|
// Drops the reduced (size-1) axes from the keepdims result so that it has type
// `resultType`.
//
// A rank-0 result is produced by extracting the element at index 0 along every
// dimension and rebuilding a tensor from it; any other result goes through
// tensor.collapse_shape with the groups derived from `reducedAxes`.
static Value squeezeReducedAxes(Value keepdimsValue,
                                RankedTensorType resultType,
                                ArrayRef<bool> reducedAxes,
                                ConversionPatternRewriter& rewriter,
                                Location loc) {
  if (resultType.getRank() != 0) {
    auto collapseOp = tensor::CollapseShapeOp::create(
        rewriter, loc, resultType, keepdimsValue, buildCollapseReassociation(reducedAxes));
    return collapseOp.getResult();
  }

  const int64_t keepdimsRank = cast<RankedTensorType>(keepdimsValue.getType()).getRank();
  // One shared zero index, repeated once per dimension.
  SmallVector<Value> zeroIndices(keepdimsRank, arith::ConstantIndexOp::create(rewriter, loc, 0));
  Value scalar = tensor::ExtractOp::create(rewriter, loc, keepdimsValue, zeroIndices);
  return tensor::FromElementsOp::create(rewriter, loc, resultType, ValueRange {scalar});
}
|
||||||
|
|
||||||
|
// Lowers onnx.ReduceMean (opset 13 form) to spatial average computes.
//
// Requires statically shaped ranked input and result tensors and in-range
// axes. The reduction is first built in keepdims form; when the op's keepdims
// attribute is 0 the reduced axes are squeezed out afterwards.
struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
  using OpConversionPattern::OpConversionPattern;

  LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp,
                                ONNXReduceMeanV13OpAdaptor adaptor,
                                ConversionPatternRewriter& rewriter) const override {
    auto inputType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
    auto resultType = dyn_cast<RankedTensorType>(reduceMeanOp.getReduced().getType());
    if (!inputType || !resultType)
      return failure();
    if (!inputType.hasStaticShape() || !resultType.hasStaticShape())
      return failure();

    const int64_t inputRank = inputType.getRank();
    SmallVector<int64_t> axes = normalizeAxes(reduceMeanOp.getAxesAttr(), inputRank);
    SmallVector<bool> reducedAxes = buildReducedAxesMask(axes, inputRank);
    // An empty mask signals an out-of-range axis, except for rank-0 inputs
    // where the mask is legitimately empty.
    if (inputRank != 0 && reducedAxes.empty())
      return failure();

    const Location loc = reduceMeanOp.getLoc();
    RankedTensorType leafType = getAllOnesType(inputType, resultType.getElementType());
    Value keepdimsResult =
        buildReduceMeanKeepdims(adaptor.getData(), reducedAxes, /*axis=*/0, leafType, rewriter, loc);

    const bool keepDims = reduceMeanOp.getKeepdims() != 0;
    Value replacement =
        keepDims ? keepdimsResult : squeezeReducedAxes(keepdimsResult, resultType, reducedAxes, rewriter, loc);

    rewriter.replaceOp(reduceMeanOp, replacement);
    return success();
  }
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Registers the onnx.ReduceMean -> Spatial lowering pattern in `patterns`.
void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
  patterns.add<ReduceMeanToSpatialCompute>(ctx);
}
|
||||||
|
|
||||||
|
} // namespace onnx_mlir
|
||||||
36
src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp
Normal file
36
src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Sigmoid.cpp
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
#include "mlir/Transforms/DialectConversion.h"
|
||||||
|
|
||||||
|
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
|
||||||
|
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||||
|
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||||
|
|
||||||
|
using namespace mlir;
|
||||||
|
|
||||||
|
namespace onnx_mlir {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Lowers onnx.Sigmoid to a one-input spatial compute region whose body applies
// SpatSigmoidOp to the input and yields it. The result type of the original op
// is reused unchanged; no shape checks are performed.
struct SigmoidToSpatialCompute : OpConversionPattern<ONNXSigmoidOp> {
  using OpConversionPattern::OpConversionPattern;

  LogicalResult matchAndRewrite(ONNXSigmoidOp sigmoidOp,
                                ONNXSigmoidOpAdaptor adaptor,
                                ConversionPatternRewriter& rewriter) const override {
    constexpr size_t numInputs = 1;
    const Location loc = sigmoidOp.getLoc();
    Type outputType = sigmoidOp.getResult().getType();

    auto sigmoidCompute =
        createSpatCompute<numInputs>(rewriter, loc, outputType, {}, adaptor.getX(), [&](Value operand) {
          auto activation = spatial::SpatSigmoidOp::create(rewriter, loc, outputType, operand);
          spatial::SpatYieldOp::create(rewriter, loc, activation.getResult());
        });

    rewriter.replaceOp(sigmoidOp, sigmoidCompute);
    return success();
  }
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Registers the onnx.Sigmoid -> Spatial lowering pattern in `patterns`.
void populateSigmoidPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
  patterns.add<SigmoidToSpatialCompute>(ctx);
}
|
||||||
|
|
||||||
|
} // namespace onnx_mlir
|
||||||
@@ -39,6 +39,12 @@ def spatToPimVVMul : Pat<
|
|||||||
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
|
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// Rewrites a SpatVAvgOp into a PimVAvgOp on the same input. The second operand
// of the PIM op is the tensor returned by the native helper
// getBestOutputTensorFromOperandsOrAllocate for the op defining the source result.
def spatToPimVAvg : Pat<
  (SpatVAvgOp:$srcOpRes $input),
  (PimVAvgOp $input,
    (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
|
||||||
|
|
||||||
def spatToPimVVMax : Pat<
|
def spatToPimVVMax : Pat<
|
||||||
(SpatVMaxOp:$srcOpRes $a, $b),
|
(SpatVMaxOp:$srcOpRes $a, $b),
|
||||||
(PimVVMaxOp $a, $b,
|
(PimVVMaxOp $a, $b,
|
||||||
@@ -51,4 +57,10 @@ def spatToPimVRelu : Pat<
|
|||||||
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
|
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
// Rewrites a SpatSigmoidOp into a PimVSigmOp on the same input. The second
// operand of the PIM op is the tensor returned by the native helper
// getBestOutputTensorFromOperandsOrAllocate for the op defining the source result.
def spatToPimVSigm : Pat<
  (SpatSigmoidOp:$srcOpRes $input),
  (PimVSigmOp $input,
    (NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
|
||||||
|
|
||||||
#endif // SPATIAL_TO_PIM
|
#endif // SPATIAL_TO_PIM
|
||||||
|
|||||||
@@ -161,26 +161,41 @@ void SpatialToPimPass::runOnOperation() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto receiveOp : funcOp.getOps<spatial::SpatChannelReceiveOp>()) {
|
for (auto receiveOp : funcOp.getOps<spatial::SpatChannelReceiveOp>()) {
|
||||||
operationsToRemove.push_back(receiveOp);
|
markOpToRemove(receiveOp);
|
||||||
runOnReceiveOp(receiveOp, rewriter);
|
runOnReceiveOp(receiveOp, rewriter);
|
||||||
}
|
}
|
||||||
for (auto computeOp : funcOp.getOps<spatial::SpatWeightedCompute>()) {
|
for (auto computeOp : funcOp.getOps<spatial::SpatWeightedCompute>()) {
|
||||||
operationsToRemove.push_back(computeOp);
|
markOpToRemove(computeOp);
|
||||||
runOnComputeOp(computeOp, rewriter);
|
runOnComputeOp(computeOp, rewriter);
|
||||||
}
|
}
|
||||||
|
|
||||||
enlargeVMMOutTensorsToCrossbarSize(funcOp, rewriter);
|
enlargeVMMOutTensorsToCrossbarSize(funcOp, rewriter);
|
||||||
replaceReturnOpOperands(returnOp, rewriter);
|
replaceReturnOpOperands(returnOp, rewriter);
|
||||||
|
|
||||||
// Remove all ComputeOps
|
SmallVector<Operation*> pendingRemovals(operationsToRemove.begin(), operationsToRemove.end());
|
||||||
for (auto opToRemove : llvm::reverse(operationsToRemove)) {
|
while (!pendingRemovals.empty()) {
|
||||||
if (!opToRemove->use_empty()) {
|
bool erasedAnyOp = false;
|
||||||
|
for (auto it = pendingRemovals.begin(); it != pendingRemovals.end();) {
|
||||||
|
Operation* opToRemove = *it;
|
||||||
|
if (!opToRemove->use_empty()) {
|
||||||
|
++it;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
rewriter.eraseOp(opToRemove);
|
||||||
|
it = pendingRemovals.erase(it);
|
||||||
|
erasedAnyOp = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (erasedAnyOp)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (auto opToRemove : pendingRemovals) {
|
||||||
opToRemove->dump();
|
opToRemove->dump();
|
||||||
for (auto user : opToRemove->getUsers())
|
for (auto user : opToRemove->getUsers())
|
||||||
user->dump();
|
user->dump();
|
||||||
assert(false && "opToRemove should be unused at this point");
|
|
||||||
}
|
}
|
||||||
rewriter.eraseOp(opToRemove);
|
assert(false && "tracked op removal reached a cycle or missed dependency");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dump to file for debug
|
// Dump to file for debug
|
||||||
@@ -284,10 +299,19 @@ void SpatialToPimPass::runOnComputeOp(spatial::SpatWeightedCompute computeOp, IR
|
|||||||
auto concatUses = concatValue.getUses();
|
auto concatUses = concatValue.getUses();
|
||||||
auto numConcatUses = rangeLength(concatUses);
|
auto numConcatUses = rangeLength(concatUses);
|
||||||
if (numConcatUses == 1) {
|
if (numConcatUses == 1) {
|
||||||
OpOperand& concatUse = *concatUses.begin();
|
Value chainedValue = concatValue;
|
||||||
Operation* concatUser = concatUse.getOwner();
|
Operation* concatUser = concatUses.begin()->getOwner();
|
||||||
|
|
||||||
|
while (isChannelUseChainOp(concatUser)) {
|
||||||
|
auto chainUses = concatUser->getResult(0).getUses();
|
||||||
|
if (rangeLength(chainUses) != 1)
|
||||||
|
break;
|
||||||
|
chainedValue = concatUser->getResult(0);
|
||||||
|
concatUser = chainUses.begin()->getOwner();
|
||||||
|
}
|
||||||
|
|
||||||
if (isa<func::ReturnOp>(concatUser)) {
|
if (isa<func::ReturnOp>(concatUser)) {
|
||||||
size_t concatIndexInReturn = concatUse.getOperandNumber();
|
size_t concatIndexInReturn = chainedValue.getUses().begin()->getOperandNumber();
|
||||||
size_t resultIndexInConcat = resultUses.begin()->getOperandNumber();
|
size_t resultIndexInConcat = resultUses.begin()->getOperandNumber();
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
for (auto operand : concatOp->getOperands().take_front(resultIndexInConcat))
|
for (auto operand : concatOp->getOperands().take_front(resultIndexInConcat))
|
||||||
@@ -602,10 +626,22 @@ void SpatialToPimPass::replaceReturnOpOperands(func::ReturnOp& returnOp, IRRewri
|
|||||||
rewriter.modifyOpInPlace(returnOp,
|
rewriter.modifyOpInPlace(returnOp,
|
||||||
[&] { returnOp.setOperand(orderWithinReturn, outputTensors[orderWithinReturn]); });
|
[&] { returnOp.setOperand(orderWithinReturn, outputTensors[orderWithinReturn]); });
|
||||||
|
|
||||||
if (isa<tensor::ConcatOp>(returnOperand)) {
|
Operation* opToErase = returnOperand;
|
||||||
auto returnOperandUses = it.value().getUses();
|
while (opToErase) {
|
||||||
if (rangeLength(returnOperandUses) == 0)
|
bool isExclusivelyOwnedByReturnChain = opToErase->use_empty() || opToErase->hasOneUse();
|
||||||
rewriter.eraseOp(returnOperand);
|
if (!isExclusivelyOwnedByReturnChain)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (isChannelUseChainOp(opToErase)) {
|
||||||
|
Value source = opToErase->getOperand(0);
|
||||||
|
markOpToRemove(opToErase);
|
||||||
|
opToErase = source.getDefiningOp();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isa<tensor::ConcatOp>(opToErase))
|
||||||
|
markOpToRemove(opToErase);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -239,6 +239,22 @@ def SpatSumOp : SpatOp<"sum", []> {
|
|||||||
}];
|
}];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def SpatVAvgOp : SpatOp<"vavg", []> {
|
||||||
|
let summary = "Average all elements of the input tensor to a single scalar wrapped in a tensor";
|
||||||
|
|
||||||
|
let arguments = (ins
|
||||||
|
SpatTensor:$input
|
||||||
|
);
|
||||||
|
|
||||||
|
let results = (outs
|
||||||
|
SpatTensor:$output
|
||||||
|
);
|
||||||
|
|
||||||
|
let assemblyFormat = [{
|
||||||
|
`(` $input `)` attr-dict `:` type($input) `->` type($output)
|
||||||
|
}];
|
||||||
|
}
|
||||||
|
|
||||||
def SpatSigmoidOp : SpatOp<"sigmoid", []> {
|
def SpatSigmoidOp : SpatOp<"sigmoid", []> {
|
||||||
let summary = "Element-wise sigmoid activation";
|
let summary = "Element-wise sigmoid activation";
|
||||||
|
|
||||||
|
|||||||
@@ -361,7 +361,7 @@ struct ChannelBroadcastReceiveOpInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Turn the channel receive to pim.load using by creating a new global buffer
|
* Turn the broadcast receive into a regular pim.receive from the broadcaster.
|
||||||
*/
|
*/
|
||||||
LogicalResult bufferize(Operation* op,
|
LogicalResult bufferize(Operation* op,
|
||||||
RewriterBase& rewriter,
|
RewriterBase& rewriter,
|
||||||
@@ -370,8 +370,21 @@ struct ChannelBroadcastReceiveOpInterface
|
|||||||
|
|
||||||
auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
auto outputTensor = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
||||||
|
|
||||||
auto outputType = cast<ShapedType>(outputTensor.getType());
|
auto numElements = cast<ShapedType>(outputTensor.getType()).getNumElements();
|
||||||
auto outputSize = outputType.getNumElements() * outputType.getElementTypeBitWidth() / 8;
|
auto elementSize = cast<ShapedType>(outputTensor.getType()).getElementTypeBitWidth() / 8;
|
||||||
|
|
||||||
|
auto precomputedOtherCoreId = op->getAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME);
|
||||||
|
if (precomputedOtherCoreId) {
|
||||||
|
Value newValue = pim::PimReceiveOp::create(rewriter,
|
||||||
|
op->getLoc(),
|
||||||
|
outputTensor.getType(),
|
||||||
|
outputTensor,
|
||||||
|
rewriter.getI32IntegerAttr(numElements * elementSize),
|
||||||
|
cast<IntegerAttr>(precomputedOtherCoreId))
|
||||||
|
.getOutput();
|
||||||
|
replaceOpWithBufferizedValues(rewriter, op, newValue);
|
||||||
|
return success();
|
||||||
|
}
|
||||||
|
|
||||||
auto channelNewOp = op->getOperand(0).getDefiningOp<SpatChannelNewOp>();
|
auto channelNewOp = op->getOperand(0).getDefiningOp<SpatChannelNewOp>();
|
||||||
if (!channelNewOp) {
|
if (!channelNewOp) {
|
||||||
@@ -379,31 +392,30 @@ struct ChannelBroadcastReceiveOpInterface
|
|||||||
return failure();
|
return failure();
|
||||||
}
|
}
|
||||||
|
|
||||||
// The first 'broadcast' operation creates the buffer just after the
|
auto srcCoreId = [&]() -> FailureOr<uint32_t> {
|
||||||
// channelNewOp, while the other 'broadcast' operation need to find this
|
for (Operation* user : channelNewOp->getUsers()) {
|
||||||
// buffer allocation just after the channelNewOp
|
auto sendOp = dyn_cast<SpatChannelBroadcastSendOp>(user);
|
||||||
Value bufferAllocation;
|
if (!sendOp)
|
||||||
if (auto allocOpAfterChannel = dyn_cast<memref::AllocOp>(channelNewOp->getNextNode())) {
|
continue;
|
||||||
// Buffer already allocated, load from this buffer
|
auto sendCoreIdAttr = cast<pim::PimCoreOp>(sendOp->getParentOp()).getCoreIdAttr();
|
||||||
bufferAllocation = allocOpAfterChannel;
|
op->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, sendCoreIdAttr);
|
||||||
}
|
return cast<pim::PimCoreOp>(sendOp->getParentOp()).getCoreId();
|
||||||
else {
|
}
|
||||||
// Buffer was not allocated previously, allocate it after channelNewOp
|
op->emitError("ChannelBroadcastReceiveOp has no matching ChannelBroadcastSendOp");
|
||||||
rewriter.setInsertionPointAfter(channelNewOp);
|
return failure();
|
||||||
bufferAllocation = createEmptyFromType(op->getResult(0).getType(), op->getLoc(), rewriter);
|
}();
|
||||||
}
|
if (failed(srcCoreId))
|
||||||
|
return failure();
|
||||||
|
|
||||||
rewriter.setInsertionPoint(op);
|
Value newValue = pim::PimReceiveOp::create(rewriter,
|
||||||
auto memCopyHostToDevOp = pim::PimMemCopyHostToDevOp::create(rewriter,
|
op->getLoc(),
|
||||||
op->getLoc(),
|
outputTensor.getType(),
|
||||||
outputTensor.getType(),
|
outputTensor,
|
||||||
outputTensor,
|
rewriter.getI32IntegerAttr(numElements * elementSize),
|
||||||
bufferAllocation,
|
rewriter.getI32IntegerAttr(srcCoreId.value()))
|
||||||
rewriter.getI32IntegerAttr(0),
|
.getOutput();
|
||||||
rewriter.getI32IntegerAttr(0),
|
|
||||||
rewriter.getI32IntegerAttr(outputSize));
|
|
||||||
|
|
||||||
replaceOpWithBufferizedValues(rewriter, op, memCopyHostToDevOp.getOutput());
|
replaceOpWithBufferizedValues(rewriter, op, newValue);
|
||||||
|
|
||||||
return success();
|
return success();
|
||||||
}
|
}
|
||||||
@@ -428,8 +440,7 @@ struct ChannelBroadcastSendOpInterface
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Turn the channel send into a device-to-host copy into the shared
|
* Turn the broadcast send into one pim.send per broadcast receiver.
|
||||||
* broadcast buffer that receive ops load from later.
|
|
||||||
*/
|
*/
|
||||||
LogicalResult bufferize(Operation* op,
|
LogicalResult bufferize(Operation* op,
|
||||||
RewriterBase& rewriter,
|
RewriterBase& rewriter,
|
||||||
@@ -448,32 +459,32 @@ struct ChannelBroadcastSendOpInterface
|
|||||||
return failure();
|
return failure();
|
||||||
}
|
}
|
||||||
|
|
||||||
// The first 'broadcast' operation creates the buffer just after the
|
|
||||||
// channelNewOp, while the other 'broadcast' operation need to find this
|
|
||||||
// buffer allocation just after the channelNewOp
|
|
||||||
Value bufferAllocation;
|
|
||||||
if (auto allocOpAfterChannel = dyn_cast<memref::AllocOp>(channelNewOp->getNextNode())) {
|
|
||||||
// Buffer already allocated, load from this buffer
|
|
||||||
bufferAllocation = allocOpAfterChannel;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// Buffer was not allocated previously, allocate it after channelNewOp
|
|
||||||
rewriter.setInsertionPointAfter(channelNewOp);
|
|
||||||
bufferAllocation = createEmptyFromType(srcTensor.getType(), op->getLoc(), rewriter);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto srcType = cast<ShapedType>(srcTensor.getType());
|
auto srcType = cast<ShapedType>(srcTensor.getType());
|
||||||
auto sizeInBytes = srcType.getNumElements() * srcType.getElementTypeBitWidth() / 8;
|
auto sizeInBytes = srcType.getNumElements() * srcType.getElementTypeBitWidth() / 8;
|
||||||
|
auto srcCoreIdAttr = cast<pim::PimCoreOp>(op->getParentOp()).getCoreIdAttr();
|
||||||
|
|
||||||
rewriter.setInsertionPoint(op);
|
rewriter.setInsertionPoint(op);
|
||||||
pim::PimMemCopyDevToHostOp::create(rewriter,
|
bool foundReceiver = false;
|
||||||
op->getLoc(),
|
for (Operation* user : channelNewOp->getUsers()) {
|
||||||
bufferAllocation.getType(),
|
auto receiveOp = dyn_cast<SpatChannelBroadcastReceiveOp>(user);
|
||||||
bufferAllocation,
|
if (!receiveOp)
|
||||||
srcMemRef,
|
continue;
|
||||||
rewriter.getI32IntegerAttr(0),
|
|
||||||
rewriter.getI32IntegerAttr(0),
|
foundReceiver = true;
|
||||||
rewriter.getI32IntegerAttr(sizeInBytes));
|
auto dstCoreId = cast<pim::PimCoreOp>(receiveOp->getParentOp()).getCoreId();
|
||||||
|
receiveOp->setAttr(PRECOMPUTED_OTHER_CORE_ID_ATTR_NAME, srcCoreIdAttr);
|
||||||
|
pim::PimSendOp::create(rewriter,
|
||||||
|
op->getLoc(),
|
||||||
|
srcMemRef,
|
||||||
|
rewriter.getI32IntegerAttr(sizeInBytes),
|
||||||
|
rewriter.getI32IntegerAttr(dstCoreId));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!foundReceiver) {
|
||||||
|
op->emitError("SpatChannelBroadcastSendOp has no matching ChannelBroadcastReceiveOp");
|
||||||
|
return failure();
|
||||||
|
}
|
||||||
|
|
||||||
rewriter.eraseOp(op);
|
rewriter.eraseOp(op);
|
||||||
return success();
|
return success();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,66 +3,108 @@
|
|||||||
ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline.
|
ONNX test models used by `validate.py` to verify the Raptor compiler + PIM simulator pipeline.
|
||||||
|
|
||||||
Generated tests can be regenerated with:
|
Generated tests can be regenerated with:
|
||||||
|
|
||||||
```
|
```
|
||||||
python3 validation/operations/gen_tests.py
|
python3 validation/operations/gen_tests.py
|
||||||
```
|
```
|
||||||
|
|
||||||
## Conv
|
## Conv
|
||||||
|
|
||||||
| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
|
| Test | Directory | Input | Output | Kernel | Stride | Padding | Bias | Notes |
|
||||||
|------|-----------|-------|--------|--------|--------|---------|------|-------|
|
|------------------|-------------------------|-----------|-----------|--------|--------|------------|------|------------------------------------|
|
||||||
| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
|
| Simple | `conv/simple` | [1,3,3,3] | [1,1,2,2] | 2x2 | 1 | none | no | Basic conv, hand-crafted |
|
||||||
| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
|
| With constant | `conv/with_constant` | [1,3,3,3] | [1,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Hand-crafted, constant weight+bias |
|
||||||
| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
|
| Batch 2 | `conv/batch_2` | [2,3,3,3] | [2,1,3,3] | 2x2 | 1 | SAME_UPPER | yes | Batched input |
|
||||||
| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
|
| Kernel 3x3 | `conv/kernel_3x3` | [1,1,5,5] | [1,1,3,3] | 3x3 | 1 | none | no | Larger kernel |
|
||||||
| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
|
| Stride 2 | `conv/stride_2` | [1,1,6,6] | [1,1,2,2] | 3x3 | 2 | none | no | Strided convolution |
|
||||||
| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
|
| Multi channel | `conv/multi_channel` | [1,3,5,5] | [1,4,3,3] | 3x3 | 1 | none | no | 3 in channels, 4 out channels |
|
||||||
| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
|
| Pointwise 1x1 | `conv/pointwise_1x1` | [1,8,4,4] | [1,4,4,4] | 1x1 | 1 | none | no | Channel mixing |
|
||||||
| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
|
| SAME padding 3x3 | `conv/same_padding_3x3` | [1,1,5,5] | [1,1,5,5] | 3x3 | 1 | SAME_UPPER | no | Spatial dims preserved |
|
||||||
| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
|
| Explicit padding | `conv/explicit_padding` | [1,1,4,4] | [1,1,4,4] | 3x3 | 1 | [1,1,1,1] | no | Symmetric explicit pads |
|
||||||
| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
|
| With bias 3x3 | `conv/with_bias_3x3` | [1,3,5,5] | [1,2,3,3] | 3x3 | 1 | none | yes | Multi-channel with bias |
|
||||||
| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
|
| Large spatial | `conv/large_spatial` | [1,1,8,8] | [1,1,6,6] | 3x3 | 1 | none | no | Larger spatial input |
|
||||||
|
|
||||||
## Pool
|
|
||||||
|
|
||||||
| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes |
|
|
||||||
|------|-----------|-------|--------|--------|--------|---------|-------|
|
|
||||||
| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling |
|
|
||||||
| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool |
|
|
||||||
| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path |
|
|
||||||
| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling |
|
|
||||||
| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` |
|
|
||||||
| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` |
|
|
||||||
| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` |
|
|
||||||
|
|
||||||
## Relu
|
|
||||||
|
|
||||||
| Test | Directory | Input | Output | Notes |
|
|
||||||
|------|-----------|-------|--------|-------|
|
|
||||||
| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu |
|
|
||||||
| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu |
|
|
||||||
| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu |
|
|
||||||
| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu |
|
|
||||||
|
|
||||||
## Gemm
|
## Gemm
|
||||||
|
|
||||||
| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
|
| Test | Directory | A (input) | W (weight) | Output | transB | alpha | beta | Bias | Notes |
|
||||||
|------|-----------|-----------|------------|--------|--------|-------|------|------|-------|
|
|---------------|-------------------------|-----------|------------|----------|--------|-------|------|-------|------------------------------|
|
||||||
| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
|
| Default | `gemm/` | [10,132] | [132,132] | [10,132] | no | 1 | 1 | no | Hand-crafted, square weights |
|
||||||
| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
|
| Non-square | `gemm/non_square` | [4,128] | [128,64] | [4,64] | no | 1 | 1 | no | K != N |
|
||||||
| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
|
| With bias | `gemm/with_bias` | [4,128] | [128,128] | [4,128] | no | 1 | 1 | [128] | Bias vector |
|
||||||
| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
|
| transB | `gemm/transB` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | no | Transposed weight |
|
||||||
| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
|
| Alpha/beta | `gemm/alpha_beta` | [4,64] | [64,64] | [4,64] | no | 0.5 | 0.25 | [64] | Scaled matmul + bias |
|
||||||
| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
|
| Small | `gemm/small` | [2,8] | [8,4] | [2,4] | no | 1 | 1 | no | Tiny matrices |
|
||||||
| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
|
| Large | `gemm/large` | [8,256] | [256,128] | [8,128] | no | 1 | 1 | no | Larger matrices |
|
||||||
| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
|
| transB + bias | `gemm/transB_with_bias` | [4,128] | [64,128] | [4,64] | yes | 1 | 1 | [64] | Combined |
|
||||||
|
|
||||||
## Gemv
|
## Gemv
|
||||||
|
|
||||||
| Test | Directory | Input | W (weight) | Output | Bias | Notes |
|
| Test | Directory | Input | W (weight) | Output | Bias | Notes |
|
||||||
|------|-----------|-------|------------|--------|------|-------|
|
|---------------------|------------------------------------|----------|------------|---------|---------|----------------------------|
|
||||||
| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
|
| Simple | `gemv/simple` | [1,132] | [132,132] | [1,132] | no | Single-sample matmul |
|
||||||
| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
|
| Constant | `gemv/constant` | _(none)_ | [132,132] | [1,132] | no | All inputs constant |
|
||||||
| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
|
| Homogeneous const | `gemv/with_homogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Bias matches output shape |
|
||||||
| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
|
| Heterogeneous const | `gemv/with_heterogeneous_constant` | [1,132] | [132,132] | [1,132] | [1,132] | Different constant pattern |
|
||||||
| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |
|
| Scalar const | `gemv/with_scalar_constant` | [1,132] | [132,132] | [1,132] | [1,1] | Scalar bias, broadcast |
|
||||||
|
|
||||||
|
## Pool
|
||||||
|
|
||||||
|
| Test | Directory | Input | Output | Kernel | Stride | Padding | Notes |
|
||||||
|
|----------------------------|---------------------------------|-----------|-----------|------------------------|--------|------------|----------------------------------|
|
||||||
|
| Max basic | `pool/max_basic` | [1,1,4,4] | [1,1,3,3] | 2x2 | 1 | none | Basic max pooling |
|
||||||
|
| Max stride 2 multi-channel | `pool/max_stride2_multichannel` | [1,5,6,6] | [1,5,3,3] | 2x2 | 2 | none | Channel-preserving max pool |
|
||||||
|
| Max SAME_UPPER | `pool/max_same_upper` | [1,1,5,5] | [1,1,3,3] | 3x3 | 2 | SAME_UPPER | Deprecated auto_pad path |
|
||||||
|
| Avg basic | `pool/avg_basic` | [1,3,4,4] | [1,3,3,3] | 2x2 | 1 | none | Basic average pooling |
|
||||||
|
| Avg explicit padding | `pool/avg_explicit_padding` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=0` |
|
||||||
|
| Avg include pad | `pool/avg_include_pad` | [1,2,4,4] | [1,2,2,2] | 3x3 | 2 | [1,1,1,1] | `count_include_pad=1` |
|
||||||
|
| Max after Conv | `pool/max_after_conv` | [1,3,6,6] | [1,4,2,2] | Conv 3x3 then Pool 2x2 | 2 | none | Regression for `pool(conv(...))` |
|
||||||
|
|
||||||
|
## ReduceMean
|
||||||
|
|
||||||
|
| Test | Directory | Input | Output | Axes | Keepdims | Notes |
|
||||||
|
|------------|--------------------------|-----------|-----------|-------|----------|-------------------------------------------------|
|
||||||
|
| Basic | `reduce_mean/basic` | [4,8] | [4,1] | [1] | 1 | Reduce feature dimension, preserving rank |
|
||||||
|
| Keepdims 0 | `reduce_mean/keepdims_0` | [4,8] | [4] | [1] | 0 | Reduce feature dimension, dropping reduced axis |
|
||||||
|
| 4D spatial | `reduce_mean/4d_spatial` | [1,3,4,4] | [1,3,1,1] | [2,3] | 1 | Reduce H and W on NCHW input |
|
||||||
|
| After Conv | `reduce_mean/after_conv` | [1,3,5,5] | [1,2,1,1] | [2,3] | 1 | Conv 3x3 + bias, then spatial ReduceMean |
|
||||||
|
|
||||||
|
## Relu
|
||||||
|
|
||||||
|
| Test | Directory | Input | Output | Notes |
|
||||||
|
|------------|-------------------|-----------|-----------|----------------------------|
|
||||||
|
| Basic | `relu/basic` | [4,8] | [4,8] | Standalone 2D Relu |
|
||||||
|
| 4D | `relu/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Relu |
|
||||||
|
| After Conv | `relu/after_conv` | [1,3,5,5] | [1,2,3,3] | Conv 3x3 + bias, then Relu |
|
||||||
|
| After Gemm | `relu/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Relu |
|
||||||
|
|
||||||
|
## Sigmoid
|
||||||
|
|
||||||
|
| Test | Directory | Input | Output | Notes |
|
||||||
|
|------------|----------------------|-----------|-----------|---------------------------|
|
||||||
|
| Basic | `sigmoid/basic` | [4,8] | [4,8] | Standalone 2D Sigmoid |
|
||||||
|
| 4D | `sigmoid/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Sigmoid |
|
||||||
|
| After Gemm | `sigmoid/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Sigmoid |
|
||||||
|
|
||||||
|
## Add
|
||||||
|
|
||||||
|
| Test | Directory | Input(s) | Output | Notes |
|
||||||
|
|---------------|---------------------|------------------|--------|---------------------------------------------|
|
||||||
|
| Basic | `add/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise add, same-shape inputs |
|
||||||
|
| Broadcast row | `add/broadcast_row` | A:[4,8], B:[8] | [4,8] | Row-vector broadcasting via initializer |
|
||||||
|
| After Gemm | `add/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Add with broadcast vector |
|
||||||
|
|
||||||
|
## Mul
|
||||||
|
|
||||||
|
| Test | Directory | Input(s) | Output | Notes |
|
||||||
|
|-----------------|-----------------------|--------------------------|-----------|-------------------------------------------|
|
||||||
|
| Basic | `mul/basic` | A:[4,8], B:[4,8] | [4,8] | Elementwise multiply, same-shape inputs |
|
||||||
|
| Scalar constant | `mul/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer |
|
||||||
|
| After Conv | `mul/after_conv` | X:[1,3,5,5], S:[1,2,1,1] | [1,2,3,3] | Conv 3x3 + bias, then per-channel scaling |
|
||||||
|
|
||||||
|
## Div
|
||||||
|
|
||||||
|
| Test | Directory | Input(s) | Output | Notes |
|
||||||
|
|-----------------|-----------------------|------------------|--------|------------------------------------------------------|
|
||||||
|
| Basic | `div/basic` | X:[4,8], D:[4,8] | [4,8] | Elementwise divide by same-shape constant tensor |
|
||||||
|
| Scalar constant | `div/scalar_constant` | X:[4,8], S:[1] | [4,8] | Scalar broadcasting via initializer |
|
||||||
|
| After Gemm | `div/after_gemm` | A:[4,64], D:[32] | [4,32] | Gemm + bias, then Div with positive broadcast vector |
|
||||||
|
|||||||
BIN
validation/operations/add/after_gemm/add_after_gemm.onnx
Normal file
BIN
validation/operations/add/after_gemm/add_after_gemm.onnx
Normal file
Binary file not shown.
BIN
validation/operations/add/basic/add_basic.onnx
Normal file
BIN
validation/operations/add/basic/add_basic.onnx
Normal file
Binary file not shown.
BIN
validation/operations/add/broadcast_row/add_broadcast_row.onnx
Normal file
BIN
validation/operations/add/broadcast_row/add_broadcast_row.onnx
Normal file
Binary file not shown.
BIN
validation/operations/div/after_gemm/div_after_gemm.onnx
Normal file
BIN
validation/operations/div/after_gemm/div_after_gemm.onnx
Normal file
Binary file not shown.
BIN
validation/operations/div/basic/div_basic.onnx
Normal file
BIN
validation/operations/div/basic/div_basic.onnx
Normal file
Binary file not shown.
Binary file not shown.
@@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Generate ONNX test models for validating GEMM, Conv, Pooling, and Relu implementations."""
|
"""Generate ONNX test models for validating GEMM, Conv, Pooling, Relu, and ReduceMean implementations."""
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import onnx
|
import onnx
|
||||||
@@ -19,102 +19,8 @@ def save_model(model, directory, filename):
|
|||||||
print(f" {path.relative_to(OPERATIONS_DIR)}")
|
print(f" {path.relative_to(OPERATIONS_DIR)}")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
def make_int64_initializer(name, values):
|
||||||
# GEMM tests
|
return numpy_helper.from_array(np.asarray(values, dtype=np.int64), name=name)
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
def gemm_non_square():
|
|
||||||
"""GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
|
|
||||||
B, K, N = 4, 128, 64
|
|
||||||
W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
|
|
||||||
graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/non_square", "gemm_non_square.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
def gemm_with_bias():
|
|
||||||
"""GEMM with bias: Y = A @ W + C."""
|
|
||||||
B, K, N = 4, 128, 128
|
|
||||||
rng = np.random.default_rng(43)
|
|
||||||
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
|
||||||
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
|
|
||||||
graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/with_bias", "gemm_with_bias.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
def gemm_transB():
|
|
||||||
"""GEMM with transB=1: Y = A @ W^T."""
|
|
||||||
B, K, N = 4, 128, 64
|
|
||||||
rng = np.random.default_rng(44)
|
|
||||||
# W stored as [N, K], transposed during computation
|
|
||||||
W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
|
|
||||||
graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/transB", "gemm_transB.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
def gemm_alpha_beta():
|
|
||||||
"""GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
|
|
||||||
B, K, N = 4, 64, 64
|
|
||||||
rng = np.random.default_rng(45)
|
|
||||||
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
|
||||||
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25)
|
|
||||||
graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
def gemm_small():
|
|
||||||
"""Small GEMM: [2, 8] @ [8, 4]."""
|
|
||||||
B, K, N = 2, 8, 4
|
|
||||||
rng = np.random.default_rng(46)
|
|
||||||
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
|
|
||||||
graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/small", "gemm_small.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
def gemm_large():
|
|
||||||
"""Larger GEMM: [8, 256] @ [256, 128]."""
|
|
||||||
B, K, N = 8, 256, 128
|
|
||||||
rng = np.random.default_rng(47)
|
|
||||||
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
|
|
||||||
graph = helper.make_graph([node], "gemm_large", [A], [Y], initializer=[W])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/large", "gemm_large.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
def gemm_transB_with_bias():
|
|
||||||
"""GEMM with transB and bias: Y = A @ W^T + C."""
|
|
||||||
B, K, N = 4, 128, 64
|
|
||||||
rng = np.random.default_rng(48)
|
|
||||||
W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W")
|
|
||||||
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
|
||||||
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
|
||||||
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
|
||||||
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
|
|
||||||
graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C])
|
|
||||||
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
|
||||||
save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx")
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -248,6 +154,104 @@ def conv_large_spatial():
|
|||||||
save_model(model, "conv/large_spatial", "conv_large_spatial.onnx")
|
save_model(model, "conv/large_spatial", "conv_large_spatial.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# GEMM tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def gemm_non_square():
|
||||||
|
"""GEMM with non-square weight matrix: [B, K] @ [K, N], K != N."""
|
||||||
|
B, K, N = 4, 128, 64
|
||||||
|
W = numpy_helper.from_array(np.random.default_rng(42).uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "gemm_non_square", [A], [Y], initializer=[W])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/non_square", "gemm_non_square.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def gemm_with_bias():
|
||||||
|
"""GEMM with bias: Y = A @ W + C."""
|
||||||
|
B, K, N = 4, 128, 128
|
||||||
|
rng = np.random.default_rng(43)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "gemm_with_bias", [A], [Y], initializer=[W, C])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/with_bias", "gemm_with_bias.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def gemm_transB():
|
||||||
|
"""GEMM with transB=1: Y = A @ W^T."""
|
||||||
|
B, K, N = 4, 128, 64
|
||||||
|
rng = np.random.default_rng(44)
|
||||||
|
# W stored as [N, K], transposed during computation
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W"], ["Y"], transB=1)
|
||||||
|
graph = helper.make_graph([node], "gemm_transB", [A], [Y], initializer=[W])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/transB", "gemm_transB.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def gemm_alpha_beta():
|
||||||
|
"""GEMM with alpha and beta: Y = 0.5 * A @ W + 0.25 * C."""
|
||||||
|
B, K, N = 4, 64, 64
|
||||||
|
rng = np.random.default_rng(45)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], alpha=0.5, beta=0.25)
|
||||||
|
graph = helper.make_graph([node], "gemm_alpha_beta", [A], [Y], initializer=[W, C])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/alpha_beta", "gemm_alpha_beta.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def gemm_small():
|
||||||
|
"""Small GEMM: [2, 8] @ [8, 4]."""
|
||||||
|
B, K, N = 2, 8, 4
|
||||||
|
rng = np.random.default_rng(46)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "gemm_small", [A], [Y], initializer=[W])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/small", "gemm_small.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def gemm_large():
|
||||||
|
"""Larger GEMM: [8, 256] @ [256, 128]."""
|
||||||
|
B, K, N = 8, 256, 128
|
||||||
|
rng = np.random.default_rng(47)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "gemm_large", [A], [Y], initializer=[W])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/large", "gemm_large.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def gemm_transB_with_bias():
|
||||||
|
"""GEMM with transB and bias: Y = A @ W^T + C."""
|
||||||
|
B, K, N = 4, 128, 64
|
||||||
|
rng = np.random.default_rng(48)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (N, K)).astype(np.float32), name="W")
|
||||||
|
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
node = helper.make_node("Gemm", ["A", "W", "C"], ["Y"], transB=1)
|
||||||
|
graph = helper.make_graph([node], "gemm_transB_with_bias", [A], [Y], initializer=[W, C])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "gemm/transB_with_bias", "gemm_transB_with_bias.onnx")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Pooling tests
|
# Pooling tests
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -327,6 +331,55 @@ def maxpool_after_conv():
|
|||||||
save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx")
|
save_model(model, "pool/max_after_conv", "maxpool_after_conv.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ReduceMean tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def reducemean_basic():
|
||||||
|
"""ReduceMean over the feature dimension, preserving rank."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])
|
||||||
|
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
|
||||||
|
graph = helper.make_graph([node], "reducemean_basic", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def reducemean_keepdims_0():
|
||||||
|
"""ReduceMean over the feature dimension, dropping the reduced axis."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])
|
||||||
|
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
|
||||||
|
graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def reducemean_4d_spatial():
|
||||||
|
"""ReduceMean over H and W on an NCHW tensor."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])
|
||||||
|
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
|
||||||
|
graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def reducemean_after_conv():
|
||||||
|
"""Conv followed by ReduceMean over the spatial dimensions."""
|
||||||
|
rng = np.random.default_rng(62)
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
|
||||||
|
B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
|
||||||
|
conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
|
||||||
|
kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
|
||||||
|
reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
|
||||||
|
graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Relu tests
|
# Relu tests
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -381,6 +434,220 @@ def relu_after_gemm():
|
|||||||
save_model(model, "relu/after_gemm", "relu_after_gemm.onnx")
|
save_model(model, "relu/after_gemm", "relu_after_gemm.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Sigmoid tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def sigmoid_basic():
|
||||||
|
"""Standalone Sigmoid on a simple 2D tensor."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
node = helper.make_node("Sigmoid", ["X"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "sigmoid_basic", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "sigmoid/basic", "sigmoid_basic.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def sigmoid_4d():
|
||||||
|
"""Standalone Sigmoid on an NCHW tensor."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 3, 4, 4])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 3, 4, 4])
|
||||||
|
node = helper.make_node("Sigmoid", ["X"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "sigmoid_4d", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "sigmoid/4d", "sigmoid_4d.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def sigmoid_after_gemm():
|
||||||
|
"""Gemm followed by Sigmoid."""
|
||||||
|
B, K, N = 4, 64, 32
|
||||||
|
rng = np.random.default_rng(63)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
|
||||||
|
sigmoid = helper.make_node("Sigmoid", ["G"], ["Y"])
|
||||||
|
graph = helper.make_graph([gemm, sigmoid], "sigmoid_after_gemm", [A], [Y], initializer=[W, C])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Add tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def add_basic():
|
||||||
|
"""Elementwise Add on two inputs with identical shapes."""
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
|
||||||
|
B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
node = helper.make_node("Add", ["A", "B"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "add_basic", [A, B], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "add/basic", "add_basic.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def add_broadcast_row():
|
||||||
|
"""Elementwise Add with row-vector broadcasting."""
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
B = numpy_helper.from_array(np.random.default_rng(64).uniform(-1, 1, (8,)).astype(np.float32), name="B")
|
||||||
|
node = helper.make_node("Add", ["A", "B"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "add_broadcast_row", [A], [Y], initializer=[B])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "add/broadcast_row", "add_broadcast_row.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def add_after_gemm():
|
||||||
|
"""Gemm followed by Add with a broadcast bias vector."""
|
||||||
|
B, K, N = 4, 64, 32
|
||||||
|
rng = np.random.default_rng(65)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
||||||
|
D = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="D")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
|
||||||
|
add = helper.make_node("Add", ["G", "D"], ["Y"])
|
||||||
|
graph = helper.make_graph([gemm, add], "add_after_gemm", [A], [Y], initializer=[W, C, D])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "add/after_gemm", "add_after_gemm.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Mul tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def mul_basic():
|
||||||
|
"""Elementwise Mul on two inputs with identical shapes."""
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [4, 8])
|
||||||
|
B = helper.make_tensor_value_info("B", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
node = helper.make_node("Mul", ["A", "B"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "mul_basic", [A, B], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "mul/basic", "mul_basic.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def mul_scalar_constant():
|
||||||
|
"""Elementwise Mul with scalar broadcasting."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
S = numpy_helper.from_array(np.asarray([1.5], dtype=np.float32), name="S")
|
||||||
|
node = helper.make_node("Mul", ["X", "S"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "mul_scalar_constant", [X], [Y], initializer=[S])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "mul/scalar_constant", "mul_scalar_constant.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def mul_after_conv():
|
||||||
|
"""Conv followed by Mul with per-channel scaling."""
|
||||||
|
rng = np.random.default_rng(66)
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 3, 3])
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
|
||||||
|
B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
|
||||||
|
S = numpy_helper.from_array(rng.uniform(0.5, 1.5, (1, 2, 1, 1)).astype(np.float32), name="S")
|
||||||
|
conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
|
||||||
|
kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
|
||||||
|
mul = helper.make_node("Mul", ["C", "S"], ["Y"])
|
||||||
|
graph = helper.make_graph([conv, mul], "mul_after_conv", [X], [Y], initializer=[W, B, S])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "mul/after_conv", "mul_after_conv.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Div tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def div_basic():
|
||||||
|
"""Elementwise Div by a same-shape constant tensor."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
D = numpy_helper.from_array(np.random.default_rng(67).uniform(0.5, 2.0, (4, 8)).astype(np.float32), name="D")
|
||||||
|
node = helper.make_node("Div", ["X", "D"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "div_basic", [X], [Y], initializer=[D])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "div/basic", "div_basic.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def div_scalar_constant():
|
||||||
|
"""Elementwise Div with scalar broadcasting."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 8])
|
||||||
|
S = numpy_helper.from_array(np.asarray([2.0], dtype=np.float32), name="S")
|
||||||
|
node = helper.make_node("Div", ["X", "S"], ["Y"])
|
||||||
|
graph = helper.make_graph([node], "div_scalar_constant", [X], [Y], initializer=[S])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "div/scalar_constant", "div_scalar_constant.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def div_after_gemm():
|
||||||
|
"""Gemm followed by Div with a broadcast divisor vector."""
|
||||||
|
B, K, N = 4, 64, 32
|
||||||
|
rng = np.random.default_rng(68)
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (K, N)).astype(np.float32), name="W")
|
||||||
|
C = numpy_helper.from_array(rng.uniform(-1, 1, (N,)).astype(np.float32), name="C")
|
||||||
|
D = numpy_helper.from_array(rng.uniform(0.5, 2.0, (N,)).astype(np.float32), name="D")
|
||||||
|
A = helper.make_tensor_value_info("A", TensorProto.FLOAT, [B, K])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [B, N])
|
||||||
|
gemm = helper.make_node("Gemm", ["A", "W", "C"], ["G"])
|
||||||
|
div = helper.make_node("Div", ["G", "D"], ["Y"])
|
||||||
|
graph = helper.make_graph([gemm, div], "div_after_gemm", [A], [Y], initializer=[W, C, D])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "div/after_gemm", "div_after_gemm.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ReduceMean tests
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def reducemean_basic():
|
||||||
|
"""ReduceMean over the feature dimension, preserving rank."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])
|
||||||
|
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
|
||||||
|
graph = helper.make_graph([node], "reducemean_basic", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def reducemean_keepdims_0():
|
||||||
|
"""ReduceMean over the feature dimension, dropping the reduced axis."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])
|
||||||
|
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
|
||||||
|
graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def reducemean_4d_spatial():
|
||||||
|
"""ReduceMean over H and W on an NCHW tensor."""
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])
|
||||||
|
node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
|
||||||
|
graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")
|
||||||
|
|
||||||
|
|
||||||
|
def reducemean_after_conv():
|
||||||
|
"""Conv followed by ReduceMean over the spatial dimensions."""
|
||||||
|
rng = np.random.default_rng(62)
|
||||||
|
X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
|
||||||
|
Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
|
||||||
|
W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
|
||||||
|
B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
|
||||||
|
conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
|
||||||
|
kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
|
||||||
|
reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
|
||||||
|
graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B])
|
||||||
|
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
|
||||||
|
save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Main
|
# Main
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -415,10 +682,36 @@ if __name__ == "__main__":
|
|||||||
avgpool_include_pad()
|
avgpool_include_pad()
|
||||||
maxpool_after_conv()
|
maxpool_after_conv()
|
||||||
|
|
||||||
|
print("\nGenerating ReduceMean tests:")
|
||||||
|
reducemean_basic()
|
||||||
|
reducemean_keepdims_0()
|
||||||
|
reducemean_4d_spatial()
|
||||||
|
reducemean_after_conv()
|
||||||
|
|
||||||
print("\nGenerating Relu tests:")
|
print("\nGenerating Relu tests:")
|
||||||
relu_basic()
|
relu_basic()
|
||||||
relu_4d()
|
relu_4d()
|
||||||
relu_after_conv()
|
relu_after_conv()
|
||||||
relu_after_gemm()
|
relu_after_gemm()
|
||||||
|
|
||||||
|
print("\nGenerating Sigmoid tests:")
|
||||||
|
sigmoid_basic()
|
||||||
|
sigmoid_4d()
|
||||||
|
sigmoid_after_gemm()
|
||||||
|
|
||||||
|
print("\nGenerating Add tests:")
|
||||||
|
add_basic()
|
||||||
|
add_broadcast_row()
|
||||||
|
add_after_gemm()
|
||||||
|
|
||||||
|
print("\nGenerating Mul tests:")
|
||||||
|
mul_basic()
|
||||||
|
mul_scalar_constant()
|
||||||
|
mul_after_conv()
|
||||||
|
|
||||||
|
print("\nGenerating Div tests:")
|
||||||
|
div_basic()
|
||||||
|
div_scalar_constant()
|
||||||
|
div_after_gemm()
|
||||||
|
|
||||||
print("\nDone.")
|
print("\nDone.")
|
||||||
|
|||||||
BIN
validation/operations/mul/after_conv/mul_after_conv.onnx
Normal file
BIN
validation/operations/mul/after_conv/mul_after_conv.onnx
Normal file
Binary file not shown.
BIN
validation/operations/mul/basic/mul_basic.onnx
Normal file
BIN
validation/operations/mul/basic/mul_basic.onnx
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
validation/operations/reduce_mean/basic/reduce_mean_basic.onnx
Normal file
BIN
validation/operations/reduce_mean/basic/reduce_mean_basic.onnx
Normal file
Binary file not shown.
Binary file not shown.
BIN
validation/operations/sigmoid/4d/sigmoid_4d.onnx
Normal file
BIN
validation/operations/sigmoid/4d/sigmoid_4d.onnx
Normal file
Binary file not shown.
BIN
validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx
Normal file
BIN
validation/operations/sigmoid/after_gemm/sigmoid_after_gemm.onnx
Normal file
Binary file not shown.
BIN
validation/operations/sigmoid/basic/sigmoid_basic.onnx
Normal file
BIN
validation/operations/sigmoid/basic/sigmoid_basic.onnx
Normal file
Binary file not shown.
Reference in New Issue
Block a user