Merge branch 'main' of chef.heaplab.deib.polimi.it:nnicolosi/Raptor into main
All checks were successful
Validate Operations / validate-operations (push) Successful in 16m23s

This commit is contained in:
ilgeco
2026-04-09 19:27:05 +02:00
43 changed files with 890 additions and 94 deletions

View File

@@ -52,6 +52,7 @@ static NAMES: LazyLock<HashMap<usize, &'static str>> = LazyLock::new(|| {
add_name_simd!(hash, vrelu);
add_name_simd!(hash, vtanh);
add_name_simd!(hash, vsigm);
add_name_simd!(hash, vsoftmax);
add_name!(hash, vmv);
add_name!(hash, vrsu);
add_name!(hash, vrsl);
@@ -177,6 +178,7 @@ static SIMD: LazyLock<HashMap<usize, HashMap<(usize, usize), InstructionType>>>
add_simd_to_map!(storage, vrelu);
add_simd_to_map!(storage, vtanh);
add_simd_to_map!(storage, vsigm);
add_simd_to_map!(storage, vsoftmax);
add_simd_to_map!(storage, mvmul);
storage
});
@@ -626,6 +628,46 @@ where
Ok(InstructionStatus::Completed)
}
/// Placeholder registered in the instruction lookup tables for `vsoftmax`.
/// Dispatch substitutes the monomorphized generic `vsoftmax_impl::<F, T>`,
/// so this body must never actually execute.
pub fn vsoftmax(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
panic!("You are calling a placeholder, the real call is the generic version");
}
/// Generic implementation of the `vsoftmax` SIMD instruction.
///
/// Loads `imm_len` elements of type `F` from the address held in `r1` (plus
/// the selected offset), computes a numerically stable softmax over them
/// (inputs are shifted by their maximum before exponentiation so the
/// exponentials cannot overflow), upcasts the result to `T`, and stores it
/// at the address held in `rd` (plus the same offset).
///
/// # Errors
/// Fails when the load reservation or load fails, when the input vector is
/// empty, or when the normalization sum is not strictly positive.
pub(super) fn vsoftmax_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    TRACER.lock().unwrap().pre_vsoftmax::<F, T>(cores, data);
    let (core_indx, rd, r1, _r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let src_addr = {
        let base = core.register(r1);
        add_offset_r1(base, offset_select, offset_value)
    };
    let dst_addr = {
        let base = core.register(rd);
        add_offset_rd(base, offset_select, offset_value)
    };
    let loads = core.reserve_load(src_addr, imm_len)?.execute_load::<F>()?;
    let input = loads[0];
    ensure!(!input.is_empty(), "vsoftmax does not support empty vectors");
    // Left-to-right running maximum (same associativity as the original reduce).
    let max_val = input
        .iter()
        .copied()
        .reduce(|a, b| if a > b { a } else { b })
        .unwrap();
    let exps: Vec<F> = input.iter().map(|&x| (x - max_val).exp()).collect();
    // Accumulate the normalization term in the same left-to-right order.
    let mut sum = exps[0];
    for &e in &exps[1..] {
        sum = sum + e;
    }
    ensure!(sum > 0.0.into(), "vsoftmax normalization sum must be positive");
    let normalized: Vec<F> = exps.iter().map(|&e| e / sum).collect();
    let upcast: Cow<[T]> = normalized.as_slice().up();
    core.execute_store(dst_addr, upcast.as_ref());
    TRACER.lock().unwrap().post_vsoftmax::<F, T>(cores, data);
    Ok(InstructionStatus::Completed)
}
/// `vmv` (vector move) instruction — not implemented yet; executing it
/// panics via `todo!()`.
pub fn vmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
todo!()
}

View File

@@ -40,6 +40,7 @@ static SIMD: LazyLock<HashMap<String, FunctorType>> = LazyLock::new(|| {
add_to_json_map!(storage, vrelu);
add_to_json_map!(storage, vtanh);
add_to_json_map!(storage, vsigm);
add_to_json_map!(storage, vsoftmax);
add_to_json_map!(storage, vmv);
add_to_json_map!(storage, vrsu);
add_to_json_map!(storage, vrsl);
@@ -417,6 +418,27 @@ fn json_to_vsigm(
Ok(())
}
/// Decodes a `vsoftmax` JSON instruction object and appends the built
/// instruction to `inst_builder`.
///
/// Expected fields: `"op": "vsoftmax"`, integer `"rd"`, `"rs1"`, `"len"`,
/// and an `"offset"` object understood by `json_to_offset`.
///
/// # Panics
/// Panics when `json` is not an object, when `"op"` is not `"vsoftmax"`,
/// or when a required field is missing or mistyped.
fn json_to_vsoftmax(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vsoftmax", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // `expect` instead of a bare `unwrap` so a malformed instruction names
    // the missing field in the panic message.
    let (offset_select, offset_value) =
        json_to_offset(json.get("offset").expect("Missing 'offset' field in vsoftmax"));
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vsoftmax, inst_data_builder.build());
    Ok(())
}
fn json_to_vmv(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,

View File

@@ -67,6 +67,22 @@ impl HasSigm for f64 {
}
}
/// Types that provide the natural exponential function `e^x`.
///
/// Mirrors the neighboring `HasTanh`/`HasSigm` traits so generic SIMD
/// kernels can call `.exp()` on their element type.
pub trait HasExp {
    fn exp(self) -> Self;
}

impl HasExp for f32 {
    fn exp(self) -> Self {
        // Fully-qualified call to the inherent method — makes it explicit
        // that this delegation does not recurse into the trait method.
        f32::exp(self)
    }
}

impl HasExp for f64 {
    fn exp(self) -> Self {
        f64::exp(self)
    }
}
pub trait TryToUsize: TryInto<usize, Error = Self::TryError>
@@ -112,6 +128,7 @@ pub trait UpcastDestTraits<T>:
+ PartialOrd<T>
+ HasTanh
+ HasSigm
+ HasExp
+ FromUsize
{
}

View File

@@ -248,6 +248,22 @@ impl Trace {
{
}
/// Hook invoked just before a `vsoftmax` instruction executes.
/// This `Trace` variant records nothing, so the body is intentionally empty.
pub fn pre_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
}
/// Hook invoked right after a `vsoftmax` instruction completes.
/// This `Trace` variant records nothing, so the body is intentionally empty.
pub fn post_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
}
/////////////////////////////////////////////////////////////////
/////Communication/synchronization Instructions/////////////////
/////////////////////////////////////////////////////////////////

View File

@@ -956,6 +956,35 @@ impl Trace {
// Ok(InstructionStatus::Completed)
}
/// Trace hook run before a `vsoftmax` executes: writes a `VSOFTMAX` marker
/// line to the per-core trace file.
///
/// # Panics
/// Panics when no trace file exists for the executing core.
pub fn pre_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    // Only the core index is needed to pick the trace file; underscore the
    // remaining decoded fields instead of leaving unused bindings.
    let (core_indx, _rd, _r1, _r2, _imm_len, _offset_select, _offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let file: &mut File = self
        .out_files
        .get_mut(core_indx as usize)
        .expect("File at index not found");
    // Tracing is best-effort: an I/O failure must not abort the simulation,
    // so the write result is explicitly discarded.
    let _ = writeln!(file, "\t\tVSOFTMAX\t\t");
}
/// Trace hook run after a `vsoftmax` completes. It currently records
/// nothing, but still validates that a trace file exists for the executing
/// core so a missing file fails loudly here rather than in a later hook.
///
/// # Panics
/// Panics when no trace file exists for the executing core.
pub fn post_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    let (core_indx, _rd, _r1, _r2, _imm_len, _offset_select, _offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    // Keep the lookup for its existence check, but drop the unused binding.
    self.out_files
        .get_mut(core_indx as usize)
        .expect("File at index not found");
}
/////////////////////////////////////////////////////////////////
/////Communication/synchronization Instructions/////////////////
/////////////////////////////////////////////////////////////////

View File

@@ -386,6 +386,20 @@ void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp) const {
emitInstruction(std::move(json));
}
// Emits the simulator JSON encoding of a pim.vsoftmax op, mirroring the
// neighboring unary vector-op emitters (relu/tanh/sigm).
void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp) const {
auto outputBufferAddr = memory.getValueAddress(vsoftmaxOp.getOutputBuffer());
auto inputAddr = memory.getValueAddress(vsoftmaxOp.getInput());
// Bind destination/source addresses; the JSON below refers to them by
// register index.
setupRdRs1(outputBufferAddr, 0, inputAddr, 0);
json::Object json;
json["op"] = "vsoftmax";
// NOTE(review): rd/rs1 are the fixed register indices 0 and 1 —
// presumably the registers setupRdRs1 just loaded; confirm against its
// definition.
json["rd"] = 0;
json["rs1"] = 1;
// No per-instruction offset is used for vsoftmax.
json["offset"] = createEmptyOffset();
// `len` carries the input's size in bytes.
json["len"] = getValueSizeInBytes(vsoftmaxOp.getInput());
emitInstruction(std::move(json));
}
void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp) const {
auto srcAddr = memory.getValueAddress(transposeOp.getInput());
auto dstAddr = memory.getValueAddress(transposeOp.getOutputBuffer());
@@ -537,6 +551,8 @@ static int64_t codeGenCoreOps(pim::PimCoreOp coreOp, PimCodeGen& coreCodeGen) {
coreCodeGen.codeGenVTanhOp(vtanhOp);
else if (auto vsigmOp = dyn_cast<pim::PimVSigmOp>(op))
coreCodeGen.codeGenVSigmOp(vsigmOp);
else if (auto vsoftmaxOp = dyn_cast<pim::PimVSoftmaxOp>(op))
coreCodeGen.codeGenVSoftmaxOp(vsoftmaxOp);
else {
op.emitError("Unsupported codegen for this operation");
op.dump();

View File

@@ -99,6 +99,7 @@ public:
void codeGenVReluOp(pim::PimVReluOp vreluOp) const;
void codeGenVTanhOp(pim::PimVTanhOp vtanhOp) const;
void codeGenVSigmOp(pim::PimVSigmOp vsigmOp) const;
void codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp) const;
void codeGenTransposeOp(pim::PimTransposeOp transposeOp) const;
};

View File

@@ -11,8 +11,12 @@ add_pim_library(OMONNXToSpatial
Patterns/NN/Pool.cpp
Patterns/NN/Relu.cpp
Patterns/NN/Sigmoid.cpp
Patterns/NN/Softmax.cpp
Patterns/Tensor/Concat.cpp
Patterns/Tensor/Gather.cpp
Patterns/Tensor/Resize.cpp
Patterns/Tensor/Reshape.cpp
Patterns/Tensor/Split.cpp
ONNXToSpatialPass.cpp
Common.cpp

View File

@@ -89,9 +89,12 @@ void ONNXToSpatialPass::runOnOperation() {
target.addIllegalOp<ONNXSigmoidOp>();
target.addIllegalOp<ONNXSoftmaxOp>();
target.addIllegalOp<ONNXConcatOp>();
target.addIllegalOp<ONNXGatherOp>();
target.addIllegalOp<ONNXReshapeOp>();
target.addIllegalOp<ONNXResizeOp>();
target.addIllegalOp<ONNXLRNOp>();
target.addIllegalOp<ONNXReduceMeanV13Op>();
target.addIllegalOp<ONNXSplitOp>();
RewritePatternSet patterns(ctx);
patterns.add<removeLRN>(ctx);
@@ -103,8 +106,12 @@ void ONNXToSpatialPass::runOnOperation() {
populateReduceMeanPatterns(patterns, ctx);
populateReluPatterns(patterns, ctx);
populateSigmoidPatterns(patterns, ctx);
populateSoftmaxPatterns(patterns, ctx);
populateConcatPatterns(patterns, ctx);
populateGatherPatterns(patterns, ctx);
populateResizePatterns(patterns, ctx);
populateReshapePatterns(patterns, ctx);
populateSplitPatterns(patterns, ctx);
if (failed(applyPartialConversion(moduleOp, target, std::move(patterns)))) {
signalPassFailure();
@@ -168,7 +175,7 @@ bool encapsulateConcat(IRRewriter& rewriter, Location loc, Operation* inst) {
auto newCompute = spatial::SpatWeightedCompute::create(rewriter, loc, inst->getResultTypes().front(), sources);
llvm::SmallVector<Type> sourceTypes;
llvm::SmallVector<Location> sourceLoc;
for (auto source : sources){
for (auto source : sources) {
sourceTypes.push_back(source.getType());
sourceLoc.push_back(loc);
}
@@ -176,7 +183,7 @@ bool encapsulateConcat(IRRewriter& rewriter, Location loc, Operation* inst) {
newCompute.getProperties().setOperandSegmentSizes({(int) 0, (int) sources.size()});
rewriter.setInsertionPointToEnd(BB);
IRMapping mapper;
for(auto [source,bbArg] : llvm::zip(sources, BB->getArguments()))
for (auto [source, bbArg] : llvm::zip(sources, BB->getArguments()))
mapper.map(source, bbArg);
auto newConcat = rewriter.clone(*inst, mapper);
spatial::SpatYieldOp::create(rewriter, loc, newConcat->getResult(0));

View File

@@ -21,8 +21,16 @@ void populateReluPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext*
void populateSigmoidPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateSoftmaxPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateConcatPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateGatherPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateResizePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateReshapePatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
void populateSplitPatterns(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
} // namespace onnx_mlir

View File

@@ -92,10 +92,8 @@ static FailureOr<Value> materializeBroadcastedConstantTensor(Value value,
return arith::ConstantOp::create(rewriter, loc, resultType, broadcastedAttr).getResult();
}
static FailureOr<Value> prepareElementwiseOperand(Value value,
RankedTensorType resultType,
ConversionPatternRewriter& rewriter,
Location loc) {
static FailureOr<Value>
prepareElementwiseOperand(Value value, RankedTensorType resultType, ConversionPatternRewriter& rewriter, Location loc) {
auto valueType = dyn_cast<RankedTensorType>(value.getType());
if (!valueType || !valueType.hasStaticShape())
return failure();

View File

@@ -280,8 +280,8 @@ LogicalResult GemvToSpatialCompute::matchAndRewrite(ONNXGemmOp gemmOp,
for (size_t aSliceId = 0; aSliceId < aHSlices[coreId].size(); aSliceId++)
weights.push_back(bTiles[outSliceId][coreId][aSliceId]);
auto computeOp =
createSpatCompute(rewriter, gemmLoc, currOutHSliceType, weights, aHSlices[coreId], [&](ValueRange aHSlicesArgs) {
auto computeOp = createSpatCompute(
rewriter, gemmLoc, currOutHSliceType, weights, aHSlices[coreId], [&](ValueRange aHSlicesArgs) {
SmallVector<Value> vmmOutputs;
vmmOutputs.reserve(aHSlicesArgs.size());
for (auto [aHSliceId, computeArg] : llvm::enumerate(aHSlicesArgs))

View File

@@ -71,10 +71,8 @@ static SmallVector<ReassociationIndices> buildCollapseReassociation(ArrayRef<boo
return reassociation;
}
static Value createAverageCompute(Value input,
RankedTensorType resultType,
ConversionPatternRewriter& rewriter,
Location loc) {
static Value
createAverageCompute(Value input, RankedTensorType resultType, ConversionPatternRewriter& rewriter, Location loc) {
constexpr size_t numInputs = 1;
auto computeOp = createSpatCompute<numInputs>(rewriter, loc, resultType, {}, ValueRange {input}, [&](Value x) {
auto avgOp = spatial::SpatVAvgOp::create(rewriter, loc, resultType, x);
@@ -141,7 +139,8 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
Location loc = reduceMeanOp.getLoc();
RankedTensorType leafType = getAllOnesType(inputType, resultType.getElementType());
Value reducedKeepdims = buildReduceMeanKeepdims(adaptor.getData(), reducedAxes, /*axis=*/0, leafType, rewriter, loc);
Value reducedKeepdims =
buildReduceMeanKeepdims(adaptor.getData(), reducedAxes, /*axis=*/0, leafType, rewriter, loc);
if (reduceMeanOp.getKeepdims() != 0) {
rewriter.replaceOp(reduceMeanOp, reducedKeepdims);

View File

@@ -0,0 +1,111 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// Maps a possibly-negative ONNX axis into the [0, rank) range; negative
// axes count back from the last dimension.
static int64_t normalizeAxis(int64_t axis, int64_t rank) {
  if (axis < 0)
    return rank + axis;
  return axis;
}
// Applies `permutation` to `shape`: result[i] = shape[permutation[i]].
static SmallVector<int64_t> permuteShape(ArrayRef<int64_t> shape, ArrayRef<int64_t> permutation) {
  SmallVector<int64_t> result;
  result.reserve(permutation.size());
  for (int64_t sourceAxis : permutation)
    result.push_back(shape[sourceAxis]);
  return result;
}
// Wraps `input` in a single-input spatial compute region whose body applies
// spat.softmax to the whole tensor and yields the result.
static Value createSoftmaxCompute(Value input, ConversionPatternRewriter& rewriter, Location loc) {
auto inputType = cast<RankedTensorType>(input.getType());
constexpr size_t numInputs = 1;
auto computeOp =
createSpatCompute<numInputs>(rewriter, loc, TypeRange {inputType}, {}, ValueRange {input}, [&](Value x) {
auto softmaxOp = spatial::SpatSoftmaxOp::create(rewriter, loc, inputType, x);
spatial::SpatYieldOp::create(rewriter, loc, softmaxOp.getResult());
});
return computeOp.getResult(0);
}
// Recursively decomposes an N-D softmax into per-slice compute regions.
//
// Walks the axes from `axis` upward: every axis except `softmaxAxis` is cut
// into size-1 slices that are rebuilt with tensor.concat afterwards; the
// softmax axis itself is skipped. Once `axis` reaches the rank, each
// remaining slice varies only along the softmax axis and is handed to
// createSoftmaxCompute as a single vector.
static Value
buildSoftmax(Value input, int64_t softmaxAxis, int64_t axis, ConversionPatternRewriter& rewriter, Location loc) {
auto inputType = cast<RankedTensorType>(input.getType());
if (axis == inputType.getRank())
return createSoftmaxCompute(input, rewriter, loc);
if (axis == softmaxAxis)
return buildSoftmax(input, softmaxAxis, axis + 1, rewriter, loc);
SmallVector<Value> slices = sliceTensor(input, axis, /*sliceSize=*/1, rewriter, loc);
SmallVector<Value> rebuiltSlices;
rebuiltSlices.reserve(slices.size());
for (Value slice : slices)
rebuiltSlices.push_back(buildSoftmax(slice, softmaxAxis, axis + 1, rewriter, loc));
// A single slice needs no concat.
return rebuiltSlices.size() == 1 ? rebuiltSlices.front()
: tensor::ConcatOp::create(rewriter, loc, axis, rebuiltSlices).getResult();
}
// Converts ONNXSoftmaxOp into spatial compute regions.
// A last-axis softmax is decomposed directly via buildSoftmax; any other
// axis is first transposed to the back, decomposed, then transposed back
// with the inverse permutation. Only statically shaped inputs are handled.
struct SoftmaxToSpatialCompute : OpConversionPattern<ONNXSoftmaxOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult matchAndRewrite(ONNXSoftmaxOp softmaxOp,
ONNXSoftmaxOpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto inputType = dyn_cast<RankedTensorType>(adaptor.getInput().getType());
if (!inputType || !inputType.hasStaticShape())
return failure();
int64_t axis = normalizeAxis(softmaxOp.getAxis(), inputType.getRank());
if (axis < 0 || axis >= inputType.getRank())
return failure();
Value input = adaptor.getInput();
Value result;
if (axis == inputType.getRank() - 1) {
result = buildSoftmax(input, axis, /*axis=*/0, rewriter, softmaxOp.getLoc());
}
else {
// Build the permutation that moves the softmax axis to the back: all
// other axes in order, then `axis` last.
SmallVector<int64_t> permutation;
permutation.reserve(inputType.getRank());
for (int64_t dim = 0; dim < inputType.getRank(); ++dim)
if (dim != axis)
permutation.push_back(dim);
permutation.push_back(axis);
SmallVector<int64_t> inversePermutation(inputType.getRank());
for (auto [newIndex, oldIndex] : llvm::enumerate(permutation))
inversePermutation[oldIndex] = static_cast<int64_t>(newIndex);
auto transposedType = RankedTensorType::get(
permuteShape(inputType.getShape(), permutation), inputType.getElementType(), inputType.getEncoding());
// The forward transpose runs inside its own spatial compute region.
auto preTransposeCompute =
createSpatCompute<1>(rewriter, softmaxOp.getLoc(), TypeRange {transposedType}, {}, input, [&](Value x) {
Value transposed = ONNXTransposeOp::create(
rewriter, softmaxOp.getLoc(), transposedType, x, rewriter.getI64ArrayAttr(permutation));
spatial::SpatYieldOp::create(rewriter, softmaxOp.getLoc(), transposed);
});
Value transposedInput = preTransposeCompute.getResult(0);
Value transposedResult = buildSoftmax(
transposedInput, /*softmaxAxis=*/inputType.getRank() - 1, /*axis=*/0, rewriter, softmaxOp.getLoc());
// NOTE(review): the inverse transpose is emitted as a bare ONNX op, not
// wrapped in a spatial compute like the forward one — confirm this is
// intentional and handled by a later lowering.
result = ONNXTransposeOp::create(
rewriter, softmaxOp.getLoc(), inputType, transposedResult, rewriter.getI64ArrayAttr(inversePermutation));
}
rewriter.replaceOp(softmaxOp, result);
return success();
}
};
} // namespace
// Registers the ONNX softmax → spatial compute lowering with the pass.
void populateSoftmaxPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add<SoftmaxToSpatialCompute>(ctx); }
} // namespace onnx_mlir

View File

@@ -23,8 +23,6 @@ struct Concat : public OpConversionPattern<ONNXConcatOp> {
}
};
void populateConcatPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
patterns.insert<Concat>(ctx);
}
void populateConcatPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.insert<Concat>(ctx); }
} // namespace onnx_mlir

View File

@@ -0,0 +1,157 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/SmallVector.h"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// Normalizes an ONNX axis: negative values index from the end of the shape.
static int64_t normalizeAxis(int64_t axis, int64_t rank) { return axis < 0 ? rank + axis : axis; }
// Normalizes a gather index along a dimension of extent `dimSize`; negative
// indices count back from the end, per ONNX Gather semantics.
static int64_t normalizeIndex(int64_t index, int64_t dimSize) { return index < 0 ? dimSize + index : index; }
// Builds a tensor.extract_slice taking the full extent of every dimension
// except `axis`, where a single element at `offset` is kept (extent 1,
// stride 1 everywhere).
static Value
extractSliceAt(Value input, int64_t axis, int64_t offset, ConversionPatternRewriter& rewriter, Location loc) {
auto inputType = cast<RankedTensorType>(input.getType());
// Defaults: offset 0, full size, stride 1 on every dimension.
SmallVector<OpFoldResult> offsets(inputType.getRank(), rewriter.getIndexAttr(0));
SmallVector<OpFoldResult> sizes;
SmallVector<OpFoldResult> strides(inputType.getRank(), rewriter.getIndexAttr(1));
sizes.reserve(inputType.getRank());
for (int64_t dim : inputType.getShape())
sizes.push_back(rewriter.getIndexAttr(dim));
offsets[axis] = rewriter.getIndexAttr(offset);
sizes[axis] = rewriter.getIndexAttr(1);
return tensor::ExtractSliceOp::create(rewriter, loc, input, offsets, sizes, strides);
}
// Extracts one size-1 slice per gather index (after normalizing negative
// indices) and concatenates them along `axis`.
// Returns a null Value when an index is out of range or `indices` is empty;
// callers treat null as a match failure.
static Value concatGatherSlices(Value data,
int64_t axis,
ArrayRef<int64_t> indices,
int64_t axisDim,
ConversionPatternRewriter& rewriter,
Location loc) {
SmallVector<Value> slices;
slices.reserve(indices.size());
for (int64_t index : indices) {
int64_t normalizedIndex = normalizeIndex(index, axisDim);
if (normalizedIndex < 0 || normalizedIndex >= axisDim)
return {};
slices.push_back(extractSliceAt(data, axis, normalizedIndex, rewriter, loc));
}
if (slices.empty())
return {};
// A single slice needs no concat.
return slices.size() == 1 ? slices.front() : tensor::ConcatOp::create(rewriter, loc, axis, slices).getResult();
}
// Inserts a unit dimension immediately before `axis` via tensor.expand_shape
// so per-row gather results can later be concatenated along that new dim.
static Value addLeadingGatherDim(Value value, int64_t axis, ConversionPatternRewriter& rewriter, Location loc) {
auto valueType = cast<RankedTensorType>(value.getType());
SmallVector<int64_t> resultShape;
SmallVector<ReassociationIndices> reassociation;
resultShape.reserve(valueType.getRank() + 1);
reassociation.reserve(valueType.getRank());
// Tracks the next dimension index in the expanded shape.
int64_t resultDim = 0;
for (int64_t dim = 0; dim < valueType.getRank(); ++dim) {
if (dim == axis) {
// The split axis maps to [1, originalExtent] in the expanded shape.
resultShape.push_back(1);
resultShape.push_back(valueType.getShape()[dim]);
reassociation.push_back({static_cast<int64_t>(resultDim), static_cast<int64_t>(resultDim + 1)});
resultDim += 2;
continue;
}
resultShape.push_back(valueType.getShape()[dim]);
reassociation.push_back({static_cast<int64_t>(resultDim)});
resultDim++;
}
auto resultType = RankedTensorType::get(resultShape, valueType.getElementType(), valueType.getEncoding());
return tensor::ExpandShapeOp::create(rewriter, loc, resultType, value, reassociation);
}
// Lowers ONNXGatherOp with compile-time-constant indices to a spatial
// compute region built from extract_slice + concat ops.
// Only static shapes, arith.constant integer indices, and indices tensors
// of rank 1 or 2 are supported; anything else bails out with failure().
struct Gather : OpConversionPattern<ONNXGatherOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult matchAndRewrite(ONNXGatherOp gatherOp,
ONNXGatherOpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto dataType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
auto indicesType = dyn_cast<RankedTensorType>(adaptor.getIndices().getType());
if (!dataType || !indicesType || !dataType.hasStaticShape() || !indicesType.hasStaticShape())
return failure();
// Indices must be constant so the gather can be unrolled at compile time.
auto indicesConst = adaptor.getIndices().getDefiningOp<arith::ConstantOp>();
if (!indicesConst)
return failure();
auto indicesAttr = dyn_cast<DenseIntElementsAttr>(indicesConst.getValue());
if (!indicesAttr)
return failure();
int64_t rank = dataType.getRank();
int64_t axis = normalizeAxis(gatherOp.getAxis(), rank);
if (axis < 0 || axis >= rank)
return failure();
int64_t axisDim = dataType.getShape()[axis];
if (axisDim <= 0)
return failure();
// Flatten the (rank-1 or rank-2) indices in row-major order.
SmallVector<int64_t> flatIndices(indicesAttr.getValues<int64_t>().begin(), indicesAttr.getValues<int64_t>().end());
Location loc = gatherOp.getLoc();
auto computeOp =
createSpatCompute<1>(rewriter,
loc,
TypeRange {gatherOp.getResult().getType()},
{},
adaptor.getData(),
[&](Value data) -> LogicalResult {
Value result;
if (indicesType.getRank() == 1) {
// 1-D indices: concat the selected slices directly along `axis`.
result = concatGatherSlices(data, axis, flatIndices, axisDim, rewriter, loc);
}
else if (indicesType.getRank() == 2) {
// 2-D indices: gather each row, insert a unit dim at `axis`, then
// concat the expanded rows along that dim.
int64_t rowCount = indicesType.getShape()[0];
int64_t rowWidth = indicesType.getShape()[1];
SmallVector<Value> rows;
rows.reserve(rowCount);
for (int64_t row = 0; row < rowCount; ++row) {
ArrayRef<int64_t> rowIndices(flatIndices.data() + row * rowWidth, rowWidth);
Value gatheredRow = concatGatherSlices(data, axis, rowIndices, axisDim, rewriter, loc);
if (!gatheredRow)
return failure();
rows.push_back(addLeadingGatherDim(gatheredRow, axis, rewriter, loc));
}
result = rows.size() == 1
? rows.front()
: tensor::ConcatOp::create(rewriter, loc, /*axis=*/axis, rows).getResult();
}
else {
return failure();
}
// concatGatherSlices returns a null Value for out-of-range indices.
if (!result)
return failure();
spatial::SpatYieldOp::create(rewriter, loc, result);
return success();
});
if (failed(computeOp))
return failure();
rewriter.replaceOp(gatherOp, computeOp->getResults());
return success();
}
};
} // namespace
// Registers the constant-indices Gather lowering with the conversion pass.
void populateGatherPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add<Gather>(ctx); }
} // namespace onnx_mlir

View File

@@ -0,0 +1,90 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// tensor.extract_slice keeping every dimension whole except `axis`, where a
// single element at `offset` is selected (extent 1, stride 1 everywhere).
static Value
extractSliceAt(Value input, int64_t axis, int64_t offset, ConversionPatternRewriter& rewriter, Location loc) {
auto inputType = cast<RankedTensorType>(input.getType());
SmallVector<OpFoldResult> offsets(inputType.getRank(), rewriter.getIndexAttr(0));
SmallVector<OpFoldResult> sizes;
SmallVector<OpFoldResult> strides(inputType.getRank(), rewriter.getIndexAttr(1));
sizes.reserve(inputType.getRank());
for (int64_t dim : inputType.getShape())
sizes.push_back(rewriter.getIndexAttr(dim));
offsets[axis] = rewriter.getIndexAttr(offset);
sizes[axis] = rewriter.getIndexAttr(1);
return tensor::ExtractSliceOp::create(rewriter, loc, input, offsets, sizes, strides);
}
// Nearest-neighbor source index for "asymmetric" coordinate transformation
// with "floor" rounding: floor(outputIndex * inputDim / outputDim), clamped
// to the last valid input index.
static int64_t nearestAsymmetricIndex(int64_t outputIndex, int64_t inputDim, int64_t outputDim) {
  int64_t mapped = (outputIndex * inputDim) / outputDim;
  return mapped < inputDim - 1 ? mapped : inputDim - 1;
}
// Recursively materializes a nearest-neighbor resize one axis at a time:
// for each output position along `axis` it extracts the mapped input slice
// and recurses on the remaining axes, then concatenates the rebuilt slices.
// Recursion terminates once every axis has been expanded.
static Value buildNearestResize(Value input,
ArrayRef<int64_t> inputShape,
ArrayRef<int64_t> outputShape,
int64_t axis,
ConversionPatternRewriter& rewriter,
Location loc) {
if (axis == static_cast<int64_t>(outputShape.size()))
return input;
SmallVector<Value> slices;
slices.reserve(outputShape[axis]);
for (int64_t outputIndex = 0; outputIndex < outputShape[axis]; ++outputIndex) {
int64_t inputIndex = nearestAsymmetricIndex(outputIndex, inputShape[axis], outputShape[axis]);
Value slice = extractSliceAt(input, axis, inputIndex, rewriter, loc);
slices.push_back(buildNearestResize(slice, inputShape, outputShape, axis + 1, rewriter, loc));
}
// A single slice needs no concat.
return slices.size() == 1 ? slices.front() : tensor::ConcatOp::create(rewriter, loc, axis, slices).getResult();
}
// Lowers ONNXResizeOp (nearest / asymmetric / floor only) to a single
// spatial compute region that rebuilds the output slice-by-slice.
struct Resize : OpConversionPattern<ONNXResizeOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult matchAndRewrite(ONNXResizeOp resizeOp,
ONNXResizeOpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto inputType = dyn_cast<RankedTensorType>(adaptor.getX().getType());
auto resultType = dyn_cast<RankedTensorType>(resizeOp.getY().getType());
if (!inputType || !resultType || !inputType.hasStaticShape() || !resultType.hasStaticShape())
return failure();
// Only the exact mode combination implemented by nearestAsymmetricIndex.
if (resizeOp.getMode() != "nearest" || resizeOp.getCoordinateTransformationMode() != "asymmetric"
|| resizeOp.getNearestMode() != "floor")
return failure();
// The unrolled slicing needs strictly positive static extents.
if (llvm::any_of(inputType.getShape(), [](int64_t dim) { return dim <= 0; })
|| llvm::any_of(resultType.getShape(), [](int64_t dim) { return dim <= 0; }))
return failure();
auto computeOp =
createSpatCompute<1>(rewriter, resizeOp.getLoc(), TypeRange {resultType}, {}, adaptor.getX(), [&](Value x) {
Value result =
buildNearestResize(x, inputType.getShape(), resultType.getShape(), /*axis=*/0, rewriter, resizeOp.getLoc());
spatial::SpatYieldOp::create(rewriter, resizeOp.getLoc(), result);
});
rewriter.replaceOp(resizeOp, computeOp.getResults());
return success();
}
};
} // namespace
// Registers the nearest-neighbor Resize lowering with the conversion pass.
void populateResizePatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add<Resize>(ctx); }
} // namespace onnx_mlir

View File

@@ -0,0 +1,70 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
using namespace mlir;
namespace onnx_mlir {
namespace {
// Maps a possibly-negative ONNX axis into [0, rank); negative axes count
// back from the last dimension.
static int64_t normalizeAxis(int64_t axis, int64_t rank) { return axis >= 0 ? axis : rank + axis; }
// tensor.extract_slice of `size` elements starting at `offset` along
// `axis`; all other dimensions are taken whole with stride 1.
static Value extractSliceAt(
Value input, int64_t axis, int64_t offset, int64_t size, ConversionPatternRewriter& rewriter, Location loc) {
auto inputType = cast<RankedTensorType>(input.getType());
SmallVector<OpFoldResult> offsets(inputType.getRank(), rewriter.getIndexAttr(0));
SmallVector<OpFoldResult> sizes;
SmallVector<OpFoldResult> strides(inputType.getRank(), rewriter.getIndexAttr(1));
sizes.reserve(inputType.getRank());
for (int64_t dim : inputType.getShape())
sizes.push_back(rewriter.getIndexAttr(dim));
offsets[axis] = rewriter.getIndexAttr(offset);
sizes[axis] = rewriter.getIndexAttr(size);
return tensor::ExtractSliceOp::create(rewriter, loc, input, offsets, sizes, strides);
}
// Lowers ONNXSplitOp to one spatial compute region per result, each taking
// a tensor.extract_slice of the statically shaped input along `axis`.
// Split extents are read from the result types; an explicit `split` operand
// is not consulted here.
struct Split : OpConversionPattern<ONNXSplitOp> {
using OpConversionPattern::OpConversionPattern;
LogicalResult
matchAndRewrite(ONNXSplitOp splitOp, ONNXSplitOpAdaptor adaptor, ConversionPatternRewriter& rewriter) const override {
auto inputType = dyn_cast<RankedTensorType>(adaptor.getInput().getType());
if (!inputType || !inputType.hasStaticShape())
return failure();
int64_t rank = inputType.getRank();
int64_t axis = normalizeAxis(splitOp.getAxis(), rank);
if (axis < 0 || axis >= rank)
return failure();
SmallVector<Value> outputs;
outputs.reserve(splitOp.getNumResults());
// Running offset along the split axis; each result consumes its extent.
int64_t offset = 0;
for (Value result : splitOp.getResults()) {
auto resultType = dyn_cast<RankedTensorType>(result.getType());
if (!resultType || !resultType.hasStaticShape())
return failure();
int64_t sliceSize = resultType.getShape()[axis];
auto computeOp =
createSpatCompute<1>(rewriter, splitOp.getLoc(), TypeRange {resultType}, {}, adaptor.getInput(), [&](Value x) {
Value output = extractSliceAt(x, axis, offset, sliceSize, rewriter, splitOp.getLoc());
spatial::SpatYieldOp::create(rewriter, splitOp.getLoc(), output);
});
outputs.push_back(computeOp.getResult(0));
offset += sliceSize;
}
rewriter.replaceOp(splitOp, outputs);
return success();
}
};
} // namespace
// Registers the static-shape Split lowering with the conversion pass.
void populateSplitPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { patterns.add<Split>(ctx); }
} // namespace onnx_mlir

View File

@@ -63,4 +63,10 @@ def spatToPimVSigm : Pat<
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
// Rewrites spat.softmax into pim.vsoftmax, obtaining (or allocating) the
// destination buffer via getBestOutputTensorFromOperandsOrAllocate, in the
// same shape as the sibling relu/tanh/sigm patterns above.
def spatToPimVSoftmax : Pat<
(SpatSoftmaxOp:$srcOpRes $input),
(PimVSoftmaxOp $input,
(NativeCodeCall<"onnx_mlir::getBestOutputTensorFromOperandsOrAllocate($_builder, $0.getDefiningOp())"> $srcOpRes))
>;
#endif // SPATIAL_TO_PIM

View File

@@ -618,17 +618,22 @@ void SpatialToPimPass::markOpToRemove(Operation* op) {
}
void SpatialToPimPass::replaceReturnOpOperands(func::ReturnOp& returnOp, IRRewriter& rewriter) {
for (auto it : llvm::enumerate(returnOp.getOperands())) {
Operation* returnOperand = it.value().getDefiningOp();
SmallVector<Value> originalOperands(returnOp.getOperands().begin(), returnOp.getOperands().end());
for (auto it : llvm::enumerate(originalOperands)) {
size_t orderWithinReturn = it.index();
Operation* returnOperand = it.value().getDefiningOp();
rewriter.modifyOpInPlace(returnOp,
[&] { returnOp.setOperand(orderWithinReturn, outputTensors[orderWithinReturn]); });
Operation* opToErase = returnOperand;
while (opToErase) {
bool isExclusivelyOwnedByReturnChain = opToErase->use_empty() || opToErase->hasOneUse();
bool isExclusivelyOwnedByReturnChain = opToErase->use_empty();
if (!isExclusivelyOwnedByReturnChain && opToErase->hasOneUse()) {
Operation* onlyUser = *opToErase->getUsers().begin();
isExclusivelyOwnedByReturnChain =
isa<func::ReturnOp, tensor::ConcatOp>(onlyUser) || isChannelUseChainOp(onlyUser);
}
if (!isExclusivelyOwnedByReturnChain)
break;

View File

@@ -455,4 +455,27 @@ def PimVSigmOp : PimOp<"vsigm", [DestinationStyleOpInterface]> {
}];
}
// Destination-style vector softmax: reads $input, writes the result into
// $outputBuffer, and returns it as $output.
def PimVSoftmaxOp : PimOp<"vsoftmax", [DestinationStyleOpInterface]> {
let summary = "Softmax over the full input vector";
let arguments = (ins
PimTensor:$input,
PimTensor:$outputBuffer
);
let results = (outs
PimTensor:$output
);
// DestinationStyleOpInterface hook: $outputBuffer is the DPS init operand.
let extraClassDeclaration = [{
mlir::MutableOperandRange getDpsInitsMutable() {
return getOutputBufferMutable();
}
}];
let assemblyFormat = [{
`(` $input `,` $outputBuffer `)` attr-dict `:` `(` type($input) `,` type($outputBuffer) `)` `->` type($output)
}];
}
#endif // PIM_DIALECT_H

View File

@@ -273,6 +273,7 @@ void registerOpBufferizationInterfaces(DialectRegistry& registry) {
PimVReluOp::attachInterface<UnaryDstOpInterface<PimVReluOp>>(*ctx);
PimVTanhOp::attachInterface<UnaryDstOpInterface<PimVTanhOp>>(*ctx);
PimVSigmOp::attachInterface<UnaryDstOpInterface<PimVSigmOp>>(*ctx);
PimVSoftmaxOp::attachInterface<UnaryDstOpInterface<PimVSoftmaxOp>>(*ctx);
});
}

View File

@@ -485,7 +485,7 @@ DCPAnalysisResult GraphDCP::getResult() {
size_t i = 0;
for (auto node : nodes) {
ret.computeToCPUMap[node->getSpatWeightedCompute()] = cpu;
if (i++ == nodes.size() - 1){
if (i++ == nodes.size() - 1) {
ret.isLastComputeOfACpu.insert(node->getSpatWeightedCompute());
ret.cpuToLastComputeMap[cpu] = node->getSpatWeightedCompute();
}

View File

@@ -43,7 +43,5 @@ bool TaskDCP::hasDescendent(TaskDCP* child) {
return false;
}
//TODO fare qualcosa di sensato
int TaskDCP::computeWeight(GraphDCP* graph, CPU cpu) {
return orig_weight;
}
// TODO fare qualcosa di sensato
int TaskDCP::computeWeight(GraphDCP* graph, CPU cpu) { return orig_weight; }

View File

@@ -75,11 +75,11 @@ public:
alst = val;
}
bool hasDescendent(TaskDCP* child);
int64_t Id() const { return (int64_t)spatWeightedCompute.getAsOpaquePointer(); }
int64_t Id() const { return (int64_t) spatWeightedCompute.getAsOpaquePointer(); }
bool isCP() const { return alst == aest; }
bool isScheduled() const { return scheduledCPU.has_value(); }
onnx_mlir::spatial::SpatWeightedCompute getSpatWeightedCompute(){return spatWeightedCompute;}
onnx_mlir::spatial::SpatWeightedCompute getSpatWeightedCompute() { return spatWeightedCompute; }
friend std::optional<Edge_pair> addEdge(TaskDCP* parent, TaskDCP* child, Weight_t weight);
friend void removeEdge(TaskDCP* parent, TaskDCP* child);

View File

@@ -71,12 +71,7 @@ public:
return true;
}
auto begin() {
return storage.begin();
}
auto end() {
return storage.end();
}
auto begin() { return storage.begin(); }
auto end() { return storage.end(); }
};

View File

@@ -1,7 +1,9 @@
#pragma once
#include "mlir/IR/BuiltinTypeInterfaces.h"
#include "llvm/Support/Casting.h"
#include <algorithm>
#include <cstdint>
#include <utility>
@@ -50,10 +52,9 @@ inline int64_t getSpatWeightCompute(onnx_mlir::spatial::SpatWeightedCompute spat
int64_t tot = 0;
for (auto& region : spatWeightedCompute.getBody()) {
for (auto& inst : region) {
for(auto result : inst.getResults()){
if(auto element = llvm::dyn_cast<mlir::ShapedType>(result.getType()))
tot += onnx_mlir::getSizeInBytes(element);
}
for (auto result : inst.getResults())
if (auto element = llvm::dyn_cast<mlir::ShapedType>(result.getType()))
tot += onnx_mlir::getSizeInBytes(element);
}
}
return tot;

View File

@@ -272,6 +272,22 @@ def SpatSigmoidOp : SpatOp<"sigmoid", []> {
}];
}
// Value-semantics softmax in the spat dialect: consumes one tensor slice and
// produces a fresh result (no destination operand, unlike the pim-level op).
// NOTE(review): presumably lowered to pim.vsoftmax later in the pipeline — confirm.
def SpatSoftmaxOp : SpatOp<"softmax", []> {
  let summary = "Softmax over the full input tensor slice";
  let arguments = (ins
    SpatTensor:$input
  );
  let results = (outs
    SpatTensor:$output
  );
  let assemblyFormat = [{
    `(` $input `)` attr-dict `:` type($input) `->` type($output)
  }];
}
def SpatReluOp : SpatOp<"relu", []> {
let summary = "Element-wise ReLU activation";

View File

@@ -202,9 +202,9 @@ private:
rewriter.clone(op, mapper);
}
for (auto users : oldWeightedCompute->getUsers())
if (auto funcRet = dyn_cast<func::ReturnOp>(users))
funcRet.setOperand(0, newWeightedCompute.getResult(0));
for (auto& use : llvm::make_early_inc_range(oldWeightedCompute->getUses()))
if (isa<func::ReturnOp>(use.getOwner()))
use.assign(newWeightedCompute.getResult(0));
oldToNewComputeMap.insert({oldWeightedCompute, newWeightedCompute});
return {cast<SpatWeightedCompute>(newWeightedCompute), computeValueResults};

View File

@@ -413,7 +413,7 @@ struct ChannelBroadcastReceiveOpInterface
outputTensor,
rewriter.getI32IntegerAttr(numElements * elementSize),
rewriter.getI32IntegerAttr(srcCoreId.value()))
.getOutput();
.getOutput();
replaceOpWithBufferizedValues(rewriter, op, newValue);

View File

@@ -146,6 +146,37 @@ struct RewriteHostSubviewLoadPattern final : OpRewritePattern<pim::PimMemCopyHos
}
};
// Rewrites a PimMemCopyDevToHostOp whose host/device operands come from
// constant-offset subviews into the same copy expressed on the base buffers
// with folded byte offsets — the store-direction counterpart of
// RewriteHostSubviewLoadPattern above.
struct RewriteHostSubviewStorePattern final : OpRewritePattern<pim::PimMemCopyDevToHostOp> {
  using OpRewritePattern::OpRewritePattern;

  LogicalResult matchAndRewrite(pim::PimMemCopyDevToHostOp copyOp, PatternRewriter& rewriter) const override {
    // Shared helper folds the subview offsets and, on success, invokes the
    // callback to materialize the rewritten copy with explicit byte offsets.
    // NOTE(review): assumes rewriteSubviewCopyLikeOp does not mutate the IR
    // on failure — confirm against its definition.
    auto status = rewriteSubviewCopyLikeOp(
        copyOp,
        copyOp.getHostTarget(),
        copyOp.getDeviceSource(),
        copyOp.getHostTargetOffset(),
        copyOp.getDeviceSourceOffset(),
        copyOp.getSize(),
        rewriter,
        [&](
            MemRefType resultType, Value dst, Value src, int64_t dstByteOffset, int64_t srcByteOffset, int64_t sliceBytes) {
          // Re-create the device->host copy; offsets and size are encoded as
          // i32 attributes, matching the op's builder signature.
          pim::PimMemCopyDevToHostOp::create(rewriter,
              copyOp.getLoc(),
              resultType,
              dst,
              src,
              rewriter.getI32IntegerAttr(static_cast<int32_t>(dstByteOffset)),
              rewriter.getI32IntegerAttr(static_cast<int32_t>(srcByteOffset)),
              rewriter.getI32IntegerAttr(static_cast<int32_t>(sliceBytes)));
        });
    if (failed(status))
      return failure();
    // Downstream users of the old op's result keep reading the host target,
    // which the new copy now writes directly.
    rewriter.replaceOp(copyOp, copyOp.getHostTarget());
    return success();
  }
};
struct FoldConstantCoreSubviewPattern final : OpRewritePattern<memref::SubViewOp> {
using OpRewritePattern::OpRewritePattern;
@@ -209,8 +240,10 @@ struct FoldConstantCoreSubviewPattern final : OpRewritePattern<memref::SubViewOp
} // namespace
void populateConstantFoldingSubviewPatterns(RewritePatternSet& patterns) {
patterns.add<RewriteCoreSubviewCopyPattern, RewriteHostSubviewLoadPattern, FoldConstantCoreSubviewPattern>(
patterns.getContext());
patterns.add<RewriteCoreSubviewCopyPattern,
RewriteHostSubviewLoadPattern,
RewriteHostSubviewStorePattern,
FoldConstantCoreSubviewPattern>(patterns.getContext());
}
} // namespace onnx_mlir

View File

@@ -85,6 +85,36 @@ python3 validation/operations/gen_tests.py
| 4D | `sigmoid/4d` | [2,3,4,4] | [2,3,4,4] | Standalone NCHW Sigmoid |
| After Gemm | `sigmoid/after_gemm` | [4,64] | [4,32] | Gemm + bias, then Sigmoid |
## Softmax
| Test | Directory | Input | Output | Axis | Notes |
|--------------|--------------------------|-------------|-------------|------|---------------------------------|
| Basic | `softmax/basic` | [3,5] | [3,5] | 1 | Row-wise softmax over features |
| 3D last axis | `softmax/3d_last_axis` | [2,3,4] | [2,3,4] | 2 | Last-dimension normalization |
| Channel axis | `softmax/channel_axis` | [1,3,2,2] | [1,3,2,2] | 1 | NCHW channel-wise softmax |
## Resize
| Test | Directory | Input | Output | Mode | Notes |
|---------------------|-------------------------|-----------|-----------|---------|-----------------------------------------|
| Nearest 2x | `resize/nearest_2x` | [1,1,2,3] | [1,1,4,6] | nearest | NCHW upsampling with scales [1,1,2,2] |
| Non-uniform scales | `resize/non_uniform` | [1,1,2,3] | [1,1,6,6] | nearest | Different height/width scaling factors |
| Explicit sizes | `resize/with_sizes` | [1,1,2,3] | [1,1,3,5] | nearest | Sizes input used instead of scales |
## Split
| Test | Directory | Input | Outputs | Axis | Notes |
|-----------------|---------------------------|-------|----------------------|------|-------------------------------------|
| Basic | `split/basic` | [2,6] | [2,2], [2,4] | 1 | Two-way split with explicit sizes |
| Equal three-way | `split/equal_three_way` | [2,6] | [2,2], [2,2], [2,2] | 1 | Optional split input omitted |
## Gather
| Test | Directory | Input | Indices | Output | Axis | Notes |
|----------------------|--------------------------------|-------|---------|----------|------|--------------------------------|
| Axis 1 | `gather/axis1` | [3,4] | [2] | [3,2] | 1 | Select two columns |
| Axis 0 matrix indices| `gather/axis0_matrix_indices` | [4,3] | [2,2] | [2,2,3] | 0 | Gather rows with 2D indices |
## Add
| Test | Directory | Input(s) | Output | Notes |

Binary file not shown.

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""Generate ONNX test models for validating GEMM, Conv, Pooling, Relu, and ReduceMean implementations."""
"""Generate ONNX test models for validating supported ONNX operations."""
import numpy as np
import onnx
@@ -473,6 +473,140 @@ def sigmoid_after_gemm():
save_model(model, "sigmoid/after_gemm", "sigmoid_after_gemm.onnx")
# ---------------------------------------------------------------------------
# Softmax tests
# ---------------------------------------------------------------------------
def softmax_basic():
    """Row-wise Softmax model: normalize each row of a [3, 5] float tensor."""
    shape = [3, 5]
    inp = helper.make_tensor_value_info("X", TensorProto.FLOAT, shape)
    out = helper.make_tensor_value_info("Y", TensorProto.FLOAT, shape)
    softmax = helper.make_node("Softmax", ["X"], ["Y"], axis=1)
    model = helper.make_model(
        helper.make_graph([softmax], "softmax_basic", [inp], [out]),
        opset_imports=[helper.make_opsetid("", 13)],
    )
    save_model(model, "softmax/basic", "softmax_basic.onnx")
def softmax_3d_last_axis():
    """Softmax normalizing the last axis (axis=2) of a [2, 3, 4] tensor."""
    dims = [2, 3, 4]
    x_info, y_info = (
        helper.make_tensor_value_info(name, TensorProto.FLOAT, dims)
        for name in ("X", "Y")
    )
    op = helper.make_node("Softmax", ["X"], ["Y"], axis=2)
    graph = helper.make_graph([op], "softmax_3d_last_axis", [x_info], [y_info])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "softmax/3d_last_axis", "softmax_3d_last_axis.onnx")
def softmax_channel_axis():
    """Channel-axis (axis=1) Softmax on an NCHW [1, 3, 2, 2] tensor."""
    nchw = [1, 3, 2, 2]
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, nchw)
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, nchw)
    graph = helper.make_graph(
        [helper.make_node("Softmax", ["X"], ["Y"], axis=1)],
        "softmax_channel_axis",
        [x_info],
        [y_info],
    )
    save_model(
        helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]),
        "softmax/channel_axis",
        "softmax_channel_axis.onnx",
    )
# ---------------------------------------------------------------------------
# Resize tests
# ---------------------------------------------------------------------------
def resize_nearest_2x():
    """Nearest-neighbor 2x spatial upsampling of an NCHW tensor via Resize scales."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 4, 6])
    # Empty roi plus per-axis scales, both baked in as initializers.
    inits = [
        numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi"),
        numpy_helper.from_array(np.asarray([1.0, 1.0, 2.0, 2.0], dtype=np.float32), name="scales"),
    ]
    resize = helper.make_node(
        "Resize", ["X", "roi", "scales"], ["Y"],
        mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor")
    graph = helper.make_graph([resize], "resize_nearest_2x", [x_info], [y_info], initializer=inits)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "resize/nearest_2x", "resize_nearest_2x.onnx")
def resize_nearest_non_uniform():
    """Nearest-neighbor Resize with different height (3x) and width (2x) scales."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 6, 6])
    # Empty roi plus non-uniform per-axis scales, both baked in as initializers.
    inits = [
        numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi"),
        numpy_helper.from_array(np.asarray([1.0, 1.0, 3.0, 2.0], dtype=np.float32), name="scales"),
    ]
    resize = helper.make_node(
        "Resize", ["X", "roi", "scales"], ["Y"],
        mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor")
    graph = helper.make_graph([resize], "resize_nearest_non_uniform", [x_info], [y_info], initializer=inits)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "resize/non_uniform", "resize_non_uniform.onnx")
def resize_with_sizes():
    """Resize an NCHW tensor to explicit output sizes (scales input left empty)."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 1, 2, 3])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 1, 3, 5])
    roi = numpy_helper.from_array(np.asarray([], dtype=np.float32), name="roi")
    sizes = make_int64_initializer("sizes", [1, 1, 3, 5])
    # The scales slot is the empty string so the sizes input takes effect.
    resize = helper.make_node(
        "Resize", ["X", "roi", "", "sizes"], ["Y"],
        mode="nearest", coordinate_transformation_mode="asymmetric", nearest_mode="floor")
    graph = helper.make_graph([resize], "resize_with_sizes", [x_info], [y_info], initializer=[roi, sizes])
    save_model(
        helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)]),
        "resize/with_sizes",
        "resize_with_sizes.onnx",
    )
# ---------------------------------------------------------------------------
# Split tests
# ---------------------------------------------------------------------------
def split_basic():
    """Two-way Split of a [2, 6] tensor into [2, 2] and [2, 4] along axis 1."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 6])
    outs = [
        helper.make_tensor_value_info("Y0", TensorProto.FLOAT, [2, 2]),
        helper.make_tensor_value_info("Y1", TensorProto.FLOAT, [2, 4]),
    ]
    # Explicit split sizes supplied via the optional second input.
    split_sizes = make_int64_initializer("split", [2, 4])
    node = helper.make_node("Split", ["X", "split"], ["Y0", "Y1"], axis=1)
    graph = helper.make_graph([node], "split_basic", [x_info], outs, initializer=[split_sizes])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "split/basic", "split_basic.onnx")
def split_equal_three_way():
    """Even three-way Split of a [2, 6] tensor; the optional split input is omitted."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [2, 6])
    out_names = ["Y0", "Y1", "Y2"]
    outs = [helper.make_tensor_value_info(n, TensorProto.FLOAT, [2, 2]) for n in out_names]
    node = helper.make_node("Split", ["X"], out_names, axis=1)
    graph = helper.make_graph([node], "split_equal_three_way", [x_info], outs)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "split/equal_three_way", "split_equal_three_way.onnx")
# ---------------------------------------------------------------------------
# Gather tests
# ---------------------------------------------------------------------------
def gather_axis1():
    """Gather two columns (indices 0 and 2) of a [3, 4] tensor along axis 1."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 4])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 2])
    idx = make_int64_initializer("indices", [0, 2])
    gather = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=1)
    graph = helper.make_graph([gather], "gather_axis1", [x_info], [y_info], initializer=[idx])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gather/axis1", "gather_axis1.onnx")
def gather_axis0_matrix_indices():
    """Gather rows of a [4, 3] tensor with a 2x2 indices matrix -> [2, 2, 3]."""
    x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 3])
    y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [2, 2, 3])
    idx = make_int64_initializer("indices", [[0, 2], [3, 1]])
    gather = helper.make_node("Gather", ["X", "indices"], ["Y"], axis=0)
    graph = helper.make_graph([gather], "gather_axis0_matrix_indices", [x_info], [y_info], initializer=[idx])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "gather/axis0_matrix_indices", "gather_axis0_matrix_indices.onnx")
# ---------------------------------------------------------------------------
# Add tests
# ---------------------------------------------------------------------------
@@ -599,55 +733,6 @@ def div_after_gemm():
save_model(model, "div/after_gemm", "div_after_gemm.onnx")
# ---------------------------------------------------------------------------
# ReduceMean tests
# ---------------------------------------------------------------------------
def reducemean_basic():
    """ReduceMean over the feature dimension, preserving rank."""
    # [4, 8] input reduced along axis 1 with keepdims=1 -> [4, 1] output.
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4, 1])
    # At opset 13, ReduceMean takes `axes` as an attribute (not an input).
    node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=1)
    graph = helper.make_graph([node], "reducemean_basic", [X], [Y])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/basic", "reduce_mean_basic.onnx")
def reducemean_keepdims_0():
    """ReduceMean over the feature dimension, dropping the reduced axis."""
    # keepdims=0 removes the reduced axis entirely: [4, 8] -> [4].
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [4, 8])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [4])
    node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[1], keepdims=0)
    graph = helper.make_graph([node], "reducemean_keepdims_0", [X], [Y])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/keepdims_0", "reduce_mean_keepdims_0.onnx")
def reducemean_4d_spatial():
    """ReduceMean over H and W on an NCHW tensor."""
    # Spatial reduction over axes [2, 3]; keepdims=1 keeps singleton H/W dims,
    # so [1, 3, 4, 4] -> [1, 3, 1, 1].
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 4, 4])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 3, 1, 1])
    node = helper.make_node("ReduceMean", ["X"], ["Y"], axes=[2, 3], keepdims=1)
    graph = helper.make_graph([node], "reducemean_4d_spatial", [X], [Y])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/4d_spatial", "reduce_mean_4d_spatial.onnx")
def reducemean_after_conv():
    """Conv followed by ReduceMean over the spatial dimensions."""
    # Fixed seed keeps the generated Conv weights/bias reproducible run-to-run.
    rng = np.random.default_rng(62)
    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 3, 5, 5])
    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 2, 1, 1])
    # 2 output channels, 3 input channels, 3x3 kernels, plus a per-channel bias.
    W = numpy_helper.from_array(rng.uniform(-1, 1, (2, 3, 3, 3)).astype(np.float32), name="W")
    B = numpy_helper.from_array(rng.uniform(-1, 1, (2,)).astype(np.float32), name="B")
    # Unpadded stride-1 Conv shrinks 5x5 -> 3x3; ReduceMean then collapses H/W.
    conv = helper.make_node("Conv", ["X", "W", "B"], ["C"],
                            kernel_shape=[3, 3], strides=[1, 1], pads=[0, 0, 0, 0])
    reduce = helper.make_node("ReduceMean", ["C"], ["Y"], axes=[2, 3], keepdims=1)
    graph = helper.make_graph([conv, reduce], "reducemean_after_conv", [X], [Y], initializer=[W, B])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
    save_model(model, "reduce_mean/after_conv", "reduce_mean_after_conv.onnx")
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
@@ -699,6 +784,24 @@ if __name__ == "__main__":
sigmoid_4d()
sigmoid_after_gemm()
print("\nGenerating Split tests:")
split_basic()
split_equal_three_way()
print("\nGenerating Softmax tests:")
softmax_basic()
softmax_3d_last_axis()
softmax_channel_axis()
print("\nGenerating Resize tests:")
resize_nearest_2x()
resize_nearest_non_uniform()
resize_with_sizes()
print("\nGenerating Gather tests:")
gather_axis1()
gather_axis0_matrix_indices()
print("\nGenerating Add tests:")
add_basic()
add_broadcast_row()

Binary file not shown.

Binary file not shown.