add .clang-format
reformat all src
This commit is contained in:
@@ -6,20 +6,22 @@
|
||||
#include "mlir/IR/PatternMatch.h"
|
||||
#include "mlir/IR/Value.h"
|
||||
#include "mlir/IR/ValueRange.h"
|
||||
#include "src/Accelerators/PIM/Common/PIMCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/SpatialReducer.hpp"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstddef>
|
||||
|
||||
#include "src/Accelerators/PIM/Common/PIMCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ONNXToSpatialCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/SpatialReducer.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
|
||||
#include "src/Dialect/ONNX/ONNXOps.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
@@ -35,71 +37,68 @@ bool hasPostProcessExperimentalPoolingWindow<ONNXAveragePoolOp>() {
|
||||
}
|
||||
|
||||
template <typename PoolOp>
|
||||
Value postProcessExperimentalPoolingWindow(ConversionPatternRewriter &rewriter,
|
||||
Location loc, PoolOp poolOp, Value valueToDivide, size_t krn_size,
|
||||
size_t tilesSkippedByPadding) {
|
||||
Value postProcessExperimentalPoolingWindow(ConversionPatternRewriter& rewriter,
|
||||
Location loc,
|
||||
PoolOp poolOp,
|
||||
Value valueToDivide,
|
||||
size_t krn_size,
|
||||
size_t tilesSkippedByPadding) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <>
|
||||
Value postProcessExperimentalPoolingWindow<ONNXAveragePoolOp>(
|
||||
ConversionPatternRewriter &rewriter, Location loc, ONNXAveragePoolOp poolOp,
|
||||
Value valueToDivide, size_t krn_size, size_t tilesSkippedByPadding) {
|
||||
Value postProcessExperimentalPoolingWindow<ONNXAveragePoolOp>(ConversionPatternRewriter& rewriter,
|
||||
Location loc,
|
||||
ONNXAveragePoolOp poolOp,
|
||||
Value valueToDivide,
|
||||
size_t krn_size,
|
||||
size_t tilesSkippedByPadding) {
|
||||
bool countIncludePad = poolOp.getCountIncludePad() == 1;
|
||||
|
||||
size_t divisorNumber =
|
||||
countIncludePad ? krn_size : krn_size - tilesSkippedByPadding;
|
||||
size_t divisorNumber = countIncludePad ? krn_size : krn_size - tilesSkippedByPadding;
|
||||
|
||||
RankedTensorType scalarTensor =
|
||||
RankedTensorType::get({1}, rewriter.getF32Type());
|
||||
RankedTensorType scalarTensor = RankedTensorType::get({1}, rewriter.getF32Type());
|
||||
|
||||
// Put a spat.const before the computeOp, and use its value. We do this to be
|
||||
// compatible with the current code generation, which assumes constant to be
|
||||
// loaded in global memory, which is allocated by adding a spat.const OP
|
||||
// directly under func.func (i.e. alongside ComputeOps)
|
||||
auto computeOp = cast<spatial::SpatWeightedCompute>(
|
||||
valueToDivide.getDefiningOp()->getParentOp());
|
||||
auto computeOp = cast<spatial::SpatWeightedCompute>(valueToDivide.getDefiningOp()->getParentOp());
|
||||
rewriter.setInsertionPoint(computeOp);
|
||||
auto divisorValue = rewriter.create<spatial::SpatConstantOp>(loc, scalarTensor,
|
||||
rewriter.getI64IntegerAttr(divisorNumber),
|
||||
/* should_allocate = */ rewriter.getBoolAttr(true));
|
||||
auto divisorValue = rewriter.create<spatial::SpatConstantOp>(loc,
|
||||
scalarTensor,
|
||||
rewriter.getI64IntegerAttr(divisorNumber),
|
||||
/* should_allocate = */ rewriter.getBoolAttr(true));
|
||||
|
||||
rewriter.setInsertionPointAfterValue(valueToDivide);
|
||||
return rewriter.create<spatial::SpatVSDivOp>(
|
||||
loc, valueToDivide.getType(), valueToDivide, divisorValue);
|
||||
return rewriter.create<spatial::SpatVSDivOp>(loc, valueToDivide.getType(), valueToDivide, divisorValue);
|
||||
}
|
||||
|
||||
template <typename ReductionOp>
|
||||
Value reduceInputTiles(
|
||||
SmallVector<Value> &inputTiles, ConversionPatternRewriter &rewriter) {
|
||||
if (inputTiles.size() == 1) {
|
||||
Value reduceInputTiles(SmallVector<Value>& inputTiles, ConversionPatternRewriter& rewriter) {
|
||||
if (inputTiles.size() == 1)
|
||||
return inputTiles[0];
|
||||
}
|
||||
|
||||
if (inputTiles.size() == 2) {
|
||||
return rewriter.create<spatial::SpatVMaxOp>(inputTiles[0].getLoc(),
|
||||
inputTiles[0].getType(), inputTiles[0], inputTiles[1]);
|
||||
return rewriter.create<spatial::SpatVMaxOp>(
|
||||
inputTiles[0].getLoc(), inputTiles[0].getType(), inputTiles[0], inputTiles[1]);
|
||||
}
|
||||
|
||||
SmallVector<Value> left(
|
||||
inputTiles.begin(), inputTiles.begin() + inputTiles.size() / 2);
|
||||
SmallVector<Value> right(
|
||||
inputTiles.begin() + inputTiles.size() / 2, inputTiles.end());
|
||||
SmallVector<Value> left(inputTiles.begin(), inputTiles.begin() + inputTiles.size() / 2);
|
||||
SmallVector<Value> right(inputTiles.begin() + inputTiles.size() / 2, inputTiles.end());
|
||||
|
||||
Value leftReduced = reduceInputTiles<ReductionOp>(left, rewriter);
|
||||
Value rightReduced = reduceInputTiles<ReductionOp>(right, rewriter);
|
||||
|
||||
return rewriter.create<ReductionOp>(
|
||||
inputTiles[0].getLoc(), leftReduced.getType(), leftReduced, rightReduced);
|
||||
return rewriter.create<ReductionOp>(inputTiles[0].getLoc(), leftReduced.getType(), leftReduced, rightReduced);
|
||||
}
|
||||
|
||||
template <typename PoolOp, typename PoolOpAdaptor, typename ReduceOp>
|
||||
struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
ExperimentalPoolingBaseConverter(MLIRContext *ctx)
|
||||
: OpConversionPattern<PoolOp>(ctx) {}
|
||||
ExperimentalPoolingBaseConverter(MLIRContext* ctx)
|
||||
: OpConversionPattern<PoolOp>(ctx) {}
|
||||
|
||||
LogicalResult matchAndRewrite(PoolOp poolOp, PoolOpAdaptor adaptor,
|
||||
ConversionPatternRewriter &rewriter) const final {
|
||||
LogicalResult matchAndRewrite(PoolOp poolOp, PoolOpAdaptor adaptor, ConversionPatternRewriter& rewriter) const final {
|
||||
Value X = adaptor.getX();
|
||||
ShapedType xShape = mlir::cast<ShapedType>(X.getType());
|
||||
Value Y = poolOp.getResult();
|
||||
@@ -110,17 +109,13 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
unpackOptionalPairVector(adaptor.getDilations(), dilation_x, dilation_y);
|
||||
unpackOptionalPairVector(adaptor.getKernelShape(), krn_w, krn_h);
|
||||
|
||||
if (adaptor.getAutoPad() != "NOTSET") {
|
||||
return rewriter.notifyMatchFailure(
|
||||
poolOp, "auto_pad != NOTSET is deprecated.");
|
||||
}
|
||||
if (adaptor.getAutoPad() != "NOTSET")
|
||||
return rewriter.notifyMatchFailure(poolOp, "auto_pad != NOTSET is deprecated.");
|
||||
|
||||
size_t pad_x, pad_y;
|
||||
auto padUnpackError =
|
||||
unpackOptionalPadsVector(adaptor.getPads(), pad_x, pad_y);
|
||||
if (padUnpackError.has_value()) {
|
||||
auto padUnpackError = unpackOptionalPadsVector(adaptor.getPads(), pad_x, pad_y);
|
||||
if (padUnpackError.has_value())
|
||||
return rewriter.notifyMatchFailure(poolOp, padUnpackError.value());
|
||||
}
|
||||
|
||||
Location loc = poolOp.getLoc();
|
||||
|
||||
@@ -133,10 +128,8 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
|
||||
// Assert that the input is a tensor.ConcatOp.
|
||||
auto concat = X.getDefiningOp<tensor::ConcatOp>();
|
||||
if (!concat) {
|
||||
return rewriter.notifyMatchFailure(
|
||||
poolOp, "Expected input to be a tensor.ConcatOp");
|
||||
}
|
||||
if (!concat)
|
||||
return rewriter.notifyMatchFailure(poolOp, "Expected input to be a tensor.ConcatOp");
|
||||
|
||||
// Create a [channel_tile][x][y] array to store the input tiles.
|
||||
std::map<long, std::map<long, std::map<long, Value>>> inputTiles;
|
||||
@@ -145,24 +138,21 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
for (size_t y = 0; y < input_h; ++y) {
|
||||
for (size_t x = 0; x < input_w; ++x) {
|
||||
for (long it = 0; it < tileCount.quot + (tileCount.rem > 0); ++it) {
|
||||
size_t tilingSize =
|
||||
it == tileCount.quot ? tileCount.rem : crossbarSize;
|
||||
size_t tilingSize = it == tileCount.quot ? tileCount.rem : crossbarSize;
|
||||
|
||||
SmallVector<OpFoldResult> strides(4, rewriter.getIndexAttr(1));
|
||||
SmallVector<OpFoldResult> offsets = {/* 0 */ rewriter.getIndexAttr(0),
|
||||
/* 1 */ rewriter.getIndexAttr(0),
|
||||
/* 2 */ rewriter.getIndexAttr(x),
|
||||
/* 3 */ rewriter.getIndexAttr(y)};
|
||||
SmallVector<OpFoldResult> sizes = {
|
||||
/* 0 */ rewriter.getIndexAttr(1), // Batch size is always 1.
|
||||
/* 1 */ rewriter.getIndexAttr(tilingSize),
|
||||
/* 2 */ rewriter.getIndexAttr(1),
|
||||
/* 3 */ rewriter.getIndexAttr(1)};
|
||||
/* 1 */ rewriter.getIndexAttr(0),
|
||||
/* 2 */ rewriter.getIndexAttr(x),
|
||||
/* 3 */ rewriter.getIndexAttr(y)};
|
||||
SmallVector<OpFoldResult> sizes = {/* 0 */ rewriter.getIndexAttr(1), // Batch size is always 1.
|
||||
/* 1 */ rewriter.getIndexAttr(tilingSize),
|
||||
/* 2 */ rewriter.getIndexAttr(1),
|
||||
/* 3 */ rewriter.getIndexAttr(1)};
|
||||
|
||||
// Get the concat's operand that we want to slice.
|
||||
Value concatInput = concat.getOperand(it);
|
||||
Value slicedTile = rewriter.create<tensor::ExtractSliceOp>(
|
||||
loc, concatInput, offsets, sizes, strides);
|
||||
Value slicedTile = rewriter.create<tensor::ExtractSliceOp>(loc, concatInput, offsets, sizes, strides);
|
||||
|
||||
inputTiles[it][x][y] = slicedTile;
|
||||
}
|
||||
@@ -175,19 +165,15 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
for (size_t y = 0; y < output_h; ++y) {
|
||||
for (size_t x = 0; x < output_w; ++x) {
|
||||
for (long it = 0; it < itc.quot + (itc.rem > 0); ++it) {
|
||||
SmallVector<int64_t> outputShapeArray{
|
||||
/* 0 */ 1, // Batch size is always 1.
|
||||
/* 1 */
|
||||
cast<RankedTensorType>(inputTiles[it][0][0].getType())
|
||||
.getShape()[1],
|
||||
/* 2 */ 1,
|
||||
/* 3 */ 1};
|
||||
SmallVector<int64_t> outputShapeArray {/* 0 */ 1, // Batch size is always 1.
|
||||
/* 1 */
|
||||
cast<RankedTensorType>(inputTiles[it][0][0].getType()).getShape()[1],
|
||||
/* 2 */ 1,
|
||||
/* 3 */ 1};
|
||||
|
||||
auto elementType =
|
||||
dyn_cast<RankedTensorType>(xShape).getElementType();
|
||||
auto elementType = dyn_cast<RankedTensorType>(xShape).getElementType();
|
||||
|
||||
outputTileTypes.push_back(
|
||||
RankedTensorType::get(outputShapeArray, elementType));
|
||||
outputTileTypes.push_back(RankedTensorType::get(outputShapeArray, elementType));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -195,29 +181,25 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
// Create a plain value list of the input tiles.
|
||||
SmallVector<Value> inputTilesList;
|
||||
for (size_t y = 0; y < input_h; ++y) {
|
||||
for (size_t x = 0; x < input_w; ++x) {
|
||||
for (long it = 0; it < itc.quot + (itc.rem > 0); ++it) {
|
||||
for (size_t x = 0; x < input_w; ++x)
|
||||
for (long it = 0; it < itc.quot + (itc.rem > 0); ++it)
|
||||
inputTilesList.push_back(inputTiles[it][y][x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a single compute to calculate the output.
|
||||
auto computeOp = rewriter.create<spatial::SpatWeightedCompute>(
|
||||
loc, outputTileTypes, SmallVector<Value>(), inputTilesList);
|
||||
auto computeOp =
|
||||
rewriter.create<spatial::SpatWeightedCompute>(loc, outputTileTypes, SmallVector<Value>(), inputTilesList);
|
||||
|
||||
// Create a new block for the compute unit and add the operands.
|
||||
Block *block = rewriter.createBlock(&computeOp.getRegion());
|
||||
Block* block = rewriter.createBlock(&computeOp.getRegion());
|
||||
|
||||
// Fill the block arguments and keep a reference to them.
|
||||
std::map<size_t, std::map<size_t, std::map<size_t, Value>>> inputTilesArgs;
|
||||
for (size_t y = 0; y < input_h; ++y) {
|
||||
for (size_t x = 0; x < input_w; ++x) {
|
||||
for (long it = 0; it < itc.quot + (itc.rem > 0); ++it) {
|
||||
auto tileIndex = y * input_w * (itc.quot + (itc.rem > 0)) +
|
||||
x * (itc.quot + (itc.rem > 0)) + it;
|
||||
inputTilesArgs[it][y][x] = block->addArgument(
|
||||
computeOp->getOperand(tileIndex).getType(), loc);
|
||||
auto tileIndex = y * input_w * (itc.quot + (itc.rem > 0)) + x * (itc.quot + (itc.rem > 0)) + it;
|
||||
inputTilesArgs[it][y][x] = block->addArgument(computeOp->getOperand(tileIndex).getType(), loc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -236,28 +218,26 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
size_t end_y = std::min(start_y + krn_h, input_h);
|
||||
|
||||
SmallVector<Value> inputTilesToReduce;
|
||||
for (size_t ky = start_y; ky < end_y; ++ky) {
|
||||
for (size_t kx = start_x; kx < end_x; ++kx) {
|
||||
for (size_t ky = start_y; ky < end_y; ++ky)
|
||||
for (size_t kx = start_x; kx < end_x; ++kx)
|
||||
inputTilesToReduce.push_back(inputTilesArgs[it][ky][kx]);
|
||||
}
|
||||
}
|
||||
|
||||
auto reduceResult =
|
||||
reduceInputTiles<ReduceOp>(inputTilesToReduce, rewriter);
|
||||
auto reduceResult = reduceInputTiles<ReduceOp>(inputTilesToReduce, rewriter);
|
||||
|
||||
// If the reduce op is add, we need to divide the result by the
|
||||
// number of elements in the pooling window.
|
||||
if (hasPostProcessExperimentalPoolingWindow<PoolOp>()) {
|
||||
// Add a spat.const before the computeOp.
|
||||
rewriter.setInsertionPoint(computeOp);
|
||||
auto divisorValue = rewriter.create<spatial::SpatConstantOp>(loc,
|
||||
RankedTensorType::get({1}, rewriter.getF32Type()),
|
||||
rewriter.getI64IntegerAttr(krn_w * krn_h),
|
||||
rewriter.getBoolAttr(true));
|
||||
auto divisorValue =
|
||||
rewriter.create<spatial::SpatConstantOp>(loc,
|
||||
RankedTensorType::get({1}, rewriter.getF32Type()),
|
||||
rewriter.getI64IntegerAttr(krn_w * krn_h),
|
||||
rewriter.getBoolAttr(true));
|
||||
|
||||
rewriter.setInsertionPointAfter(reduceResult.getDefiningOp());
|
||||
reduceResult = rewriter.create<spatial::SpatVSDivOp>(
|
||||
loc, reduceResult.getType(), reduceResult, divisorValue);
|
||||
reduceResult =
|
||||
rewriter.create<spatial::SpatVSDivOp>(loc, reduceResult.getType(), reduceResult, divisorValue);
|
||||
}
|
||||
outputTiles.push_back(reduceResult);
|
||||
}
|
||||
@@ -274,8 +254,7 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
for (size_t y = 0; y < output_h; ++y) {
|
||||
for (size_t x = 0; x < output_w; ++x) {
|
||||
for (long it = 0; it < itc.quot + (itc.rem > 0); ++it) {
|
||||
auto tileIndex = y * output_w * (itc.quot + (itc.rem > 0)) +
|
||||
x * (itc.quot + (itc.rem > 0)) + it;
|
||||
auto tileIndex = y * output_w * (itc.quot + (itc.rem > 0)) + x * (itc.quot + (itc.rem > 0)) + it;
|
||||
computeOutput[it][y][x] = computeOp.getResult(tileIndex);
|
||||
}
|
||||
}
|
||||
@@ -285,30 +264,25 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
SmallVector<Value> outputTilesList;
|
||||
for (long it = 0; it < itc.quot + (itc.rem > 0); ++it) {
|
||||
SmallVector<Value> imgConcatTiles;
|
||||
for (size_t y = 0; y < output_h; ++y) {
|
||||
for (size_t x = 0; x < output_w; ++x) {
|
||||
for (size_t y = 0; y < output_h; ++y)
|
||||
for (size_t x = 0; x < output_w; ++x)
|
||||
imgConcatTiles.push_back(computeOutput[it][y][x]);
|
||||
}
|
||||
}
|
||||
|
||||
size_t tilingSize = it == tileCount.quot ? tileCount.rem : crossbarSize;
|
||||
|
||||
SmallVector<int64_t> outputShapeArray{
|
||||
/* 0 */ 1, // Batch size is always 1.
|
||||
/* 1 */ (long)tilingSize,
|
||||
/* 2 */ (long)output_w,
|
||||
/* 3 */ (long)output_h};
|
||||
SmallVector<int64_t> outputShapeArray {/* 0 */ 1, // Batch size is always 1.
|
||||
/* 1 */ (long) tilingSize,
|
||||
/* 2 */ (long) output_w,
|
||||
/* 3 */ (long) output_h};
|
||||
|
||||
auto elementType = dyn_cast<RankedTensorType>(xShape).getElementType();
|
||||
|
||||
outputTilesList.push_back(rewriter.create<spatial::SpatImgConcatOp>(loc,
|
||||
RankedTensorType::get(outputShapeArray, elementType),
|
||||
imgConcatTiles));
|
||||
outputTilesList.push_back(rewriter.create<spatial::SpatImgConcatOp>(
|
||||
loc, RankedTensorType::get(outputShapeArray, elementType), imgConcatTiles));
|
||||
}
|
||||
|
||||
// Create a new tensor.ConcatOp to concatenate the output tiles.
|
||||
Value outputTensor =
|
||||
rewriter.create<tensor::ConcatOp>(loc, 1, outputTilesList);
|
||||
Value outputTensor = rewriter.create<tensor::ConcatOp>(loc, 1, outputTilesList);
|
||||
|
||||
rewriter.replaceOp(poolOp, outputTensor);
|
||||
|
||||
@@ -316,12 +290,11 @@ struct ExperimentalPoolingBaseConverter : public OpConversionPattern<PoolOp> {
|
||||
}
|
||||
};
|
||||
|
||||
/// Registers the experimental pooling conversion patterns in `patterns`.
///
/// Two instantiations of `ExperimentalPoolingBaseConverter` are inserted, in
/// this order: one for `ONNXMaxPoolSingleOutOp` reducing with
/// `spatial::SpatVMaxOp`, and one for `ONNXAveragePoolOp` reducing with
/// `spatial::SpatVAddOp`.
///
/// \param patterns  Pattern set that receives the two patterns.
/// \param ctx       MLIR context passed to each pattern's constructor.
void populateExperimentalPoolingTilingPattern(RewritePatternSet& patterns, MLIRContext* ctx) {
  using MaxPoolConverter =
      ExperimentalPoolingBaseConverter<ONNXMaxPoolSingleOutOp, ONNXMaxPoolSingleOutOpAdaptor, spatial::SpatVMaxOp>;
  using AveragePoolConverter =
      ExperimentalPoolingBaseConverter<ONNXAveragePoolOp, ONNXAveragePoolOpAdaptor, spatial::SpatVAddOp>;

  patterns.insert<MaxPoolConverter>(ctx);
  patterns.insert<AveragePoolConverter>(ctx);
}
|
||||
|
||||
} // namespace onnx_mlir
|
||||
} // namespace onnx_mlir
|
||||
|
||||
Reference in New Issue
Block a user