142 lines
5.9 KiB
C++
142 lines
5.9 KiB
C++
#include "mlir/Dialect/Arith/IR/Arith.h"
|
|
#include "mlir/Dialect/Tensor/IR/Tensor.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
|
|
|
#include "ShapeTilingUtils.hpp"
|
|
#include "src/Accelerators/PIM/Common/IR/ConstantUtils.hpp"
|
|
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
|
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
|
|
|
|
using namespace mlir;
|
|
|
|
namespace onnx_mlir {
|
|
|
|
SmallVector<Value> sliceTensor(
|
|
const Value& tensorToSlice, size_t axis, int64_t sliceSize, PatternRewriter& rewriter, Location loc) {
|
|
ArrayRef<long> shape = getTensorShape(tensorToSlice);
|
|
assert("Invalid axis" && axis < shape.size());
|
|
|
|
SmallVector<OpFoldResult> strides(shape.size(), rewriter.getIndexAttr(1));
|
|
SmallVector<OpFoldResult> offsets = getZeroOffsets(rewriter, shape.size());
|
|
SmallVector<OpFoldResult> sizes = getStaticSizes(rewriter, shape);
|
|
sizes[axis] = rewriter.getIndexAttr(sliceSize);
|
|
|
|
long length = shape[axis];
|
|
auto [numSlices, lastSliceSize] = ceilIntegerDivideWithRemainder(length, sliceSize);
|
|
SmallVector<Value> slices;
|
|
slices.reserve(numSlices);
|
|
|
|
for (int64_t i = 0; i < numSlices; i++) {
|
|
offsets[axis] = rewriter.getIndexAttr(i * sliceSize);
|
|
int64_t currentSliceSize = sliceSize;
|
|
if (i == numSlices - 1 && lastSliceSize != 0) {
|
|
currentSliceSize = lastSliceSize;
|
|
sizes[axis] = rewriter.getIndexAttr(lastSliceSize);
|
|
}
|
|
|
|
SmallVector<int64_t> sliceShape(shape.begin(), shape.end());
|
|
sliceShape[axis] = currentSliceSize;
|
|
auto sliceType =
|
|
RankedTensorType::get(sliceShape, cast<RankedTensorType>(tensorToSlice.getType()).getElementType());
|
|
|
|
Value slice;
|
|
if (isCompileTimeComputable(tensorToSlice)) {
|
|
slice = tensor::ExtractSliceOp::create(rewriter, loc, tensorToSlice, offsets, sizes, strides);
|
|
}
|
|
else {
|
|
auto sliceCompute =
|
|
createSpatCompute<1>(rewriter, loc, TypeRange {sliceType}, {}, ValueRange {tensorToSlice}, [&](Value input) {
|
|
Value computedSlice = tensor::ExtractSliceOp::create(rewriter, loc, input, offsets, sizes, strides);
|
|
spatial::SpatYieldOp::create(rewriter, loc, computedSlice);
|
|
});
|
|
slice = sliceCompute.getResult(0);
|
|
}
|
|
slices.push_back(slice);
|
|
}
|
|
|
|
return slices;
|
|
}
|
|
|
|
SmallVector<Value>
|
|
sliceVector(const Value& vectorToSlice, int64_t sliceSize, PatternRewriter& rewriter, Location loc) {
|
|
ArrayRef<long> shape = getTensorShape(vectorToSlice);
|
|
assert("Not a vector" && isVectorShape(shape));
|
|
size_t axis = shape[0] != 1 ? 0 : 1;
|
|
return sliceTensor(vectorToSlice, axis, sliceSize, rewriter, loc);
|
|
}
|
|
|
|
DenseMap<CoreId, SmallVector<Value>>
|
|
sliceVectorPerCrossbarPerCore(const Value& vectorToSlice, PatternRewriter& rewriter, Location loc) {
|
|
SmallVector<Value> slices = sliceVector(vectorToSlice, crossbarSize, rewriter, loc);
|
|
DenseMap<CoreId, SmallVector<Value>> slicesPerCore;
|
|
for (size_t sliceId = 0; sliceId < slices.size(); sliceId++) {
|
|
size_t coreId = sliceId / crossbarCountInCore;
|
|
slicesPerCore[coreId].push_back(slices[sliceId]);
|
|
}
|
|
return slicesPerCore;
|
|
}
|
|
|
|
Value extractAxisSlice(
|
|
PatternRewriter& rewriter, Location loc, Value source, int64_t axis, int64_t offset, int64_t size) {
|
|
auto sourceType = cast<RankedTensorType>(source.getType());
|
|
SmallVector<int64_t> resultShape(sourceType.getShape());
|
|
resultShape[axis] = size;
|
|
auto resultType = RankedTensorType::get(resultShape, sourceType.getElementType(), sourceType.getEncoding());
|
|
|
|
SmallVector<OpFoldResult> offsets = getZeroOffsets(rewriter, sourceType.getRank());
|
|
SmallVector<OpFoldResult> sizes = getStaticSizes(rewriter, sourceType.getShape());
|
|
offsets[axis] = rewriter.getIndexAttr(offset);
|
|
sizes[axis] = rewriter.getIndexAttr(size);
|
|
return tensor::ExtractSliceOp::create(
|
|
rewriter, loc, resultType, source, offsets, sizes, getUnitStrides(rewriter, sourceType.getRank()))
|
|
.getResult();
|
|
}
|
|
|
|
Value extractStaticSliceOrIdentity(RewriterBase& rewriter,
|
|
Location loc,
|
|
Value source,
|
|
RankedTensorType resultType,
|
|
ArrayRef<OpFoldResult> offsets,
|
|
ArrayRef<OpFoldResult> sizes,
|
|
ArrayRef<OpFoldResult> strides) {
|
|
auto sourceType = cast<RankedTensorType>(source.getType());
|
|
size_t rank = static_cast<size_t>(sourceType.getRank());
|
|
|
|
bool isIdentitySlice =
|
|
sourceType == resultType && sourceType.hasStaticShape() && offsets.size() == rank && sizes.size() == rank
|
|
&& strides.size() == rank;
|
|
if (isIdentitySlice) {
|
|
ArrayRef<int64_t> sourceShape = sourceType.getShape();
|
|
for (auto [dim, offset, size, stride] : llvm::zip_equal(sourceShape, offsets, sizes, strides)) {
|
|
std::optional<int64_t> staticOffset = mlir::getConstantIntValue(offset);
|
|
std::optional<int64_t> staticSize = mlir::getConstantIntValue(size);
|
|
std::optional<int64_t> staticStride = mlir::getConstantIntValue(stride);
|
|
if (!staticOffset || !staticSize || !staticStride || *staticOffset != 0 || *staticSize != dim || *staticStride != 1) {
|
|
isIdentitySlice = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (isIdentitySlice)
|
|
return source;
|
|
|
|
return tensor::ExtractSliceOp::create(rewriter, loc, resultType, source, offsets, sizes, strides).getResult();
|
|
}
|
|
|
|
Value insertStaticSlice(
|
|
PatternRewriter& rewriter, Location loc, Value source, Value dest, ArrayRef<OpFoldResult> offsets) {
|
|
auto sourceType = cast<RankedTensorType>(source.getType());
|
|
return tensor::InsertSliceOp::create(rewriter,
|
|
loc,
|
|
source,
|
|
dest,
|
|
offsets,
|
|
getStaticSizes(rewriter, sourceType.getShape()),
|
|
getUnitStrides(rewriter, sourceType.getRank()))
|
|
.getResult();
|
|
}
|
|
|
|
} // namespace onnx_mlir
|