#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"

#include "llvm/ADT/SmallVector.h"

#include <cassert>
#include <cstddef>
#include <cstdint>

#include "ShapeTilingUtils.hpp"
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/HostFoldability.hpp"

using namespace mlir;

// NOTE(review): the template arguments spelled out in this file (element types
// of SmallVector/ArrayRef, DenseMap key types, cast<> targets) should be
// confirmed against the declarations in ShapeTilingUtils.hpp.

namespace onnx_mlir {

/// Splits `tensorToSlice` along `axis` into consecutive chunks of `sliceSize`
/// elements and returns them in order of increasing offset.
///
/// The chunk count and the size of a possibly shorter trailing chunk come from
/// ceilIntegerDivideWithRemainder (presumably {ceil(length / sliceSize),
/// length % sliceSize}; a zero remainder leaves the last chunk at full size).
///
/// When isHostFoldableValue(tensorToSlice) holds, a plain tensor.extract_slice
/// is emitted. Otherwise the extract_slice is built inside a region created by
/// createSpatCompute<1> and terminated with a SpatYieldOp; the compute op's
/// first result is used as the slice.
///
/// @param tensorToSlice Value whose shape is read through getTensorShape.
/// @param axis          Dimension to slice along; must be < rank.
/// @param sliceSize     Extent of every chunk but possibly the last; must be > 0.
/// @param rewriter      Rewriter used to create attributes and operations.
/// @param loc           Location attached to every created operation.
/// @return One Value per chunk.
SmallVector<Value> sliceTensor(
    const Value& tensorToSlice, size_t axis, int64_t sliceSize, ConversionPatternRewriter& rewriter, Location loc) {
  ArrayRef<int64_t> shape = getTensorShape(tensorToSlice);
  assert(axis < shape.size() && "Invalid axis");
  // A non-positive slice size would divide by zero (or never advance) below.
  assert(sliceSize > 0 && "Slice size must be positive");
  // A dynamic extent is a sentinel, not a length: slicing it is meaningless.
  assert(!ShapedType::isDynamic(shape[axis]) && "Cannot slice along a dynamic dimension");

  SmallVector<OpFoldResult> strides(shape.size(), rewriter.getIndexAttr(1));
  SmallVector<OpFoldResult> offsets(shape.size(), rewriter.getIndexAttr(0));
  SmallVector<OpFoldResult> sizes;
  sizes.reserve(shape.size());
  for (const auto size : shape)
    sizes.push_back(rewriter.getIndexAttr(size));
  sizes[axis] = rewriter.getIndexAttr(sliceSize);

  // int64_t (not long) so the width matches sliceSize on every platform.
  const int64_t length = shape[axis];
  auto [numSlices, lastSliceSize] = ceilIntegerDivideWithRemainder(length, sliceSize);

  SmallVector<Value> slices;
  slices.reserve(numSlices);
  for (int64_t i = 0; i < numSlices; i++) {
    offsets[axis] = rewriter.getIndexAttr(i * sliceSize);

    // Only the final chunk may be shorter; sizes[axis] is not reused afterwards.
    int64_t currentSliceSize = sliceSize;
    if (i == numSlices - 1 && lastSliceSize != 0) {
      currentSliceSize = lastSliceSize;
      sizes[axis] = rewriter.getIndexAttr(lastSliceSize);
    }

    Value slice;
    if (isHostFoldableValue(tensorToSlice)) {
      slice = tensor::ExtractSliceOp::create(rewriter, loc, tensorToSlice, offsets, sizes, strides);
    }
    else {
      // The explicit result type is only needed for the compute op.
      SmallVector<int64_t> sliceShape(shape.begin(), shape.end());
      sliceShape[axis] = currentSliceSize;
      auto sliceType =
          RankedTensorType::get(sliceShape, cast<RankedTensorType>(tensorToSlice.getType()).getElementType());

      auto sliceCompute = createSpatCompute<1>(
          rewriter, loc, TypeRange {sliceType}, {}, ValueRange {tensorToSlice}, [&](Value input) {
            Value computedSlice = tensor::ExtractSliceOp::create(rewriter, loc, input, offsets, sizes, strides);
            spatial::SpatYieldOp::create(rewriter, loc, computedSlice);
          });
      slice = sliceCompute.getResult(0);
    }
    slices.push_back(slice);
  }

  return slices;
}

/// Slices a value whose shape satisfies isVectorShape into chunks of
/// `sliceSize` elements.
///
/// The slicing axis is 0 unless the first dimension equals 1, in which case
/// axis 1 is used. The work is delegated to sliceTensor.
///
/// @return One Value per chunk, in order of increasing offset.
SmallVector<Value>
sliceVector(const Value& vectorToSlice, int64_t sliceSize, ConversionPatternRewriter& rewriter, Location loc) {
  ArrayRef<int64_t> shape = getTensorShape(vectorToSlice);
  assert(isVectorShape(shape) && "Not a vector");
  size_t axis = shape[0] != 1 ? 0 : 1;
  return sliceTensor(vectorToSlice, axis, sliceSize, rewriter, loc);
}

/// Slices `vectorToSlice` into chunks of `crossbarSize` elements and groups
/// consecutive chunks by core.
///
/// Chunk number `i` is appended to the entry keyed by
/// `i / crossbarCountInCore`, so each entry holds at most
/// `crossbarCountInCore` chunks, in their original order.
///
/// @return Map from core id to the chunks assigned to that core.
DenseMap<size_t, SmallVector<Value>>
sliceVectorPerCrossbarPerCore(const Value& vectorToSlice, ConversionPatternRewriter& rewriter, Location loc) {
  SmallVector<Value> slices = sliceVector(vectorToSlice, crossbarSize, rewriter, loc);
  DenseMap<size_t, SmallVector<Value>> slicesPerCore;
  for (size_t sliceId = 0; sliceId < slices.size(); sliceId++) {
    size_t coreId = sliceId / crossbarCountInCore;
    slicesPerCore[coreId].push_back(slices[sliceId]);
  }
  return slicesPerCore;
}

/// Tiles a value whose shape satisfies isMatrixShape.
///
/// The matrix is first cut along axis 1 into pieces of `hSliceSize`; each of
/// those pieces is then cut along axis 0 into pieces of `vSliceSize`. The
/// resulting tile with indices (hSliceId, vSliceId) is appended to
/// `tiles[hSliceId][vSliceId / crossbarCountInCore]`.
///
/// @return Nested map: horizontal slice id -> core id -> tiles, in order of
///         increasing vertical offset.
DenseMap<size_t, DenseMap<size_t, SmallVector<Value>>> tileMatrix(
    Value& matrixToTile, int64_t hSliceSize, int64_t vSliceSize, ConversionPatternRewriter& rewriter, Location& loc) {
  assert(isMatrixShape(getTensorShape(matrixToTile)) && "Not a matrix");
  DenseMap<size_t, DenseMap<size_t, SmallVector<Value>>> tiles;

  SmallVector<Value> hSlices = sliceTensor(matrixToTile, 1, hSliceSize, rewriter, loc);
  size_t numHSlices = hSlices.size();
  for (size_t hSliceId = 0; hSliceId < numHSlices; hSliceId++) {
    Value hSlice = hSlices[hSliceId];
    SmallVector<Value> vSlices = sliceTensor(hSlice, 0, vSliceSize, rewriter, loc);
    for (size_t vSliceId = 0; vSliceId < vSlices.size(); vSliceId++) {
      size_t coreId = vSliceId / crossbarCountInCore;
      Value vSlice = vSlices[vSliceId];
      tiles[hSliceId][coreId].push_back(vSlice);
    }
  }
  return tiles;
}

/// Builds a tensor of shape [1, length] filled with the element found at the
/// all-zero index of `scalarToBroadcast`.
///
/// The result type is obtained by cloning the input's type with the new shape
/// and the same element type. The element is read with tensor.extract and
/// replicated with tensor.splat. When isHostFoldableValue(scalarToBroadcast)
/// holds these ops are emitted directly; otherwise they are emitted inside a
/// region created by createSpatCompute<1> and yielded through SpatYieldOp.
///
/// @return The broadcast value.
Value broadcastToVector(Value scalarToBroadcast, int64_t length, ConversionPatternRewriter& rewriter, Location loc) {
  auto oldType = cast<ShapedType>(scalarToBroadcast.getType());
  Type elementType = oldType.getElementType();
  int64_t shape[2] = {1, length};
  Type type = oldType.cloneWith(ArrayRef<int64_t>(shape), elementType);

  // Shared by both paths; `input` is either the original value or the block
  // argument supplied to the compute region callback.
  auto buildBroadcast = [&](Value input) -> Value {
    auto zero = arith::ConstantIndexOp::create(rewriter, loc, 0).getResult();
    SmallVector<Value> index(oldType.getRank(), zero);
    auto elementValue = tensor::ExtractOp::create(rewriter, loc, input, index).getResult();
    return tensor::SplatOp::create(rewriter, loc, type, elementValue);
  };

  if (isHostFoldableValue(scalarToBroadcast))
    return buildBroadcast(scalarToBroadcast);

  auto broadcastCompute =
      createSpatCompute<1>(rewriter, loc, TypeRange {type}, {}, ValueRange {scalarToBroadcast}, [&](Value input) {
        spatial::SpatYieldOp::create(rewriter, loc, buildBroadcast(input));
      });
  return broadcastCompute.getResult(0);
}

} // namespace onnx_mlir