finish helper refactoring

use uniqued constant helpers everywhere; materialize transposed constants directly
2026-05-29 17:05:45 +02:00
parent 819d8af0f7
commit 8bb0babf1b
32 changed files with 300 additions and 467 deletions
@@ -40,14 +40,6 @@ static SmallVector<int64_t> normalizeAxesImpl(std::optional<ArrayAttr> axesAttr,
  return normalizedAxes;
 }

-SmallVector<int64_t> normalizeAxes(ArrayAttr axesAttr, int64_t rank) {
-  return normalizeAxesImpl(std::optional<ArrayAttr>(axesAttr), rank);
-}
-
-SmallVector<int64_t> normalizeAxes(std::optional<ArrayAttr> axesAttr, int64_t rank) {
-  return normalizeAxesImpl(axesAttr, rank);
-}
-
 FailureOr<SmallVector<int64_t>> normalizeAxesChecked(std::optional<ArrayAttr> axesAttr, int64_t rank) {
  SmallVector<int64_t> normalizedAxes = normalizeAxesImpl(axesAttr, rank);
  for (int64_t axis : normalizedAxes)
@@ -56,11 +48,7 @@ FailureOr<SmallVector<int64_t>> normalizeAxesChecked(std::optional<ArrayAttr> ax
  return normalizedAxes;
 }

-FailureOr<SmallVector<int64_t>> normalizeAxesChecked(ArrayAttr axesAttr, int64_t rank) {
-  return normalizeAxesChecked(std::optional<ArrayAttr>(axesAttr), rank);
-}
-
-Value createAffineApplyOrConstant(PatternRewriter& rewriter, Location loc, AffineExpr expr, ValueRange operands) {
+Value createAffineApplyOrFoldedConstant(PatternRewriter& rewriter, Location loc, AffineExpr expr, ValueRange operands) {
  AffineMap map = AffineMap::get(/*dimCount=*/operands.size(), /*symbolCount=*/0, expr);
  Operation* anchorOp = rewriter.getInsertionBlock()->getParentOp();
  return createAffineApplyOrFoldedConstant(rewriter, loc, map, operands, anchorOp);
@@ -68,22 +56,22 @@ Value createAffineApplyOrConstant(PatternRewriter& rewriter, Location loc, Affin

 Value multiplyIndexByConstant(PatternRewriter& rewriter, Operation* anchorOp, Value value, int64_t multiplier) {
  if (multiplier == 0)
-    return getOrCreateHostIndexConstant(rewriter, anchorOp, 0);
+    return getOrCreateIndexConstant(rewriter, anchorOp, 0);
  if (multiplier == 1)
    return value;

  MLIRContext* context = rewriter.getContext();
  AffineExpr d0 = getAffineDimExpr(0, context);
-  return createAffineApplyOrConstant(rewriter, anchorOp->getLoc(), d0 * multiplier, ValueRange {value});
+  return createAffineApplyOrFoldedConstant(rewriter, anchorOp->getLoc(), d0 * multiplier, ValueRange {value});
 }

 Value modIndexByConstant(PatternRewriter& rewriter, Location loc, Value value, int64_t divisor) {
  if (divisor == 1)
-    return getOrCreateHostIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 0);
+    return getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 0);

  MLIRContext* context = rewriter.getContext();
  AffineExpr d0 = getAffineDimExpr(0, context);
-  return createAffineApplyOrConstant(rewriter, loc, d0 % divisor, ValueRange {value});
+  return createAffineApplyOrFoldedConstant(rewriter, loc, d0 % divisor, ValueRange {value});
 }

 Value floorDivIndexByConstant(PatternRewriter& rewriter, Location loc, Value value, int64_t divisor) {
@@ -92,12 +80,12 @@ Value floorDivIndexByConstant(PatternRewriter& rewriter, Location loc, Value val

  MLIRContext* context = rewriter.getContext();
  AffineExpr d0 = getAffineDimExpr(0, context);
-  return createAffineApplyOrConstant(rewriter, loc, d0.floorDiv(divisor), ValueRange {value});
+  return createAffineApplyOrFoldedConstant(rewriter, loc, d0.floorDiv(divisor), ValueRange {value});
 }

-Value getOrMaterializeIndexValue(PatternRewriter& rewriter, Location loc, OpFoldResult value) {
+Value getOrMaterializeIndexValue(PatternRewriter& rewriter, OpFoldResult value) {
  if (auto attr = dyn_cast<Attribute>(value))
-    return getOrCreateHostIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), cast<IntegerAttr>(attr).getInt());
+    return getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), cast<IntegerAttr>(attr).getInt());
  return cast<Value>(value);
 }

@@ -19,18 +19,12 @@ mlir::FailureOr<int64_t> normalizeAxisChecked(int64_t axis, int64_t rank);

 int64_t normalizeIndex(int64_t index, int64_t dimSize);

-llvm::SmallVector<int64_t> normalizeAxes(mlir::ArrayAttr axesAttr, int64_t rank);
-
-llvm::SmallVector<int64_t> normalizeAxes(std::optional<mlir::ArrayAttr> axesAttr, int64_t rank);
-
-mlir::FailureOr<llvm::SmallVector<int64_t>> normalizeAxesChecked(mlir::ArrayAttr axesAttr, int64_t rank);
-
 mlir::FailureOr<llvm::SmallVector<int64_t>> normalizeAxesChecked(std::optional<mlir::ArrayAttr> axesAttr, int64_t rank);

-mlir::Value createAffineApplyOrConstant(mlir::PatternRewriter& rewriter,
-                                        mlir::Location loc,
-                                        mlir::AffineExpr expr,
-                                        mlir::ValueRange operands);
+mlir::Value createAffineApplyOrFoldedConstant(mlir::PatternRewriter& rewriter,
+                                              mlir::Location loc,
+                                              mlir::AffineExpr expr,
+                                              mlir::ValueRange operands);

 mlir::Value
 multiplyIndexByConstant(mlir::PatternRewriter& rewriter, mlir::Operation* anchorOp, mlir::Value value, int64_t multiplier);
@@ -40,6 +34,6 @@ mlir::Value modIndexByConstant(mlir::PatternRewriter& rewriter, mlir::Location l
 mlir::Value
 floorDivIndexByConstant(mlir::PatternRewriter& rewriter, mlir::Location loc, mlir::Value value, int64_t divisor);

-mlir::Value getOrMaterializeIndexValue(mlir::PatternRewriter& rewriter, mlir::Location loc, mlir::OpFoldResult value);
+mlir::Value getOrMaterializeIndexValue(mlir::PatternRewriter& rewriter, mlir::OpFoldResult value);

 } // namespace onnx_mlir
@@ -10,6 +10,7 @@

 #include "ShapeTilingUtils.hpp"
 #include "IndexingUtils.hpp"
+#include "src/Accelerators/PIM/Common/IR/ConstantUtils.hpp"
 #include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
 #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
 #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp"
@@ -19,10 +20,6 @@ using namespace mlir;

 namespace onnx_mlir {

-static Value getIndexValue(OpFoldResult result, ConversionPatternRewriter& rewriter, Location loc) {
-  return getOrMaterializeIndexValue(rewriter, loc, result);
-}
-
 static Value addIndexValues(Value lhs, Value rhs, ConversionPatternRewriter& rewriter, Location loc) {
  APInt lhsConst;
  if (matchPattern(lhs, m_ConstantInt(&lhsConst)) && lhsConst.isZero())
@@ -43,11 +40,12 @@ static Value multiplyIndexValue(Value value, OpFoldResult factor, ConversionPatt
    return arith::MulIOp::create(rewriter, loc, value, cast<Value>(factor)).getResult();

  if (factorConst.isZero())
-    return arith::ConstantIndexOp::create(rewriter, loc, 0).getResult();
+    return getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 0);
  if (factorConst.isOne())
    return value;

-  auto factorValue = arith::ConstantIndexOp::create(rewriter, loc, factorConst.getSExtValue()).getResult();
+  auto factorValue =
+    getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), factorConst.getSExtValue());
  return arith::MulIOp::create(rewriter, loc, value, factorValue).getResult();
 }

@@ -61,8 +59,6 @@ int64_t getStaticShapeElementCount(ArrayRef<int64_t> shape) {
  return std::accumulate(shape.begin(), shape.end(), int64_t {1}, std::multiplies<int64_t> {});
 }

-int64_t getStaticShapeElementCount(RankedTensorType type) { return getStaticShapeElementCount(type.getShape()); }
-
 SmallVector<int64_t> permuteShape(ArrayRef<int64_t> shape, ArrayRef<int64_t> permutation) {
  SmallVector<int64_t> permutedShape;
  permutedShape.reserve(permutation.size());
@@ -226,49 +222,6 @@ sliceVectorPerCrossbarPerCore(const Value& vectorToSlice, ConversionPatternRewri
  return slicesPerCore;
 }

-DenseMap<HSliceId, DenseMap<CoreId, SmallVector<Value>>> tileMatrix(
-  Value& matrixToTile, int64_t hSliceSize, int64_t vSliceSize, ConversionPatternRewriter& rewriter, Location& loc) {
-  assert("Not a matrix" && isMatrixShape(getTensorShape(matrixToTile)));
-
-  DenseMap<HSliceId, DenseMap<CoreId, SmallVector<Value>>> tiles;
-
-  SmallVector<Value> hSlices = sliceTensor(matrixToTile, 1, hSliceSize, rewriter, loc);
-  size_t numHSlices = hSlices.size();
-  for (size_t hSliceId = 0; hSliceId < numHSlices; hSliceId++) {
-    Value hSlice = hSlices[hSliceId];
-    SmallVector<Value> vSlices = sliceTensor(hSlice, 0, vSliceSize, rewriter, loc);
-    for (size_t vSliceId = 0; vSliceId < vSlices.size(); vSliceId++) {
-      size_t coreId = vSliceId / crossbarCountInCore;
-      Value vSlice = vSlices[vSliceId];
-      tiles[hSliceId][coreId].push_back(vSlice);
-    }
-  }
-  return tiles;
-}
-
-Value broadcastToVector(Value scalarToBroadcast, int64_t length, ConversionPatternRewriter& rewriter, Location loc) {
-  auto oldType = cast<RankedTensorType>(scalarToBroadcast.getType());
-  Type elementType = oldType.getElementType();
-  int64_t shape[2] = {1, length};
-  Type type = oldType.cloneWith(ArrayRef(shape), elementType);
-
-  auto buildBroadcast = [&](Value input) -> Value {
-    auto zero = arith::ConstantIndexOp::create(rewriter, loc, 0).getResult();
-    SmallVector<Value> index(oldType.getRank(), zero);
-    auto elementValue = tensor::ExtractOp::create(rewriter, loc, input, index).getResult();
-    return tensor::SplatOp::create(rewriter, loc, type, elementValue);
-  };
-
-  if (isCompileTimeComputable(scalarToBroadcast))
-    return buildBroadcast(scalarToBroadcast);
-
-  auto broadcastCompute =
-    createSpatCompute<1>(rewriter, loc, TypeRange {type}, {}, ValueRange {scalarToBroadcast}, [&](Value input) {
-      spatial::SpatYieldOp::create(rewriter, loc, buildBroadcast(input));
-    });
-  return broadcastCompute.getResult(0);
-}
-
 Value materializeContiguousTensorSlice(Value source,
                                       RankedTensorType resultType,
                                       ArrayRef<OpFoldResult> offsets,
@@ -294,7 +247,7 @@ Value materializeContiguousTensorSlice(Value source,
  Value init = tensor::EmptyOp::create(rewriter, loc, resultType.getShape(), resultType.getElementType()).getResult();
  SmallVector<Value> zeroIndices(resultType.getRank());
  for (Value& zeroIndex : zeroIndices)
-    zeroIndex = arith::ConstantIndexOp::create(rewriter, loc, 0).getResult();
+    zeroIndex = getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 0);

  SmallVector<Value> resultIndices;
  resultIndices.reserve(resultType.getRank());
@@ -304,7 +257,7 @@ Value materializeContiguousTensorSlice(Value source,
      SmallVector<Value> sourceIndices;
      sourceIndices.reserve(resultType.getRank());
      for (unsigned idx = 0; idx < resultType.getRank(); ++idx) {
-        Value offsetValue = getIndexValue(offsets[idx], rewriter, loc);
+        Value offsetValue = getOrMaterializeIndexValue(rewriter, offsets[idx]);
        Value scaledIndex = multiplyIndexValue(resultIndices[idx], strides[idx], rewriter, loc);
        sourceIndices.push_back(addIndexValues(offsetValue, scaledIndex, rewriter, loc));
      }
@@ -337,8 +290,8 @@ Value materializeContiguousTensorSlice(Value source,
    }

    Value lower = zeroIndices[dim];
-    Value upper = arith::ConstantIndexOp::create(rewriter, loc, resultType.getDimSize(dim)).getResult();
-    Value step = arith::ConstantIndexOp::create(rewriter, loc, 1).getResult();
+    Value upper = getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), resultType.getDimSize(dim));
+    Value step = getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 1);
    auto loop = scf::ForOp::create(rewriter, loc, lower, upper, step, ValueRange {accumulator});
    rewriter.setInsertionPointToStart(loop.getBody());
    resultIndices.push_back(loop.getInductionVar());
@@ -352,17 +305,6 @@ Value materializeContiguousTensorSlice(Value source,
  return buildLoopNest(buildLoopNest, 0, init);
 }

-Value extractStaticSlice(PatternRewriter& rewriter,
-                         Location loc,
-                         Value source,
-                         RankedTensorType resultType,
-                         ArrayRef<OpFoldResult> offsets) {
-  return tensor::ExtractSliceOp::create(
-           rewriter, loc, resultType, source, offsets, getStaticSizes(rewriter, resultType.getShape()),
-           getUnitStrides(rewriter, resultType.getRank()))
-    .getResult();
-}
-
 Value extractAxisSlice(
  PatternRewriter& rewriter, Location loc, Value source, int64_t axis, int64_t offset, int64_t size) {
  auto sourceType = cast<RankedTensorType>(source.getType());
@@ -18,41 +18,6 @@

 namespace onnx_mlir {

-template <class ShapedType>
-inline auto getImageWidth(const ShapedType& shapedType) {
-  return shapedType.getDimSize(2);
-}
-
-template <class ShapedType>
-inline auto getImageHeight(const ShapedType& shapedType) {
-  return shapedType.getDimSize(3);
-}
-
-template <class ShapedType>
-inline auto getImageChannel(const ShapedType& shapedType) {
-  return shapedType.getDimSize(1);
-}
-
-template <class ShapedType>
-inline auto getImageN(const ShapedType& shapedType) {
-  return shapedType.getDimSize(0);
-}
-
-template <class ShapedType>
-inline auto getKernelWidth(const ShapedType& shapedType) {
-  return shapedType.getDimSize(2);
-}
-
-template <class ShapedType>
-inline auto getKernelHeight(const ShapedType& shapedType) {
-  return shapedType.getDimSize(3);
-}
-
-template <class ShapedType>
-inline auto getFilterCount(const ShapedType& shapedType) {
-  return shapedType.getDimSize(0);
-}
-
 using HSliceId = size_t;
 using CoreId = size_t;

@@ -89,17 +54,6 @@ bool isHVectorShape(mlir::ArrayRef<T> shape) {
  return shape.size() == 2 && shape[0] == 1;
 }

-template <class T>
-bool isVVectorShape(mlir::ArrayRef<T> shape) {
-  return shape.size() == 2 && shape[1] == 1;
-}
-
-template <class T>
-T getVectorLength(mlir::ArrayRef<T> shape) {
-  assert(isVectorShape(shape));
-  return shape[0] != 1 ? shape[0] : shape[1];
-}
-
 inline auto getTensorShape(mlir::Value tensor) {
  return mlir::cast<mlir::RankedTensorType>(tensor.getType()).getShape();
 }
@@ -117,8 +71,6 @@ bool hasStaticPositiveShape(mlir::RankedTensorType type);

 int64_t getStaticShapeElementCount(mlir::ArrayRef<int64_t> shape);

-int64_t getStaticShapeElementCount(mlir::RankedTensorType type);
-
 llvm::SmallVector<int64_t> permuteShape(mlir::ArrayRef<int64_t> shape, mlir::ArrayRef<int64_t> permutation);

 llvm::SmallVector<int64_t> invertPermutation(mlir::ArrayRef<int64_t> permutation);
@@ -156,20 +108,6 @@ llvm::SmallVector<mlir::Value> sliceVector(const mlir::Value& vectorToSlice,
 llvm::DenseMap<CoreId, llvm::SmallVector<mlir::Value>> sliceVectorPerCrossbarPerCore(
  const mlir::Value& vectorToSlice, mlir::ConversionPatternRewriter& rewriter, mlir::Location loc);

-/// Tiles a matrix first across output columns and then across input rows so it
-/// can be assigned to crossbars grouped by core.
-llvm::DenseMap<HSliceId, llvm::DenseMap<CoreId, llvm::SmallVector<mlir::Value>>>
-tileMatrix(mlir::Value& matrixToTile,
-           int64_t hSliceSize,
-           int64_t vSliceSize,
-           mlir::ConversionPatternRewriter& rewriter,
-           mlir::Location& loc);
-
-mlir::Value broadcastToVector(mlir::Value scalarToBroadcast,
-                              int64_t length,
-                              mlir::ConversionPatternRewriter& rewriter,
-                              mlir::Location loc);
-
 mlir::Value materializeContiguousTensorSlice(mlir::Value source,
                                             mlir::RankedTensorType resultType,
                                             llvm::ArrayRef<mlir::OpFoldResult> offsets,
@@ -177,12 +115,6 @@ mlir::Value materializeContiguousTensorSlice(mlir::Value source,
                                             mlir::ConversionPatternRewriter& rewriter,
                                             mlir::Location loc);

-mlir::Value extractStaticSlice(mlir::PatternRewriter& rewriter,
-                               mlir::Location loc,
-                               mlir::Value source,
-                               mlir::RankedTensorType resultType,
-                               llvm::ArrayRef<mlir::OpFoldResult> offsets);
-
 mlir::Value extractAxisSlice(mlir::PatternRewriter& rewriter,
                             mlir::Location loc,
                             mlir::Value source,