remove old unused stuff

2026-03-23 20:00:09 +01:00
parent f2d593f749
commit f869925b64
12 changed files with 16 additions and 906 deletions
@@ -7,12 +7,8 @@ add_pim_library(OMONNXToSpatial
  Patterns/Math/Conv.cpp
  Patterns/Math/MatMul.cpp
  Patterns/NN/Pool.cpp
-  Patterns/NN/ReduceMean.cpp
  Patterns/Tensor/Concat.cpp
  Patterns/Tensor/Reshape.cpp
-  Utils/SpatialReducer.cpp
-  Utils/WeightSubdivider.cpp
-  Utils/AnnotateReplication.cpp
  ONNXToSpatialPass.cpp
  Common.cpp

@@ -57,8 +57,6 @@ inline auto getFilterCount(const ShapedType& shapedType) {
  return shapedType.getDimSize(0);
 }

-inline constexpr mlir::StringRef REPLICATION_ATTR_NAME = "replication_factor";
-
 using HSliceId = size_t;
 using CoreId = size_t;

@@ -11,7 +11,6 @@
 #include <fstream>

 #include "Common/PimCommon.hpp"
-#include "Conversion/ONNXToSpatial/Utils/AnnotateReplication.hpp"
 #include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
 #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp"
 #include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
@@ -68,11 +67,6 @@ void ONNXToSpatialPass::runOnOperation() {
    signalPassFailure();
    return;
  }
-  if (annotateReplication(*entryFunc, rewriter).failed()) {
-    llvm::dbgs() << "Failed during annotation for replication analysis\n";
-    signalPassFailure();
-    return;
-  }

  ConversionTarget target(*ctx);
  target.addLegalDialect<spatial::SpatialDialect, ONNXDialect, tensor::TensorDialect, arith::ArithDialect>();
@@ -98,7 +92,6 @@ void ONNXToSpatialPass::runOnOperation() {
  populateReshapeConversionPattern(patterns, ctx);

  populateONNXConcatToTensorConcatPattern(patterns, ctx);
-  populateReduceMeanConversionPattern(patterns, ctx);

  if (failed(applyPartialConversion(moduleOp, target, std::move(patterns)))) {
    signalPassFailure();
@@ -17,6 +17,4 @@ void populateONNXConcatToTensorConcatPattern(mlir::RewritePatternSet& patterns,

 void populateReshapeConversionPattern(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);

-void populateReduceMeanConversionPattern(mlir::RewritePatternSet& patterns, mlir::MLIRContext* ctx);
-
 } // namespace onnx_mlir
@@ -5,14 +5,12 @@
 #include "mlir/Support/LogicalResult.h"
 #include "mlir/Transforms/DialectConversion.h"

-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"

 #include <cassert>

 #include "src/Accelerators/PIM/Common/PimCommon.hpp"
 #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
-#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/SpatialReducer.hpp"
 #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
 #include "src/Dialect/ONNX/ONNXOps.hpp"

@@ -21,12 +19,8 @@ using namespace mlir;
 namespace onnx_mlir {
 namespace {

-constexpr StringRef COMPUTE_HAS_SOFTMAX_DIVISOR_ATTRNAME = "computeWithSoftmaxDivisor";
-
-static FailureOr<Value> materializeScaledConstantTensor(Value value,
-                                                        float factor,
-                                                        ConversionPatternRewriter& rewriter,
-                                                        Location loc) {
+static FailureOr<Value>
+materializeScaledConstantTensor(Value value, float factor, ConversionPatternRewriter& rewriter, Location loc) {
  if (factor == 1.0f)
    return value;

@@ -70,16 +64,6 @@ struct GemvToSpatialCompute : OpConversionPattern<ONNXGemmOp> {
  LogicalResult matchAndRewrite(ONNXGemmOp gemmOp,
                                ONNXGemmOpAdaptor gemmOpAdaptor,
                                ConversionPatternRewriter& rewriter) const override;
-
-private:
-  static Value resolveONNXExpOpFromUseChain(Value startValue);
-
-  static LogicalResult softmaxReductionApplication(SmallVector<OpAndResNum>& outputOpsAndResNums,
-                                                   Value& softmaxChannel,
-                                                   ConversionPatternRewriter& rewriter,
-                                                   SpatialReducer& reducer,
-                                                   ONNXGemmOp& gemmOp,
-                                                   Location& loc);
 };

 } // namespace
@@ -122,7 +106,13 @@ LogicalResult GemmToManyGemv::matchAndRewrite(ONNXGemmOp gemmOp,
    // Expand rank-1 bias [N] to rank-2 [1, N] for uniform handling
    if (cType.getRank() == 1) {
      auto expandedType = RankedTensorType::get({1, cType.getDimSize(0)}, cType.getElementType());
-      c = tensor::ExpandShapeOp::create(rewriter, loc, expandedType, c, SmallVector<ReassociationIndices>{{0, 1}});
+      c = tensor::ExpandShapeOp::create(rewriter,
+                                        loc,
+                                        expandedType,
+                                        c,
+                                        SmallVector<ReassociationIndices> {
+                                          {0, 1}
+      });
      cType = expandedType;
    }
    assert("Only support rank 2 tensor for C" && cType.getRank() == 2);
@@ -208,7 +198,13 @@ LogicalResult GemvToSpatialCompute::matchAndRewrite(ONNXGemmOp gemmOp,
    // Expand rank-1 bias [N] to rank-2 [1, N] for uniform handling
    if (cType.getRank() == 1) {
      auto expandedType = RankedTensorType::get({1, cType.getDimSize(0)}, cType.getElementType());
-      c = tensor::ExpandShapeOp::create(rewriter, gemmLoc, expandedType, c, SmallVector<ReassociationIndices>{{0, 1}});
+      c = tensor::ExpandShapeOp::create(rewriter,
+                                        gemmLoc,
+                                        expandedType,
+                                        c,
+                                        SmallVector<ReassociationIndices> {
+                                          {0, 1}
+      });
      cType = expandedType;
    }
    assert("Only support rank 2 tensor for C" && cType.getRank() == 2);
@@ -356,124 +352,6 @@ LogicalResult GemvToSpatialCompute::matchAndRewrite(ONNXGemmOp gemmOp,
  return success();
 }

-Value GemvToSpatialCompute::resolveONNXExpOpFromUseChain(Value startValue) {
-  Value walker = startValue;
-
-  while (!llvm::isa<ONNXExpOp>(walker.getDefiningOp())) {
-    walker = walker.getDefiningOp()->getOperand(0);
-
-    assert(walker && walker.getDefiningOp()
-           && "Unwinded the whole chain of operations while trying to "
-              "find ONNXExpOp, but did not find it");
-  }
-
-  // Make sure the dividend is actually produced by an ONNXExpOp
-  assert(llvm::isa<ONNXExpOp>(walker.getDefiningOp())
-         && "Old output tile (softmax reducer) is not produced by an "
-            "ONNXExpOp");
-
-  return walker;
-}
-
-LogicalResult GemvToSpatialCompute::softmaxReductionApplication(SmallVector<OpAndResNum>& outputOpsAndResNums,
-                                                                Value& softmaxChannel,
-                                                                ConversionPatternRewriter& rewriter,
-                                                                SpatialReducer& reducer,
-                                                                ONNXGemmOp& gemmOp,
-                                                                Location& loc) {
-  // TODO: Check case with one compute op
-
-  // Cast vector of Value into vector of ComputeOp
-  SmallVector<ComputeAndResNum> softmaxOpsToReduce =
-    llvm::to_vector(llvm::map_range(outputOpsAndResNums, [&](OpAndResNum computeAndResNum) {
-      return std::make_pair(cast<spatial::SpatWeightedCompute>(computeAndResNum.first), computeAndResNum.second);
-    }));
-
-  RankedTensorType::Builder tensorTypeBuilder({1}, Float32Type::get(rewriter.getContext()), nullptr);
-  const TensorType scalarTensorType = tensorTypeBuilder;
-
-  reducer.applyReducePattern(
-    softmaxOpsToReduce,
-    [&](Value a, Value b) { return spatial::SpatVAddOp::create(rewriter, loc, scalarTensorType, a, b); },
-    /* preprocess = */
-    [&](Value a) { return spatial::SpatSumOp::create(rewriter, loc, scalarTensorType, a); },
-    [&](Value softmaxDivisor) {
-      // Signal that this is the compute with the softmax divisor
-      auto computeOp = cast<spatial::SpatWeightedCompute>(softmaxDivisor.getDefiningOp()->getParentOp());
-      computeOp->setAttr(COMPUTE_HAS_SOFTMAX_DIVISOR_ATTRNAME, rewriter.getUnitAttr());
-
-      // Broadcast the divisor to all the cores
-      rewriter.setInsertionPointAfterValue(softmaxDivisor);
-      spatial::SpatChannelBroadcastSendOp::create(rewriter, loc, softmaxChannel, softmaxDivisor);
-
-      /*
-       * softmaxDividend = onnx.exp (...)
-       * sum = spat.SumOp(softmaxDividend)
-       * [following can be repeated N times, thus walk the use chain]
-       * softmaxDivisor = spat.sadd(sum, ...)
-       */
-      Value softmaxDividend = resolveONNXExpOpFromUseChain(softmaxDivisor.getDefiningOp()->getOperand(0));
-
-      // Make sure the dividend is actually produced by an ONNXExpOp
-      assert(llvm::isa<ONNXExpOp>(softmaxDividend.getDefiningOp())
-             && "Dividend of softmax reduction is not an ONNXExpOp");
-
-      // Do not divide here, divide after this
-      return softmaxDivisor;
-    });
-
-  // In all the cores, insert a ChannelRecvOp and divide the output tile by
-  // the reduced denominator.
-  outputOpsAndResNums.clear();
-  outputOpsAndResNums.reserve(softmaxOpsToReduce.size());
-  for (auto& computeToDivideOpAndResNum : softmaxOpsToReduce) {
-
-    auto yieldOp = cast<spatial::SpatYieldOp>(computeToDivideOpAndResNum.first.getBody().front().getTerminator());
-
-    Value divisor;
-
-    // Check if this compute contains the softmax divisor: if so, find the
-    // ChannelBroadcastSendOp, otherwise receive the value from the channel
-    // using ChannelBroadcastReceiveOp
-    if (computeToDivideOpAndResNum.first->hasAttr(COMPUTE_HAS_SOFTMAX_DIVISOR_ATTRNAME)) {
-
-      bool found = false;
-      for (auto broadcastOp :
-           computeToDivideOpAndResNum.first.getBody().front().getOps<spatial::SpatChannelBroadcastSendOp>()) {
-        assert(found == false
-               && "More than one ChannelBroadcastSendOp in "
-                  "compute? How is this possible?");
-        found = true;
-
-        divisor = broadcastOp.getData();
-      }
-
-      assert(found
-             && "No ChannelBroadcastSendOp in compute where softmax "
-                "divisor was specified to be?");
-    }
-    else {
-      rewriter.setInsertionPoint(yieldOp);
-      divisor = spatial::SpatChannelBroadcastReceiveOp::create(rewriter, loc, scalarTensorType, softmaxChannel);
-    }
-
-    // Walk the chain of operations until we find the ONNXExpOp: this is
-    // needed because some some may have a different amount of `VAddOp`s due
-    // to the tree reduction (e.g. some may have no VAddOp, some may have
-    // multiples)
-    Value oldOutputTile = resolveONNXExpOpFromUseChain(yieldOp->getOperand(computeToDivideOpAndResNum.second));
-
-    rewriter.setInsertionPoint(yieldOp);
-    Value newOutputTile = spatial::SpatVSDivOp::create(rewriter, loc, oldOutputTile.getType(), oldOutputTile, divisor);
-    auto yieldOperandNum = yieldOp->getNumOperands();
-    yieldOp->insertOperands(yieldOperandNum, newOutputTile);
-
-    outputOpsAndResNums.push_back({computeToDivideOpAndResNum.first, yieldOperandNum});
-  }
-
-  return success();
-}
-
 void populateOnnxGemmOpPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
  patterns.insert<GemmToManyGemv>(ctx);
  patterns.insert<GemvToSpatialCompute>(ctx);
@@ -1,89 +0,0 @@
-#include "mlir/Transforms/DialectConversion.h"
-
-#include "Conversion/ONNXToSpatial/Patterns.hpp"
-#include "src/Dialect/ONNX/ONNXOps.hpp"
-
-using namespace mlir;
-namespace onnx_mlir {
-
-struct ReduceMeanConversionPattern : public OpConversionPattern<ONNXReduceMeanV13Op> {
-
-  ReduceMeanConversionPattern(MLIRContext* ctx)
-  : OpConversionPattern(ctx) {}
-
-  LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMean,
-                                ONNXReduceMeanV13OpAdaptor adaptor,
-                                ConversionPatternRewriter& rewriter) const final {
-
-    // Get the input tensor.
-    Value inputTensor = adaptor.getData();
-    auto inputTensorType = cast<RankedTensorType>(inputTensor.getType());
-
-    // This pattern will substitute the ONNXReduceMeanV13Op with a
-    // ONNXAveragePoolOp with the same input tensor and an appropriate kernel
-    // shape and strides.
-
-    // To get the stride and shape of the kernel, we need to read the tensor
-    // shape.
-    int image_height = inputTensorType.getShape()[2];
-    int image_width = inputTensorType.getShape()[3];
-
-    // Define the kernel shape and strides.
-    SmallVector<int64_t> kernelShapeVals = {image_height, image_width};
-    SmallVector<int64_t> stridesVals = {image_height, image_width};
-    SmallVector<int64_t> dilationsVals = {1, 1};
-
-    // Set the pads to 0.
-    SmallVector<int64_t> padsVals = {0, 0, 0, 0};
-
-    // Create the ArrayAttrs
-    auto kernelShape = mlir::ArrayAttr::get(
-      rewriter.getContext(), llvm::to_vector(llvm::map_range(kernelShapeVals, [&](int64_t v) -> mlir::Attribute {
-        return rewriter.getI64IntegerAttr(v);
-      })));
-
-    auto strides = mlir::ArrayAttr::get(rewriter.getContext(),
-                                        llvm::to_vector(llvm::map_range(stridesVals, [&](int64_t v) -> mlir::Attribute {
-                                          return rewriter.getI64IntegerAttr(v);
-                                        })));
-
-    auto dilations = mlir::ArrayAttr::get(
-      rewriter.getContext(), llvm::to_vector(llvm::map_range(dilationsVals, [&](int64_t v) -> mlir::Attribute {
-        return rewriter.getI64IntegerAttr(v);
-      })));
-
-    auto pads = mlir::ArrayAttr::get(rewriter.getContext(),
-                                     llvm::to_vector(llvm::map_range(padsVals, [&](int64_t v) -> mlir::Attribute {
-                                       return rewriter.getI64IntegerAttr(v);
-                                     })));
-
-    // Create the resulting tensor type.
-    auto resultType = RankedTensorType::get(
-      /*shape=*/ {inputTensorType.getShape()[0], inputTensorType.getShape()[1], 1, 1},
-      /*elementType=*/inputTensorType.getElementType());
-
-    // Create the ONNXAveragePoolOp.
-    auto averagePool = ONNXAveragePoolOp::create(rewriter,
-                                                 reduceMean.getLoc(),
-                                                 resultType,
-                                                 inputTensor,
-                                                 /*auto_pad=*/"NOTSET",
-                                                 /*ceil_mode=*/0,
-                                                 /*count_include_pad=*/1,
-                                                 dilations,
-                                                 /*kernel_shape=*/kernelShape,
-                                                 /*pads=*/pads,
-                                                 /*strides=*/strides);
-
-    // Replace the ONNXReduceMeanV13Op with the ONNXAveragePoolOp.
-    rewriter.replaceOp(reduceMean, averagePool.getResult());
-
-    return success();
-  }
-};
-
-void populateReduceMeanConversionPattern(RewritePatternSet& patterns, MLIRContext* ctx) {
-  patterns.insert<ReduceMeanConversionPattern>(ctx);
-}
-
-} // namespace onnx_mlir
@@ -1,119 +0,0 @@
-#include <queue>
-
-#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
-#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
-#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/AnnotateReplication.hpp"
-#include "src/Dialect/ONNX/ONNXOps.hpp"
-
-using namespace mlir;
-
-namespace onnx_mlir {
-
-/**
- * @brief Structure that describes the replication of a convolution operation,
- * along the image height axis.
- */
-struct ConvReplication {
-  ONNXConvOp convOp;            // Convolution operation
-  size_t input_w;               // Width of the input image
-  size_t replicationFactor;     // Replication factor on the image height axis
-  size_t coresNeededPerReplica; // Number of cores needed for each replica
-
-  friend bool operator<(const ConvReplication& a, const ConvReplication& b) {
-    return a.input_w / a.replicationFactor < b.input_w / b.replicationFactor;
-  }
-
-  ConvReplication(ONNXConvOp convOp, size_t input_w, size_t replicationFactor, size_t coresNeededPerReplica)
-  : convOp(convOp),
-    input_w(input_w),
-    replicationFactor(replicationFactor),
-    coresNeededPerReplica(coresNeededPerReplica) {}
-};
-
-LogicalResult annotateReplication(mlir::func::FuncOp funcOp, mlir::IRRewriter& rewriter) {
-
-  if (coresCount == -1) {
-    // No need for annotation, implicitly set replication to 1
-    return success();
-  }
-
-  std::priority_queue<struct ConvReplication> convOpsReplicationQueue;
-
-  size_t minimumCores = 0;
-
-  for (auto& op : funcOp.getFunctionBody().begin()->getOperations()) {
-    if (auto convOp = dyn_cast<ONNXConvOp>(op)) {
-      // Convolution layer
-
-      Value X = convOp.getX(), W = convOp.getW();
-      ShapedType xShape = mlir::cast<ShapedType>(X.getType());
-      ShapedType wShape = mlir::cast<ShapedType>(W.getType());
-
-      size_t input_w = getImageWidth(xShape);
-      size_t krn_h = getKernelHeight(wShape);
-      size_t krn_w = getKernelWidth(wShape);
-
-      size_t inputTileCount = ceilIntegerDivide(getImageChannel(xShape), crossbarSize.getValue());
-      size_t outputTileCount = ceilIntegerDivide(wShape.getDimSize(0), crossbarSize.getValue());
-
-      auto neededXbars = krn_h * krn_w * inputTileCount * outputTileCount;
-      auto neededCores = ceilIntegerDivide(neededXbars, crossbarCountInCore.getValue());
-
-      minimumCores += neededCores;
-
-      convOpsReplicationQueue.emplace(convOp, input_w, 1, neededCores);
-    }
-    else if (auto gemmOp = dyn_cast<ONNXGemmOp>(op)) {
-      // Fully connected layer
-      auto matrixTensorShape = cast<ShapedType>(gemmOp.getB().getType());
-      auto inputSize = matrixTensorShape.getDimSize(0);
-      auto outputSize = matrixTensorShape.getDimSize(1);
-      if (gemmOp.getTransB())
-        std::swap(inputSize, outputSize);
-
-      const size_t inputTilesCount = ceilIntegerDivide(inputSize, crossbarSize.getValue());
-      const size_t outputTilesCount = ceilIntegerDivide(outputSize, crossbarSize.getValue());
-
-      // Each output tile is computed by `coresPerOutputTile` cores. The
-      // entire input is given to each of these cores.
-      const size_t coresPerOutputTile = ceilIntegerDivide(inputTilesCount, crossbarCountInCore.getValue());
-
-      auto neededCores = coresPerOutputTile * outputTilesCount;
-
-      minimumCores += neededCores;
-    }
-  }
-
-  if (static_cast<size_t>(coresCount) < minimumCores) {
-    return funcOp->emitError("Not enough cores for this network: ")
-        << minimumCores << " cores needed, but only " << static_cast<size_t>(coresCount) << " available.";
-  }
-
-  size_t availableCores = static_cast<size_t>(coresCount) - minimumCores;
-
-  // Consume all the elements in the queue
-  while (!convOpsReplicationQueue.empty()) {
-    auto convOpReplication = convOpsReplicationQueue.top();
-    convOpsReplicationQueue.pop();
-
-    // Check if we can replicate this convolution (e.g. we have enough cores)
-    if (availableCores > convOpReplication.coresNeededPerReplica * (convOpReplication.replicationFactor + 1)) {
-      // We can replicate this convolution: increment replicationFactor and put
-      // back in queue
-      availableCores -= convOpReplication.coresNeededPerReplica;
-      convOpReplication.replicationFactor++;
-
-      convOpsReplicationQueue.push(convOpReplication);
-    }
-    else {
-      // Cannot replicate this convolution anymore, annotate the operation
-      // with the replication factor
-      convOpReplication.convOp->setAttr(REPLICATION_ATTR_NAME,
-                                        rewriter.getI64IntegerAttr(convOpReplication.replicationFactor));
-    }
-  }
-
-  return success();
-}
-
-} // namespace onnx_mlir
@@ -1,10 +0,0 @@
-#pragma once
-
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/IR/PatternMatch.h"
-
-namespace onnx_mlir {
-
-mlir::LogicalResult annotateReplication(mlir::func::FuncOp funcOp, mlir::IRRewriter& rewriter);
-
-} // namespace onnx_mlir
@@ -1,348 +0,0 @@
-#include "mlir/IR/BuiltinAttributes.h"
-#include "mlir/IR/Value.h"
-
-#include "llvm/Support/raw_ostream.h"
-
-#include <cassert>
-#include <unordered_map>
-#include <utility>
-
-#include "SpatialReducer.hpp"
-#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
-
-#define GET_COMP(computeOpAndResNum) std::get<0>(computeOpAndResNum)
-#define GET_RES_NUM(computeOpAndResNum) std::get<1>(computeOpAndResNum)
-
-namespace onnx_mlir {
-
-llvm::SmallPtrSet<mlir::Operation*, 16> onnx_mlir::SpatialReducer::oldComputeOpsReplaced;
-
-ResNum SpatialReducer::applyResultProcessing(ComputeAndResNum computeOpAndResNum,
-                                             std::function<mlir::Value(const mlir::Value&)> processFun,
-                                             mlir::ConversionPatternRewriter& rewriter) {
-  assert(processFun);
-
-  auto computeOp = GET_COMP(computeOpAndResNum);
-  auto resultNum = GET_RES_NUM(computeOpAndResNum);
-
-  spatial::SpatYieldOp yieldOp = mlir::cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
-
-  mlir::Value result = yieldOp->getOperand(resultNum);
-  rewriter.setInsertionPointAfterValue(result);
-  mlir::Value processedResult = processFun(result);
-  if (processedResult == result) {
-    // Sometimes we want processedResult to return the same value but do
-    // something else with it (e.g. in softmax we want to broadcast the value
-    // using a channel). In this case, we can just return the same value.
-    return resultNum;
-  }
-
-  yieldOp->insertOperands(yieldOp->getNumOperands(), processedResult);
-
-  return yieldOp.getNumOperands() - 1;
-}
-
-OpAndResNum
-SpatialReducer::applyReducePattern(llvm::SmallVector<ComputeAndResNum>& computeOpsAndResNum,
-                                   std::function<mlir::Value(const mlir::Value&, const mlir::Value&)> reduce,
-                                   std::function<mlir::Value(const mlir::Value&)> preprocess,
-                                   std::function<mlir::Value(const mlir::Value&)> postprocess) {
-
-  if (preprocess)
-    for (auto& computeOpAndResNum : computeOpsAndResNum)
-      GET_RES_NUM(computeOpAndResNum) = applyResultProcessing(computeOpAndResNum, preprocess, rewriter);
-
-  // It is possible that `computeOpsAndResNum` contains two entries for the same
-  // computeOp. In this case, we need to apply the reduction within-computef
-
-  // Keep a map between a computeOp and the last Value for this reduction
-  std::unordered_map<mlir::Operation*, mlir::Value> lastValueForCompute;
-  for (auto& computeOpAndResNum : computeOpsAndResNum) {
-    auto computeOp = GET_COMP(computeOpAndResNum);
-    auto yieldOp = mlir::cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
-    mlir::Value valueWithinCompute = yieldOp->getOperand(GET_RES_NUM(computeOpAndResNum));
-
-    auto it = lastValueForCompute.find(computeOp.getOperation());
-
-    if (it != lastValueForCompute.end()) {
-      // If we have already seen this computeOp, apply the reduction
-      // within-compute
-      mlir::Value lastWithinComputeValue = it->second;
-
-      assert(valueWithinCompute.getDefiningOp() && lastWithinComputeValue.getDefiningOp());
-
-      if (valueWithinCompute.getDefiningOp()->isBeforeInBlock(lastWithinComputeValue.getDefiningOp()))
-        rewriter.setInsertionPointAfterValue(lastWithinComputeValue);
-      else
-        rewriter.setInsertionPointAfterValue(valueWithinCompute);
-      valueWithinCompute = reduce(lastWithinComputeValue, valueWithinCompute);
-      lastValueForCompute[computeOp.getOperation()] = valueWithinCompute;
-    }
-
-    lastValueForCompute[computeOp.getOperation()] = valueWithinCompute;
-  }
-
-  // Now, reconstruct from the map the computeOpsAndResNum list
-  computeOpsAndResNum.clear();
-  computeOpsAndResNum.reserve(lastValueForCompute.size());
-  for (auto& entry : lastValueForCompute) {
-    auto computeOp = mlir::cast<spatial::SpatWeightedCompute>(entry.first);
-    auto valueWithinCompute = entry.second;
-
-    // We check if `valueWithinCompute` is already used by the yieldOp, in that
-    // case no need to add it
-    auto yieldOp = mlir::cast<spatial::SpatYieldOp>(computeOp.getBody().front().getTerminator());
-    bool yieldOpUseFound = false;
-    for (auto& use : valueWithinCompute.getUses()) {
-      if (use.getOwner() == yieldOp.getOperation()) {
-        // If the value is already used by the yieldOp, we can just use it
-        computeOpsAndResNum.push_back({computeOp, use.getOperandNumber()});
-        yieldOpUseFound = true;
-        break;
-      }
-    }
-    if (yieldOpUseFound)
-      continue;
-
-    // If this result is not used within a yieldOp, then add it
-    auto resultNum = yieldOp->getNumOperands();
-    yieldOp->insertOperands(resultNum, valueWithinCompute);
-
-    computeOpsAndResNum.push_back({computeOp, resultNum});
-  }
-
-  mlir::Location loc = GET_COMP(computeOpsAndResNum[0])->getLoc();
-
-  // Recursive algorithm to reduce the inputs to a single one:
-  // - Take two inputs at a time, and reduce them into a single one, updating
-  // the computeOpsAndResNum list which becomes half the size.
-  // - Repeat until there is only one input left.
-  llvm::OwningArrayRef<ComputeAndResNum> computeOpsRef(computeOpsAndResNum);
-  while (computeOpsRef.size() > 1) {
-    llvm::SmallVector<ComputeAndResNum> nextComputeOps;
-    nextComputeOps.reserve(computeOpsRef.size() / 2);
-    for (size_t i = 0; i < computeOpsRef.size() - 1; i += 2) {
-      auto [firstCompute, firstResultNum] = computeOpsRef[i];
-      auto [secondCompute, secondResultNum] = computeOpsRef[i + 1];
-
-      if (secondCompute->isBeforeInBlock(firstCompute)) {
-        std::swap(firstCompute, secondCompute);
-        std::swap(firstResultNum, secondResultNum);
-      }
-
-      // We do not immediately alter the computeOps results/operands, instead we
-      // do it in a delayed manner, to avoid invalidating the references to the
-      // computeOps (which must be replaced by a cloned ComputeOp when changing
-      // the number of results)
-      // See below `reducerChanges.push_back` and `finalizeReduceUpdates`
-
-      auto yieldOpFirstCompute = mlir::cast<spatial::SpatYieldOp>(firstCompute.getBody().front().getTerminator());
-
-      // Add a new operand to the block of the second computeOp
-      mlir::Block& secondBlock = secondCompute.getBody().front();
-      mlir::Value formerRes1 = secondBlock.addArgument(yieldOpFirstCompute->getOperand(firstResultNum).getType(), loc);
-
-      auto secondComputeWeightsNum =
-        secondCompute->getAttrOfType<mlir::DenseI32ArrayAttr>(secondCompute.getOperandSegmentSizesAttrName())[0];
-      auto secondComputeOperandNum = secondComputeWeightsNum + secondBlock.getNumArguments() - 1;
-
-      // Take the "former-result" from the second computeOp
-      spatial::SpatYieldOp secondYield = mlir::cast<spatial::SpatYieldOp>(secondBlock.getTerminator());
-      mlir::Value formerRes2 = secondYield.getOperand(secondResultNum);
-
-      // Apply reduction operation
-      rewriter.setInsertionPoint(secondYield);
-      mlir::Value reduced = reduce(formerRes2, formerRes1);
-
-      // Unfortunately, it is not possible to update the result in place,
-      // because we may have already referenced it by <computeOp, resultNum>
-      // outside of this function, thus replacing it would invalidate the
-      // reference. Therefore, we need to append a new result to the yieldOp,
-      // and then at a later stage update the computeOp accordingly.
-
-      // Add `reduced` to the second yieldOp
-      auto secondYieldOperandNum = secondYield.getNumOperands();
-      secondYield->insertOperands(secondYieldOperandNum, reduced);
-      secondResultNum = secondYieldOperandNum;
-
-      // We should also add an entry for updating the results of the last
-      // operation (the one which never becomes a `firstCompute`): because it is
-      // not tracked by reducerChanges as `fromOp`
-      reducerChanges.push_back(
-        {firstCompute.getOperation(), firstResultNum, secondCompute.getOperation(), secondComputeOperandNum});
-      nextComputeOps.push_back(std::make_pair(secondCompute, secondResultNum));
-    }
-
-    // If we have an odd number of inputs, we need to add the last one to the
-    // newInputs list.
-    if (computeOpsRef.size() % 2 == 1)
-      nextComputeOps.push_back(computeOpsRef.back());
-
-    // Replace the inputOps list with the new one.
-    computeOpsRef = llvm::OwningArrayRef<ComputeAndResNum>(std::move(nextComputeOps));
-  }
-
-  assert(computeOpsRef.size() == 1 && "Internal error: expected a single input at this point.");
-
-  auto finalComputeAndResNum = computeOpsRef[0];
-
-  // Force the update of the results of this computeOp, when finalizing
-  computeOpNeedingResUpdate.push_back(GET_COMP(finalComputeAndResNum));
-
-  if (postprocess)
-    GET_RES_NUM(finalComputeAndResNum) = applyResultProcessing(finalComputeAndResNum, postprocess, rewriter);
-
-  return std::make_pair(GET_COMP(finalComputeAndResNum).getOperation(), GET_RES_NUM(finalComputeAndResNum));
-}
-
-void SpatialReducer::finalizeReduceUpdates() {
-  assert(reducesFinalized == false && "Cannot finalize two times.");
-
-  reducesFinalized = true;
-
-  // First, add the results to the computeOps
-  for (auto& reduceChange : reducerChanges)
-    updateResultsOfCompute(reduceChange.fromOp);
-
-  for (auto& c : computeOpNeedingResUpdate)
-    updateResultsOfCompute(c.getOperation());
-
-  for (auto& reducerChange : this->reducerChanges) {
-    auto fromOp = reducerChange.fromOp;
-    auto toOp = reducerChange.toOp;
-    auto fromOpResNum = reducerChange.fromOpResNum;
-    auto toOpOperandNum = reducerChange.toOpOperandNum;
-
-    auto fromComputeOp = opToReplacedCompute[fromOp];
-    assert(fromComputeOp && "fromOp should have been mapped before!");
-
-    // toComputeOp could be the existing pointer, or we have to remap it with
-    // `opToReplacedCompute`
-    auto toComputeOp = opToReplacedCompute[toOp];
-    if (!toComputeOp)
-      toComputeOp = mlir::cast<spatial::SpatWeightedCompute>(toOp);
-
-    assert(toComputeOp != fromComputeOp && "Oops should have caught this earlier!");
-
-    assert(toComputeOp->getNumOperands() == toOpOperandNum
-           && "toOpOperandNum should be the last operand of toComputeOp, are the "
-              "operations in the right order?");
-
-    // Add the new operand to `toComputeOp`
-    auto fromResult = fromComputeOp.getResult(fromOpResNum);
-    toComputeOp->insertOperands(toOpOperandNum, fromResult);
-    incrementWeightedComputeInputsSegmentSize(toComputeOp, 1);
-  }
-}
-
-mlir::Value SpatialReducer::resolveValueFromOpAndResNum(OpAndResNum& opAndResNum) {
-  assert(reducesFinalized && "Cannot create resolve values before finalizing the reduce updates.");
-
-  mlir::Operation* opToCast;
-  auto it = opToReplacedCompute.find(opAndResNum.first);
-  if (it != opToReplacedCompute.end())
-    opToCast = it->second;
-  else
-    opToCast = opAndResNum.first;
-
-  auto computeOp = mlir::cast<spatial::SpatWeightedCompute>(opToCast);
-
-  return computeOp.getResult(opAndResNum.second);
-}
-
-void SpatialReducer::updateResultsOfCompute(mlir::Operation* computeOp) {
-  if (opToReplacedCompute.find(computeOp) != opToReplacedCompute.end()) {
-    // If we have already replaced the fromOp, we do not need to do it again
-    return;
-  }
-  auto oldComputeOp = mlir::cast<spatial::SpatWeightedCompute>(computeOp);
-
-  auto oldComputeOpNum = oldComputeOp->getNumOperands();
-
-  auto yieldOp = mlir::cast<spatial::SpatYieldOp>(oldComputeOp.getBody().front().getTerminator());
-
-  if (yieldOp.getNumOperands() == oldComputeOp->getNumResults()) {
-    // No result was added, just add itself to the map
-    opToReplacedCompute[oldComputeOp.getOperation()] = oldComputeOp;
-    return;
-  }
-
-  // Add the results by inspecting its YieldOp
-  auto newResultTypes = yieldOp.getOperandTypes();
-
-  // Create a new ComputeOp with the new result type, but same operands
-  rewriter.setInsertionPoint(oldComputeOp);
-  auto newComputeOp = spatial::SpatWeightedCompute::create(
-    rewriter, oldComputeOp->getLoc(), newResultTypes, oldComputeOp.getWeights(), oldComputeOp.getInputs());
-
-  newComputeOp.getBody().takeBody(oldComputeOp.getBody());
-
-  auto newComputeOpNum = newComputeOp->getNumOperands();
-
-  assert(oldComputeOpNum == newComputeOpNum);
-
-  // Since we replaced the old ComputeOp with a new one, we need to replace
-  // all its results' uses
-  for (size_t i = 0; i < oldComputeOp.getNumResults(); i++) {
-    mlir::Value oldResult = oldComputeOp.getResult(i);
-    mlir::Value newResult = newComputeOp.getResult(i);
-
-    // Replace the uses, except the uses of the compute ops which got deleted
-    // previously
-    rewriter.replaceAllUsesExcept(oldResult, newResult, oldComputeOpsReplaced);
-  }
-
-  // Finally, erase the old computeOp and update the map
-  opToReplacedCompute[oldComputeOp.getOperation()] = newComputeOp;
-  oldComputeOpsReplaced.insert(oldComputeOp.getOperation());
-  rewriter.setInsertionPoint(oldComputeOp);
-  rewriter.eraseOp(oldComputeOp);
-}
-
-mlir::Value
-SpatialReducer::createImgConcatOp(llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<OpAndResNum>>>& outputTiles,
-                                  mlir::Location& loc,
-                                  mlir::Type outputType) {
-
-  assert(reducesFinalized && "Cannot create ImgConcatOp before finalizing the reduce updates.");
-
-  // outputTiles are indexed like this: [channelTile][x][y]
-  auto tilesCount = outputTiles.size();
-  auto width = outputTiles[0].size();
-  auto height = outputTiles[0][0].size();
-
-  llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<mlir::Value>>> remappedOutputTiles(
-    tilesCount, llvm::SmallVector<llvm::SmallVector<mlir::Value>>(width, llvm::SmallVector<mlir::Value>(height)));
-
-  for (size_t t = 0; t < tilesCount; t++)
-    for (size_t x = 0; x < width; x++)
-      for (size_t y = 0; y < height; y++)
-        remappedOutputTiles[t][x][y] = resolveValueFromOpAndResNum(outputTiles[t][x][y]);
-
-  return ::onnx_mlir::createImgConcatOp(remappedOutputTiles, rewriter, loc, outputType);
-}
-
-OpAndResNum SpatialReducer::applyAddMapReduction(llvm::SmallVector<ComputeAndResNum>& computeOps,
-                                                 mlir::ConversionPatternRewriter& rewriter,
-                                                 mlir::Value biasTile,
-                                                 MapOperations mapOp) {
-
-  std::function<mlir::Value(const mlir::Value&)> postprocessing = nullptr;
-
-  if (mapOp != MapOperations::None) {
-    postprocessing = [&](const mlir::Value a) {
-      mlir::Value mapOperand = a;
-      if (biasTile)
-        mapOperand = spatial::SpatVAddOp::create(rewriter, a.getLoc(), a.getType(), a, biasTile);
-      return createMapOperation(rewriter, mapOp, mapOperand);
-    };
-  }
-
-  return this->applyReducePattern(
-    computeOps,
-    [&](mlir::Value a, mlir::Value b) { return spatial::SpatVAddOp::create(rewriter, a.getLoc(), a.getType(), a, b); },
-    /* preprocess = */ nullptr,
-    postprocessing);
-}
-
-} // namespace onnx_mlir
@@ -1,88 +0,0 @@
-#pragma once
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/Casting.h"
-
-#include <functional>
-#include <unordered_map>
-#include <utility>
-
-#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common.hpp"
-#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
-
-namespace onnx_mlir {
-
-using ResNum = unsigned int;
-
-using ComputeAndResNum = std::pair<spatial::SpatWeightedCompute, ResNum>;
-
-struct SpatialReducerChange {
-  mlir::Operation* fromOp;
-  unsigned int fromOpResNum;
-  mlir::Operation* toOp;
-  unsigned int toOpOperandNum;
-};
-
-using OpAndResNum = std::pair<mlir::Operation*, ResNum>;
-
-class SpatialReducer {
-
-public:
-  SpatialReducer(mlir::ConversionPatternRewriter& rewriter)
-  : rewriter(rewriter) {}
-
-  OpAndResNum applyReducePattern(llvm::SmallVector<ComputeAndResNum>& computeOpsAndResNum,
-                                 std::function<mlir::Value(const mlir::Value&, const mlir::Value&)> reduce,
-                                 std::function<mlir::Value(const mlir::Value&)> preprocess,
-                                 std::function<mlir::Value(const mlir::Value&)> postprocess);
-
-  OpAndResNum applyAddMapReduction(llvm::SmallVector<ComputeAndResNum>& computeOps,
-                                   mlir::ConversionPatternRewriter& rewriter,
-                                   mlir::Value biasTile,
-                                   MapOperations mapOp);
-
-  void finalizeReduceUpdates();
-
-  ~SpatialReducer() {
-    if (!reducesFinalized)
-      finalizeReduceUpdates();
-  }
-
-  mlir::Value createImgConcatOp(llvm::SmallVector<llvm::SmallVector<llvm::SmallVector<OpAndResNum>>>& outputTiles,
-                                mlir::Location& loc,
-                                mlir::Type outputType);
-
-  mlir::Value resolveValueFromOpAndResNum(OpAndResNum& opAndResNum);
-
-private:
-  [[nodiscard("computeOp result number gets updated")]] ResNum
-  applyResultProcessing(ComputeAndResNum computeOpAndResNum,
-                        std::function<mlir::Value(const mlir::Value&)> processFun,
-                        mlir::ConversionPatternRewriter& rewriter);
-
-  /**
-   * @brief Update the results of a ComputeOp.
-   *
-   * This function updates the results of a ComputeOp by taking a look at the
-   operands of its yieldOp.
-   * If the ComputeOp was replaced, it updates `opToReplacedCompute` with the
-   replaced ComputeOp.
-   *
-   * @param computeOp The ComputeOp to update the results of.
-   */
-  void updateResultsOfCompute(mlir::Operation* computeOp);
-
-  mlir::ConversionPatternRewriter& rewriter;
-  bool reducesFinalized = false;
-
-  // List of changes to be applied after the reduction is finalized
-  llvm::SmallVector<SpatialReducerChange, 4> reducerChanges;
-  // List of computeOps that need to be replaced with new results
-  llvm::SmallVector<spatial::SpatWeightedCompute> computeOpNeedingResUpdate;
-
-  std::unordered_map<mlir::Operation*, spatial::SpatWeightedCompute> opToReplacedCompute;
-
-  static llvm::SmallPtrSet<mlir::Operation*, 16> oldComputeOpsReplaced;
-};
-
-} // namespace onnx_mlir
@@ -1,53 +0,0 @@
-#include <cassert>
-
-#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Utils/WeightSubdivider.hpp"
-
-namespace onnx_mlir {
-
-WeightSubdivider::WeightSubdivider(std::map<long, std::map<long, llvm::SmallVector<mlir::Value>>> weights)
-: weights(std::move(weights)) {}
-
-bool WeightSubdivider::isEmpty() const { return weights.empty(); }
-
-TaggedWeights WeightSubdivider::popGroup(size_t amount) {
-  assert(!weights.empty() && "No weights to extract.");
-
-  auto it = weights.begin();
-  llvm::SmallVector<mlir::Value>& values = it->second.begin()->second;
-
-  long inputTile = it->first;
-  long outputTile = it->second.begin()->first;
-
-  size_t n = std::min(amount, values.size());
-  crossbarsUsed += n;
-
-  llvm::SmallVector<mlir::Value> result;
-  result.assign(values.begin(), values.begin() + n);
-
-  if (n < values.size()) {
-    values.erase(values.begin(), values.begin() + n);
-  }
-  else {
-    it->second.erase(outputTile);
-    if (it->second.empty())
-      weights.erase(inputTile);
-  }
-
-  return {inputTile, outputTile, crossbarsUsed - n, result};
-}
-
-llvm::SmallVector<TaggedWeights> WeightSubdivider::popGroups(size_t n) {
-  crossbarsUsed = 0;
-  llvm::SmallVector<TaggedWeights> result;
-  size_t remaining = n;
-
-  while (remaining > 0 && !weights.empty()) {
-    auto group = popGroup(remaining);
-    result.push_back(group);
-    remaining -= group.weights.size();
-  }
-
-  return result;
-}
-
-} // namespace onnx_mlir
@@ -1,46 +0,0 @@
-#pragma once
-
-#include "mlir/IR/Value.h"
-
-#include "llvm/ADT/SmallVector.h"
-
-#include <cstddef>
-#include <map>
-
-namespace onnx_mlir {
-
-/**
- * @brief A helper struct to store a group of weights.
- *
- */
-struct TaggedWeights {
-  long inputTile;
-  long outputTile;
-  size_t startingCrossbarIndex;
-  llvm::SmallVector<mlir::Value> weights;
-};
-
-/**
- * @brief A helper class to subdivide weights into groups.
- *
- * Weights are stored as a map of maps of SmallVectors. The outer map is indexed
- * by input tile, the inner map is indexed by output tile, and the SmallVector
- * contains the weights for the filter. This class allows us to extract groups
- * of weights from the map until we've extracted a certain number of elements,
- * namely as many as we need to fill a compute unit.
- */
-class WeightSubdivider {
-private:
-  std::map<long, std::map<long, llvm::SmallVector<mlir::Value>>> weights;
-  size_t crossbarsUsed = 0;
-
-  TaggedWeights popGroup(size_t amount);
-
-public:
-  WeightSubdivider(std::map<long, std::map<long, llvm::SmallVector<mlir::Value>>> weights);
-
-  bool isEmpty() const;
-  llvm::SmallVector<TaggedWeights> popGroups(size_t n);
-};
-
-} // namespace onnx_mlir