reduce spatial compile times in convolutions by using an scf.for loop instead of materializing a huge number of instructions

2026-04-10 18:50:25 +02:00
parent f3a36e9d43
commit f054e66ed0
18 changed files with 623 additions and 241 deletions
--- a/src/PIM/Pass/Pim/ConstantFolding/Patterns/Constant.cpp
+++ b/src/PIM/Pass/Pim/ConstantFolding/Patterns/Constant.cpp
@@ -120,7 +120,15 @@ struct FoldConstantCoreMapPattern final : OpRewritePattern<linalg::MapOp> {
    auto getGlobalOp = memref::GetGlobalOp::create(rewriter, mapOp.getLoc(), initType, globalOp.getName());

    rewriter.setInsertionPoint(mapOp);
-    rewriter.replaceAllUsesExcept(mapOp.getInit(), getGlobalOp.getResult(), mapOp);
+    auto sizeInBytes = initType.getNumElements() * initType.getElementTypeBitWidth() / 8;
+    pim::PimMemCopyOp::create(rewriter,
+                              mapOp.getLoc(),
+                              initType,
+                              mapOp.getInit(),
+                              getGlobalOp.getResult(),
+                              rewriter.getI32IntegerAttr(0),
+                              rewriter.getI32IntegerAttr(0),
+                              rewriter.getI32IntegerAttr(sizeInBytes));
    rewriter.eraseOp(mapOp);
    return success();
  }
@@ -416,6 +424,9 @@ struct FoldConstantMemCpPattern final : OpRewritePattern<pim::PimMemCopyOp> {
        return failure();
      if (llvm::any_of(srcSubview->strides, [](int64_t stride) { return stride != 1; }))
        return failure();
+      auto staticOffsets = getStaticSubviewOffsets(*srcSubview);
+      if (failed(staticOffsets))
+        return failure();

      auto resultTensorType = RankedTensorType::get(allocType.getShape(), allocType.getElementType());
      const int64_t numResultElements = resultTensorType.getNumElements();
@@ -428,7 +439,7 @@ struct FoldConstantMemCpPattern final : OpRewritePattern<pim::PimMemCopyOp> {
        auto resultIndices = delinearizeIndex(i, resultTensorType.getShape(), resultStrides);
        SmallVector<int64_t> sourceIndices;
        sourceIndices.reserve(resultIndices.size());
-        for (auto [off, idx] : llvm::zip_equal(srcSubview->offsets, resultIndices))
+        for (auto [off, idx] : llvm::zip_equal(*staticOffsets, resultIndices))
          sourceIndices.push_back(off + idx);
        int64_t srcLinear = linearizeIndex(sourceIndices, sourceStrides);
        resultValues[i] = sourceValues[srcLinear];