fix batched conv

2026-03-20 22:00:46 +01:00
parent ca2e1645bb
commit 568529ea5f
2 changed files with 36 additions and 32 deletions
@@ -38,11 +38,11 @@ LogicalResult ConvToGemm::matchAndRewrite(ONNXConvOp convOp,
  assert("Only support static shapes" && xType.hasStaticShape() && wType.hasStaticShape() && outType.hasStaticShape());
  assert("Only support 2D convolution" && xType.getRank() == 4);
  assert("Only support batch size 1 for input" && xType.getDimSize(0) == 1);
  // We need to understand what is group
  assert("Only support group=1" && convOp.getGroup() == 1);
  const int64_t batchSize = xType.getDimSize(0);
  const int64_t numChannelsIn = xType.getDimSize(1);
  const int64_t xHeight = xType.getDimSize(2);
  const int64_t xWidth = xType.getDimSize(3);
@@ -107,7 +107,8 @@ LogicalResult ConvToGemm::matchAndRewrite(ONNXConvOp convOp,
  //   B (weights): [patchSize, cOut]        -- W^T, stored in crossbar columns
  //   Gemm output: [numPatches, cOut]
  const int64_t patchSize = numChannelsIn * wHeight * wWidth;
-  const int64_t numPatches = outHeight * outWidth;
+  const int64_t numPatchesPerBatch = outHeight * outWidth;
  const int64_t numPatches = batchSize * numPatchesPerBatch;
  auto elemType = xType.getElementType();
  auto im2colType = RankedTensorType::get({numPatches, patchSize}, elemType);
@@ -115,7 +116,7 @@ LogicalResult ConvToGemm::matchAndRewrite(ONNXConvOp convOp,
  auto wFlatType = RankedTensorType::get({numChannelsOut, patchSize}, wType.getElementType());
  auto wTransType = RankedTensorType::get({patchSize, numChannelsOut}, wType.getElementType());
  auto gemmOutType = RankedTensorType::get({numPatches, numChannelsOut}, outType.getElementType());
-  auto nhwcType = RankedTensorType::get({1, outHeight, outWidth, numChannelsOut}, outType.getElementType());
+  auto nhwcType = RankedTensorType::get({batchSize, outHeight, outWidth, numChannelsOut}, outType.getElementType());
  // Prepare weight matrix W for crossbar storage:
  // W: [numChannelsOut, numChannelsIn, wHeight, wWidth] -> [numChannelsOut, patchSize] -> [patchSize, numChannelsOut]
@@ -160,7 +161,7 @@ LogicalResult ConvToGemm::matchAndRewrite(ONNXConvOp convOp,
  if (padHeightBegin || padHeightEnd || padWidthBegin || padWidthEnd) {
    const int64_t paddedHeight = xHeight + padHeightBegin + padHeightEnd;
    const int64_t paddedWidth = xWidth + padWidthBegin + padWidthEnd;
-    auto paddedType = RankedTensorType::get({1, numChannelsIn, paddedHeight, paddedWidth}, elemType);
+    auto paddedType = RankedTensorType::get({batchSize, numChannelsIn, paddedHeight, paddedWidth}, elemType);
    SmallVector<OpFoldResult> lowPads = {rewriter.getIndexAttr(0),
                                         rewriter.getIndexAttr(0),
                                         rewriter.getIndexAttr(padHeightBegin),
@@ -182,12 +183,13 @@ LogicalResult ConvToGemm::matchAndRewrite(ONNXConvOp convOp,
  }
  // Build im2col [numPatches, patchSize]:
-  // For each output position (oh, ow), extract the patch from x
+  // For each batch/output position (n, oh, ow), extract the patch from x
  SmallVector<Value> im2colRows;
  im2colRows.reserve(numPatches);
  for (int64_t n = 0; n < batchSize; n++) {
    for (int64_t oh = 0; oh < outHeight; oh++) {
      for (int64_t ow = 0; ow < outWidth; ow++) {
-      SmallVector<OpFoldResult> offsets = {rewriter.getIndexAttr(0),
+        SmallVector<OpFoldResult> offsets = {rewriter.getIndexAttr(n),
                                             rewriter.getIndexAttr(0),
                                             rewriter.getIndexAttr(oh * strideHeight),
                                             rewriter.getIndexAttr(ow * strideWidth)};
@@ -214,6 +216,7 @@ LogicalResult ConvToGemm::matchAndRewrite(ONNXConvOp convOp,
        im2colRows.push_back(row);
      }
    }
  }
  // Concatenate all rows: [numPatches, patchSize]
  Value im2col = tensor::ConcatOp::create(rewriter, loc, /*axis=*/0, im2colRows);
@@ -7,6 +7,7 @@ add_onnx_mlir_library(SpatialOps
  Transforms/SpatialBufferizableOpInterface.cpp
  DEPENDS
  OMONNXIncGen
  OMSpatialIncGen
  LINK_LIBS PUBLIC