Refactor PIM/Common (splitting in files, adding helpers, adding brief docs)

Some checks failed
Validate Operations / validate-operations (push) Failing after 18m36s
258  src/PIM/Common/IR/AddressAnalysis.cpp  Normal file
@@ -0,0 +1,258 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"

#include "src/Accelerators/PIM/Common/IR/AddressAnalysis.hpp"
#include "src/Accelerators/PIM/Common/IR/ShapeUtils.hpp"

namespace onnx_mlir {

mlir::memref::GlobalOp lookupGlobalForGetGlobal(mlir::ModuleOp moduleOp, mlir::memref::GetGlobalOp getGlobalOp) {
  if (!moduleOp || !getGlobalOp)
    return {};
  return moduleOp.lookupSymbol<mlir::memref::GlobalOp>(getGlobalOp.getName());
}

namespace {

mlir::Value resolveAlias(mlir::Value value, const StaticValueKnowledge* knowledge) {
  if (!knowledge)
    return value;

  auto iter = knowledge->aliases.find(value);
  while (iter != knowledge->aliases.end()) {
    value = iter->second;
    iter = knowledge->aliases.find(value);
  }
  return value;
}

mlir::Value resolveLoopCarriedAliasImpl(mlir::Value value, const StaticValueKnowledge* knowledge) {
  value = resolveAlias(value, knowledge);

  if (mlir::isa<mlir::BlockArgument>(value))
    return value;

  mlir::Operation* definingOp = value.getDefiningOp();
  if (!definingOp)
    return value;

  if (auto dpsDefiningOp = mlir::dyn_cast<mlir::DestinationStyleOpInterface>(definingOp)) {
    if (auto result = mlir::dyn_cast<mlir::OpResult>(value))
      if (mlir::OpOperand* tiedOperand = dpsDefiningOp.getTiedOpOperand(result))
        return resolveLoopCarriedAliasImpl(tiedOperand->get(), knowledge);
  }

  if (auto castOp = mlir::dyn_cast<mlir::memref::CastOp>(definingOp))
    return resolveLoopCarriedAliasImpl(castOp.getSource(), knowledge);
  if (auto collapseOp = mlir::dyn_cast<mlir::memref::CollapseShapeOp>(definingOp))
    return resolveLoopCarriedAliasImpl(collapseOp.getSrc(), knowledge);
  if (auto expandOp = mlir::dyn_cast<mlir::memref::ExpandShapeOp>(definingOp))
    return resolveLoopCarriedAliasImpl(expandOp.getSrc(), knowledge);

  return value;
}

llvm::FailureOr<int64_t> resolveOpFoldResult(mlir::OpFoldResult ofr, const StaticValueKnowledge* knowledge);

llvm::FailureOr<int64_t> resolveIndexValueImpl(mlir::Value value, const StaticValueKnowledge* knowledge) {
  value = resolveAlias(value, knowledge);

  if (knowledge) {
    auto iter = knowledge->indexValues.find(value);
    if (iter != knowledge->indexValues.end())
      return iter->second;
  }

  auto constantOp = value.getDefiningOp<mlir::arith::ConstantOp>();
  if (constantOp) {
    if (auto integerAttr = mlir::dyn_cast<mlir::IntegerAttr>(constantOp.getValue()))
      return integerAttr.getInt();
  }

  mlir::Operation* definingOp = value.getDefiningOp();
  if (!definingOp)
    return mlir::failure();

  if (auto indexCastOp = mlir::dyn_cast<mlir::arith::IndexCastOp>(definingOp))
    return resolveIndexValueImpl(indexCastOp.getIn(), knowledge);

  if (auto addOp = mlir::dyn_cast<mlir::arith::AddIOp>(definingOp)) {
    auto lhs = resolveIndexValueImpl(addOp.getLhs(), knowledge);
    auto rhs = resolveIndexValueImpl(addOp.getRhs(), knowledge);
    if (failed(lhs) || failed(rhs))
      return mlir::failure();
    return *lhs + *rhs;
  }

  if (auto subOp = mlir::dyn_cast<mlir::arith::SubIOp>(definingOp)) {
    auto lhs = resolveIndexValueImpl(subOp.getLhs(), knowledge);
    auto rhs = resolveIndexValueImpl(subOp.getRhs(), knowledge);
    if (failed(lhs) || failed(rhs))
      return mlir::failure();
    return *lhs - *rhs;
  }

  if (auto mulOp = mlir::dyn_cast<mlir::arith::MulIOp>(definingOp)) {
    auto lhs = resolveIndexValueImpl(mulOp.getLhs(), knowledge);
    auto rhs = resolveIndexValueImpl(mulOp.getRhs(), knowledge);
    if (failed(lhs) || failed(rhs))
      return mlir::failure();
    return *lhs * *rhs;
  }

  if (auto divOp = mlir::dyn_cast<mlir::arith::DivUIOp>(definingOp)) {
    auto lhs = resolveIndexValueImpl(divOp.getLhs(), knowledge);
    auto rhs = resolveIndexValueImpl(divOp.getRhs(), knowledge);
    if (failed(lhs) || failed(rhs) || *rhs == 0)
      return mlir::failure();
    return static_cast<int64_t>(static_cast<uint64_t>(*lhs) / static_cast<uint64_t>(*rhs));
  }

  if (auto remOp = mlir::dyn_cast<mlir::arith::RemUIOp>(definingOp)) {
    auto lhs = resolveIndexValueImpl(remOp.getLhs(), knowledge);
    auto rhs = resolveIndexValueImpl(remOp.getRhs(), knowledge);
    if (failed(lhs) || failed(rhs) || *rhs == 0)
      return mlir::failure();
    return static_cast<int64_t>(static_cast<uint64_t>(*lhs) % static_cast<uint64_t>(*rhs));
  }

  return mlir::failure();
}

llvm::FailureOr<int64_t> resolveOpFoldResult(mlir::OpFoldResult ofr, const StaticValueKnowledge* knowledge) {
  if (auto attr = mlir::dyn_cast<mlir::Attribute>(ofr)) {
    auto integerAttr = mlir::dyn_cast<mlir::IntegerAttr>(attr);
    if (!integerAttr)
      return mlir::failure();
    return integerAttr.getInt();
  }

  return resolveIndexValueImpl(mlir::cast<mlir::Value>(ofr), knowledge);
}

llvm::FailureOr<ResolvedContiguousAddress> resolveContiguousAddressImpl(mlir::Value value,
    const StaticValueKnowledge* knowledge) {
  int64_t byteOffset = 0;
  value = resolveAlias(value, knowledge);

  while (true) {
    if (mlir::isa<mlir::BlockArgument>(value))
      return ResolvedContiguousAddress {value, byteOffset};

    mlir::Operation* definingOp = value.getDefiningOp();
    if (!definingOp)
      return mlir::failure();

    if (auto dpsDefiningOp = mlir::dyn_cast<mlir::DestinationStyleOpInterface>(definingOp)) {
      mlir::OpOperand* tiedOperand = dpsDefiningOp.getTiedOpOperand(mlir::dyn_cast<mlir::OpResult>(value));
      if (!tiedOperand)
        return mlir::failure();
      value = resolveAlias(tiedOperand->get(), knowledge);
      continue;
    }

    if (auto forOp = mlir::dyn_cast<mlir::scf::ForOp>(definingOp)) {
      auto result = mlir::dyn_cast<mlir::OpResult>(value);
      if (!result)
        return mlir::failure();

      auto yieldOp = mlir::cast<mlir::scf::YieldOp>(forOp.getBody()->getTerminator());
      mlir::Value yieldedValue = resolveLoopCarriedAliasImpl(yieldOp.getOperand(result.getResultNumber()), knowledge);
      if (auto blockArgument = mlir::dyn_cast<mlir::BlockArgument>(yieldedValue)) {
        if (blockArgument.getOwner() == forOp.getBody() && blockArgument.getArgNumber() > 0
            && static_cast<unsigned>(blockArgument.getArgNumber() - 1) < forOp.getInitArgs().size()) {
          value = resolveAlias(forOp.getInitArgs()[blockArgument.getArgNumber() - 1], knowledge);
          continue;
        }
      }

      value = yieldedValue;
      continue;
    }

    if (auto subviewOp = mlir::dyn_cast<mlir::memref::SubViewOp>(definingOp)) {
      auto sourceType = mlir::dyn_cast<mlir::MemRefType>(subviewOp.getSource().getType());
      auto subviewType = mlir::dyn_cast<mlir::MemRefType>(subviewOp.getType());
      if (!sourceType || !subviewType || !sourceType.hasStaticShape() || !subviewType.hasStaticShape())
        return mlir::failure();

      llvm::SmallVector<int64_t> offsets;
      llvm::SmallVector<int64_t> sizes;
      llvm::SmallVector<int64_t> strides;
      offsets.reserve(subviewOp.getMixedOffsets().size());
      sizes.reserve(subviewOp.getMixedSizes().size());
      strides.reserve(subviewOp.getMixedStrides().size());

      for (mlir::OpFoldResult offset : subviewOp.getMixedOffsets()) {
        auto resolvedOffset = resolveOpFoldResult(offset, knowledge);
        if (failed(resolvedOffset))
          return mlir::failure();
        offsets.push_back(*resolvedOffset);
      }

      for (mlir::OpFoldResult size : subviewOp.getMixedSizes()) {
        auto resolvedSize = resolveOpFoldResult(size, knowledge);
        if (failed(resolvedSize))
          return mlir::failure();
        sizes.push_back(*resolvedSize);
      }

      for (mlir::OpFoldResult stride : subviewOp.getMixedStrides()) {
        auto resolvedStride = resolveOpFoldResult(stride, knowledge);
        if (failed(resolvedStride))
          return mlir::failure();
        strides.push_back(*resolvedStride);
      }

      if (!isMemoryContiguous(sourceType.getShape(), offsets, sizes, strides))
        return mlir::failure();

      auto sourceStrides = computeRowMajorStrides(sourceType.getShape());
      byteOffset += linearizeIndex(offsets, sourceStrides) * subviewType.getElementTypeBitWidth() / 8;
      value = resolveAlias(subviewOp.getSource(), knowledge);
      continue;
    }

    if (auto castOp = mlir::dyn_cast<mlir::memref::CastOp>(definingOp)) {
      value = resolveAlias(castOp.getSource(), knowledge);
      continue;
    }
    if (auto collapseOp = mlir::dyn_cast<mlir::memref::CollapseShapeOp>(definingOp)) {
      value = resolveAlias(collapseOp.getSrc(), knowledge);
      continue;
    }
    if (auto expandOp = mlir::dyn_cast<mlir::memref::ExpandShapeOp>(definingOp)) {
      value = resolveAlias(expandOp.getSrc(), knowledge);
      continue;
    }

    if (mlir::isa<mlir::memref::AllocOp, mlir::memref::GetGlobalOp>(definingOp))
      return ResolvedContiguousAddress {value, byteOffset};

    return mlir::failure();
  }
}

} // namespace

llvm::FailureOr<int64_t> resolveIndexValue(mlir::Value value) { return resolveIndexValueImpl(value, nullptr); }

llvm::FailureOr<int64_t> resolveIndexValue(mlir::Value value, const StaticValueKnowledge& knowledge) {
  return resolveIndexValueImpl(value, &knowledge);
}

llvm::FailureOr<ResolvedContiguousAddress> resolveContiguousAddress(mlir::Value value) {
  return resolveContiguousAddressImpl(value, nullptr);
}

llvm::FailureOr<ResolvedContiguousAddress> resolveContiguousAddress(mlir::Value value,
    const StaticValueKnowledge& knowledge) {
  return resolveContiguousAddressImpl(value, &knowledge);
}

mlir::Value resolveLoopCarriedAlias(mlir::Value value, const StaticValueKnowledge& knowledge) {
  return resolveLoopCarriedAliasImpl(value, &knowledge);
}

} // namespace onnx_mlir
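
A minimal usage sketch (not part of this commit) of the index evaluator above: a caller pins a loop induction variable to a concrete value and folds an index expression built from arith ops. The helper name `evalIndexAtIteration` and its arguments are hypothetical.

// Hypothetical sketch: evaluate an index expression once a loop induction
// variable has been pinned to a concrete trip value.
static llvm::FailureOr<int64_t> evalIndexAtIteration(
    mlir::Value indexExpr, mlir::Value inductionVar, int64_t inductionValue) {
  onnx_mlir::StaticValueKnowledge knowledge;
  knowledge.indexValues[inductionVar] = inductionValue; // pin %iv to a constant
  // Constants, index_cast, add/sub/mul and unsigned div/rem fold recursively.
  return onnx_mlir::resolveIndexValue(indexExpr, knowledge);
}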
43  src/PIM/Common/IR/AddressAnalysis.hpp  Normal file
@@ -0,0 +1,43 @@
#pragma once

#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/IR/Value.h"

#include "llvm/ADT/DenseMap.h"

namespace onnx_mlir {

/// Describes a value as a base addressable object plus a statically known
/// byte offset after peeling aliases, casts, and contiguous subviews.
struct ResolvedContiguousAddress {
  mlir::Value base;
  int64_t byteOffset = 0;
};

/// Records compile-time facts used when interpreting address arithmetic and
/// loop-carried aliases inside PIM regions.
struct StaticValueKnowledge {
  llvm::DenseMap<mlir::Value, int64_t> indexValues;
  llvm::DenseMap<mlir::Value, mlir::Value> aliases;

  StaticValueKnowledge() {}
};

mlir::memref::GlobalOp lookupGlobalForGetGlobal(mlir::ModuleOp moduleOp, mlir::memref::GetGlobalOp getGlobalOp);

/// Resolves a value to contiguous backing storage when that storage can be
/// proven statically from aliases, DPS ties, casts, and subviews.
llvm::FailureOr<ResolvedContiguousAddress> resolveContiguousAddress(mlir::Value value);
llvm::FailureOr<ResolvedContiguousAddress> resolveContiguousAddress(mlir::Value value,
    const StaticValueKnowledge& knowledge);

/// Statically evaluates index-like SSA values, including simple integer
/// arithmetic and loop facts recorded in `knowledge`.
llvm::FailureOr<int64_t> resolveIndexValue(mlir::Value value);
llvm::FailureOr<int64_t> resolveIndexValue(mlir::Value value, const StaticValueKnowledge& knowledge);

/// Follows alias, view, and DPS chains to recover the backing value of a
/// loop-carried memref/result.
mlir::Value resolveLoopCarriedAlias(mlir::Value value, const StaticValueKnowledge& knowledge);

} // namespace onnx_mlir
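
For orientation, a sketch (not part of this commit) of how a later PIM lowering step might consume `resolveContiguousAddress`; the function `encodeTransferSource` and the idea of encoding a transfer are hypothetical.

// Hypothetical sketch: map a memref value to (base, static byte offset) so a
// DMA-style PIM transfer could be encoded from it.
static mlir::LogicalResult encodeTransferSource(
    mlir::Value source, const onnx_mlir::StaticValueKnowledge &knowledge) {
  auto resolved = onnx_mlir::resolveContiguousAddress(source, knowledge);
  if (mlir::failed(resolved))
    return mlir::failure(); // not provably contiguous: caller must fall back
  mlir::Value base = resolved->base;         // alloc, get_global, or block arg
  int64_t byteOffset = resolved->byteOffset; // accumulated subview offset
  // A real pass would now translate (base, byteOffset) into a device address.
  (void)base;
  (void)byteOffset;
  return mlir::success();
}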
67  src/PIM/Common/IR/CoreBlockUtils.cpp  Normal file
@@ -0,0 +1,67 @@
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/SCF/IR/SCF.h"

#include "src/Accelerators/PIM/Common/IR/CoreBlockUtils.hpp"
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"

namespace onnx_mlir {

bool isCoreStaticAddressOp(mlir::Operation* op) {
  return mlir::isa<mlir::arith::ConstantOp,
      mlir::arith::AddIOp,
      mlir::arith::SubIOp,
      mlir::arith::MulIOp,
      mlir::arith::DivUIOp,
      mlir::arith::RemUIOp,
      mlir::arith::IndexCastOp,
      mlir::memref::AllocOp,
      mlir::memref::SubViewOp,
      mlir::memref::CastOp,
      mlir::memref::CollapseShapeOp,
      mlir::memref::ExpandShapeOp>(op);
}

mlir::LogicalResult
walkPimCoreBlock(mlir::Block& block,
    const StaticValueKnowledge& knowledge,
    llvm::function_ref<mlir::LogicalResult(mlir::Operation&, const StaticValueKnowledge&)> callback) {
  bool hasFailure = false;
  for (mlir::Operation& op : block) {
    if (mlir::isa<pim::PimHaltOp, mlir::scf::YieldOp>(op) || isCoreStaticAddressOp(&op))
      continue;

    if (auto forOp = mlir::dyn_cast<mlir::scf::ForOp>(op)) {
      mlir::Block& loopBody = forOp.getRegion().front();
      auto lowerBound = resolveIndexValue(forOp.getLowerBound(), knowledge);
      auto upperBound = resolveIndexValue(forOp.getUpperBound(), knowledge);
      auto step = resolveIndexValue(forOp.getStep(), knowledge);
      if (failed(lowerBound) || failed(upperBound) || failed(step) || *step <= 0) {
        forOp.emitOpError("requires statically evaluable scf.for bounds for PIM codegen");
        hasFailure = true;
        continue;
      }

      llvm::SmallVector<mlir::Value> iterValues(forOp.getInitArgs().begin(), forOp.getInitArgs().end());
      for (int64_t inductionValue = *lowerBound; inductionValue < *upperBound; inductionValue += *step) {
        StaticValueKnowledge loopKnowledge = knowledge;
        loopKnowledge.indexValues[forOp.getInductionVar()] = inductionValue;
        for (auto [iterArg, iterValue] : llvm::zip_equal(forOp.getRegionIterArgs(), iterValues))
          loopKnowledge.aliases[iterArg] = iterValue;

        if (failed(walkPimCoreBlock(loopBody, loopKnowledge, callback)))
          hasFailure = true;

        auto yieldOp = mlir::cast<mlir::scf::YieldOp>(loopBody.getTerminator());
        for (auto [index, yieldedValue] : llvm::enumerate(yieldOp.getOperands()))
          iterValues[index] = resolveLoopCarriedAlias(yieldedValue, loopKnowledge);
      }
      continue;
    }

    if (failed(callback(op, knowledge)))
      hasFailure = true;
  }
  return mlir::success(!hasFailure);
}

} // namespace onnx_mlir
24  src/PIM/Common/IR/CoreBlockUtils.hpp  Normal file
@@ -0,0 +1,24 @@
#pragma once

#include "mlir/IR/Block.h"
#include "mlir/Support/LogicalResult.h"

#include "llvm/ADT/STLFunctionalExtras.h"

#include "src/Accelerators/PIM/Common/IR/AddressAnalysis.hpp"

namespace onnx_mlir {

/// Returns true for ops in a `pim.core` body that only participate in static
/// address or index computation and therefore do not emit PIM instructions.
bool isCoreStaticAddressOp(mlir::Operation* op);

/// Walks a `pim.core` body, statically unrolling nested `scf.for` loops when
/// their bounds are known and invoking `callback` only on instruction-emitting
/// operations.
mlir::LogicalResult
walkPimCoreBlock(mlir::Block& block,
    const StaticValueKnowledge& knowledge,
    llvm::function_ref<mlir::LogicalResult(mlir::Operation&, const StaticValueKnowledge&)> callback);

} // namespace onnx_mlir
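
As an illustration (not part of this commit) of how an instruction emitter might drive `walkPimCoreBlock`, here is a hedged sketch; the counting callback and the helper name `countEmittedOps` are hypothetical.

// Hypothetical sketch: count the instruction-emitting ops a pim.core body
// would yield once statically unrollable scf.for loops are expanded.
static llvm::FailureOr<int64_t> countEmittedOps(mlir::Block &coreBody) {
  int64_t numOps = 0;
  onnx_mlir::StaticValueKnowledge knowledge; // no extra facts at the top level
  auto status = onnx_mlir::walkPimCoreBlock(coreBody, knowledge,
      [&](mlir::Operation &op, const onnx_mlir::StaticValueKnowledge &) {
        ++numOps; // a real emitter would translate `op` here
        return mlir::success();
      });
  if (mlir::failed(status))
    return mlir::failure();
  return numOps;
}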
45  src/PIM/Common/IR/EntryPointUtils.cpp  Normal file
@@ -0,0 +1,45 @@
#include "src/Accelerators/PIM/Common/IR/EntryPointUtils.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"

namespace onnx_mlir {

llvm::FailureOr<mlir::func::FuncOp> getPimEntryFunc(mlir::ModuleOp moduleOp) {
  if (!moduleOp)
    return mlir::failure();

  llvm::SmallVector<mlir::ONNXEntryPointOp> entryPoints(moduleOp.getOps<mlir::ONNXEntryPointOp>());
  if (entryPoints.size() > 1) {
    moduleOp.emitError("PIM pipeline requires a single ONNX entry point, but found ") << entryPoints.size();
    return mlir::failure();
  }
  if (!entryPoints.empty()) {
    auto entryPointAttr =
        entryPoints.front()->getAttrOfType<mlir::SymbolRefAttr>(mlir::ONNXEntryPointOp::getEntryPointFuncAttrName());
    if (!entryPointAttr) {
      entryPoints.front().emitOpError("is missing the entry point function attribute");
      return mlir::failure();
    }
    auto entryFunc = moduleOp.lookupSymbol<mlir::func::FuncOp>(entryPointAttr.getLeafReference().getValue());
    if (!entryFunc) {
      entryPoints.front().emitOpError("references an unknown entry function ")
          << entryPointAttr.getLeafReference().getValue();
      return mlir::failure();
    }
    return entryFunc;
  }

  if (auto mainGraphFunc = moduleOp.lookupSymbol<mlir::func::FuncOp>("main_graph"))
    return mainGraphFunc;

  llvm::SmallVector<mlir::func::FuncOp> nonExternalFuncs;
  for (auto funcOp : moduleOp.getOps<mlir::func::FuncOp>())
    if (!funcOp.isExternal())
      nonExternalFuncs.push_back(funcOp);
  if (nonExternalFuncs.size() == 1)
    return nonExternalFuncs.front();

  moduleOp.emitError("could not resolve a unique PIM entry function");
  return mlir::failure();
}

} // namespace onnx_mlir
13  src/PIM/Common/IR/EntryPointUtils.hpp  Normal file
@@ -0,0 +1,13 @@
#pragma once

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinOps.h"

namespace onnx_mlir {

/// Resolves the function the PIM pipeline should treat as its entry point.
/// Prefers ONNX entry-point metadata, then `main_graph`, then the only
/// non-external function if the module is otherwise unambiguous.
llvm::FailureOr<mlir::func::FuncOp> getPimEntryFunc(mlir::ModuleOp moduleOp);

} // namespace onnx_mlir
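
A short sketch (hypothetical, not in this commit) of the typical call site inside a module-level pass body; the function `runOnPimModule` is an assumption for illustration.

// Hypothetical sketch: bail out cleanly when no unique PIM entry function
// can be resolved; getPimEntryFunc already emits the diagnostic.
static void runOnPimModule(mlir::ModuleOp moduleOp) {
  auto entryFunc = onnx_mlir::getPimEntryFunc(moduleOp);
  if (mlir::failed(entryFunc))
    return;
  mlir::func::FuncOp func = *entryFunc;
  // ... rewrite only `func`, leaving helper/external functions untouched.
  (void)func;
}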
89  src/PIM/Common/IR/ShapeUtils.cpp  Normal file
@@ -0,0 +1,89 @@
#include "llvm/ADT/STLExtras.h"

#include "src/Accelerators/PIM/Common/IR/ShapeUtils.hpp"

namespace onnx_mlir {

llvm::SmallVector<int64_t> computeRowMajorStrides(llvm::ArrayRef<int64_t> shape) {
  llvm::SmallVector<int64_t> strides(shape.size(), 1);
  for (int64_t dim = static_cast<int64_t>(shape.size()) - 2; dim >= 0; --dim)
    strides[dim] = strides[dim + 1] * shape[dim + 1];
  return strides;
}

llvm::SmallVector<int64_t>
delinearizeIndex(int64_t linearIndex, llvm::ArrayRef<int64_t> shape, llvm::ArrayRef<int64_t> strides) {
  llvm::SmallVector<int64_t> indices(shape.size(), 0);
  for (auto [dim, stride] : llvm::enumerate(strides)) {
    indices[dim] = linearIndex / stride;
    linearIndex %= stride;
  }
  return indices;
}

int64_t linearizeIndex(llvm::ArrayRef<int64_t> indices, llvm::ArrayRef<int64_t> strides) {
  int64_t linearIndex = 0;
  for (auto [index, stride] : llvm::zip_equal(indices, strides))
    linearIndex += index * stride;
  return linearIndex;
}

int64_t getNumElements(llvm::ArrayRef<int64_t> shape) {
  int64_t numElements = 1;
  for (int64_t dim : shape)
    numElements *= dim;
  return numElements;
}

bool isMemoryContiguous(llvm::ArrayRef<int64_t> srcShape,
    llvm::ArrayRef<int64_t> offsets,
    llvm::ArrayRef<int64_t> sizes,
    llvm::ArrayRef<int64_t> strides) {
  if (std::any_of(strides.begin(), strides.end(), [](int64_t stride) -> bool { return stride != 1; }))
    return false;

  auto offsetsAndSizesAndShape = llvm::zip_equal(llvm::make_range(offsets.rbegin(), offsets.rend()),
      llvm::make_range(sizes.rbegin(), sizes.rend()),
      llvm::make_range(srcShape.rbegin(), srcShape.rend()));

  auto firstNonZeroOffset = std::find_if(
      offsetsAndSizesAndShape.begin(), offsetsAndSizesAndShape.end(), [&](auto offsetAndSizeAndShape) -> bool {
        auto [offset, _size, _dimension] = offsetAndSizeAndShape;
        return offset != 0;
      });

  if (firstNonZeroOffset != offsetsAndSizesAndShape.end()) {
    auto [offset, size, dimension] = *firstNonZeroOffset;
    if (size > dimension - offset)
      return false;
    ++firstNonZeroOffset;

    if (std::any_of(firstNonZeroOffset, offsetsAndSizesAndShape.end(), [](auto offsetAndSizeAndShape) -> bool {
          auto [_offset, size, _dimension] = offsetAndSizeAndShape;
          return size != 1;
        }))
      return false;
  }

  auto sizesAndShape = llvm::zip_equal(llvm::make_range(sizes.rbegin(), sizes.rend()),
      llvm::make_range(srcShape.rbegin(), srcShape.rend()));

  auto firstDifferentSize = std::find_if(sizesAndShape.begin(), sizesAndShape.end(), [&](auto sizeAndShape) -> bool {
    auto [size, dimension] = sizeAndShape;
    return size != dimension;
  });

  if (firstDifferentSize != sizesAndShape.end()) {
    ++firstDifferentSize;

    if (std::any_of(firstDifferentSize, sizesAndShape.end(), [](auto sizeAndShape) -> bool {
          auto [size, _dimension] = sizeAndShape;
          return size != 1;
        }))
      return false;
  }

  return true;
}

} // namespace onnx_mlir
22  src/PIM/Common/IR/ShapeUtils.hpp  Normal file
@@ -0,0 +1,22 @@
#pragma once

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

namespace onnx_mlir {

llvm::SmallVector<int64_t> computeRowMajorStrides(llvm::ArrayRef<int64_t> shape);

llvm::SmallVector<int64_t>
delinearizeIndex(int64_t linearIndex, llvm::ArrayRef<int64_t> shape, llvm::ArrayRef<int64_t> strides);

int64_t linearizeIndex(llvm::ArrayRef<int64_t> indices, llvm::ArrayRef<int64_t> strides);

int64_t getNumElements(llvm::ArrayRef<int64_t> shape);

bool isMemoryContiguous(llvm::ArrayRef<int64_t> srcShape,
    llvm::ArrayRef<int64_t> offsets,
    llvm::ArrayRef<int64_t> sizes,
    llvm::ArrayRef<int64_t> strides);

} // namespace onnx_mlir
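
A tiny worked example (illustrative only, not in the commit): for a row-major 2x3x4 shape the strides are {12, 4, 1}, so index (1, 2, 3) maps to linear offset 1*12 + 2*4 + 3 = 23 and back. The function name `shapeUtilsExample` and the specific slices are assumptions for the sketch.

// Illustrative check of the stride and contiguity helpers on a 2x3x4 shape.
#include <cassert>
#include "src/Accelerators/PIM/Common/IR/ShapeUtils.hpp"

void shapeUtilsExample() {
  llvm::SmallVector<int64_t> shape = {2, 3, 4};
  auto strides = onnx_mlir::computeRowMajorStrides(shape);            // {12, 4, 1}
  int64_t linear = onnx_mlir::linearizeIndex({1, 2, 3}, strides);     // 23
  auto indices = onnx_mlir::delinearizeIndex(linear, shape, strides); // {1, 2, 3}
  assert(strides[0] == 12 && strides[1] == 4 && strides[2] == 1);
  assert(linear == 23);
  assert(indices[0] == 1 && indices[1] == 2 && indices[2] == 3);
  assert(onnx_mlir::getNumElements(shape) == 24);
  // Taking one full 3x4 plane at offset 1 is a contiguous byte range ...
  assert(onnx_mlir::isMemoryContiguous(shape, {1, 0, 0}, {1, 3, 4}, {1, 1, 1}));
  // ... while taking a partial innermost run across both planes is not.
  assert(!onnx_mlir::isMemoryContiguous(shape, {0, 0, 0}, {2, 1, 2}, {1, 1, 1}));
}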
101  src/PIM/Common/IR/WeightUtils.cpp  Normal file
@@ -0,0 +1,101 @@
#include "mlir/Dialect/Tensor/IR/Tensor.h"

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"

#include "src/Accelerators/PIM/Common/IR/WeightUtils.hpp"
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"

namespace onnx_mlir {

bool hasWeightAlways(mlir::Operation* op) { return op && op->getAttr(PimWeightAlwaysAttrName) != nullptr; }

void markWeightAlways(mlir::Operation* op) {
  assert(op && "expected valid op");
  op->setAttr(PimWeightAlwaysAttrName, mlir::UnitAttr::get(op->getContext()));
}

namespace {

template <typename MVMOpTy, typename VMMOpTy, typename ParentOpTy>
bool hasMvmVmmWeightUse(ParentOpTy parentOp, unsigned weightIndex) {
  bool found = false;
  parentOp.walk([&](mlir::Operation* op) {
    if (auto mvmOp = mlir::dyn_cast<MVMOpTy>(op))
      found |= mvmOp.getWeightIndex() == weightIndex;
    else if (auto vmmOp = mlir::dyn_cast<VMMOpTy>(op))
      found |= vmmOp.getWeightIndex() == weightIndex;
  });
  return found;
}

template <typename MVMOpTy, typename VMMOpTy, typename ParentOpTy>
void walkMvmVmmWeightUses(ParentOpTy parentOp, llvm::function_ref<void(mlir::OpOperand&)> callback) {
  auto weights = parentOp.getWeights();
  llvm::SmallSet<unsigned, 8> visited;
  auto walkWeightIndex = [&](unsigned weightIndex) {
    if (weightIndex < weights.size() && visited.insert(weightIndex).second)
      callback(parentOp->getOpOperand(weightIndex));
  };

  parentOp.walk([&](MVMOpTy op) { walkWeightIndex(op.getWeightIndex()); });
  parentOp.walk([&](VMMOpTy op) { walkWeightIndex(op.getWeightIndex()); });
}

} // namespace

bool isSpatialMvmVmmWeightUse(mlir::OpOperand& use) {
  mlir::Operation* user = use.getOwner();
  unsigned operandIndex = use.getOperandNumber();

  auto computeOp = mlir::dyn_cast<spatial::SpatCompute>(user);
  if (!computeOp || operandIndex >= computeOp.getWeights().size())
    return false;

  return hasMvmVmmWeightUse<spatial::SpatWeightedMVMOp, spatial::SpatWeightedVMMOp>(computeOp, operandIndex);
}

bool hasOnlySpatialMvmVmmWeightUses(mlir::Value value) {
  llvm::SmallPtrSet<mlir::Value, 8> visited;
  auto walkUses = [&](mlir::Value currentValue, auto& self) -> bool {
    if (!visited.insert(currentValue).second)
      return true;
    if (currentValue.use_empty())
      return false;

    return llvm::all_of(currentValue.getUses(), [&](mlir::OpOperand& use) {
      if (isSpatialMvmVmmWeightUse(use))
        return true;

      mlir::Operation* user = use.getOwner();
      if (auto extractSliceOp = mlir::dyn_cast<mlir::tensor::ExtractSliceOp>(user))
        return extractSliceOp.getSource() == currentValue && self(extractSliceOp.getResult(), self);
      if (auto expandShapeOp = mlir::dyn_cast<mlir::tensor::ExpandShapeOp>(user))
        return expandShapeOp.getSrc() == currentValue && self(expandShapeOp.getResult(), self);
      if (auto collapseShapeOp = mlir::dyn_cast<mlir::tensor::CollapseShapeOp>(user))
        return collapseShapeOp.getSrc() == currentValue && self(collapseShapeOp.getResult(), self);
      if (auto transposeOp = mlir::dyn_cast<mlir::ONNXTransposeOp>(user))
        return transposeOp.getData() == currentValue && self(transposeOp.getResult(), self);

      return false;
    });
  };

  return walkUses(value, walkUses);
}

void walkPimMvmVmmWeightUses(mlir::Operation* root, llvm::function_ref<void(mlir::OpOperand&)> callback) {
  assert(root && "expected valid root op");
  root->walk([&](pim::PimCoreOp coreOp) { walkMvmVmmWeightUses<pim::PimMVMOp, pim::PimVMMOp>(coreOp, callback); });
  root->walk([&](pim::PimCoreBatchOp coreBatchOp) {
    auto weights = coreBatchOp.getWeights();
    for (auto weight : weights)
      for (mlir::OpOperand& use : weight.getUses())
        if (use.getOwner() == coreBatchOp.getOperation())
          callback(use);
  });
}

} // namespace onnx_mlir
29  src/PIM/Common/IR/WeightUtils.hpp  Normal file
@@ -0,0 +1,29 @@
#pragma once

#include "mlir/IR/Operation.h"
#include "mlir/IR/Value.h"

#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringRef.h"

inline constexpr llvm::StringRef PimWeightAlwaysAttrName = "weightAlways";

namespace onnx_mlir {

bool hasWeightAlways(mlir::Operation* op);

/// Tags an op as producing a value that should stay materialized as a reusable
/// weight across later PIM lowering/codegen stages.
void markWeightAlways(mlir::Operation* op);

bool isSpatialMvmVmmWeightUse(mlir::OpOperand& use);

/// Returns true when a value flows only into Spatial weighted MVM/VMM operands,
/// allowing later passes to preserve it as a dedicated weight-like object.
bool hasOnlySpatialMvmVmmWeightUses(mlir::Value value);

/// Visits weight operands consumed by Pim core ops/core batches so downstream
/// passes can identify globals that must remain weight-backed.
void walkPimMvmVmmWeightUses(mlir::Operation* root, llvm::function_ref<void(mlir::OpOperand&)> callback);

} // namespace onnx_mlir
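
Finally, a hedged sketch (hypothetical, not in this commit) of how a constant-placement step might combine these predicates; the pass body `tagWeightOnlyConstants` and the single-result filter are assumptions.

// Hypothetical sketch: tag weight-only producers so later PIM stages keep
// them materialized instead of folding or rematerializing them.
static void tagWeightOnlyConstants(mlir::ModuleOp moduleOp) {
  moduleOp.walk([](mlir::Operation *op) {
    if (op->getNumResults() != 1)
      return;
    mlir::Value result = op->getResult(0);
    // If every transitive use is a Spatial weighted MVM/VMM weight operand,
    // mark the defining op with the `weightAlways` unit attribute.
    if (onnx_mlir::hasOnlySpatialMvmVmmWeightUses(result) && !onnx_mlir::hasWeightAlways(op))
      onnx_mlir::markWeightAlways(op);
  });
}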