Raptor/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.hpp

#pragma once

#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Value.h"
#include "mlir/Transforms/DialectConversion.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"

#include <cassert>
#include <cstddef>
#include <type_traits>
#include <utility>

namespace onnx_mlir {

/// Returns the size of dimension 2 of `shapedType`, which this header treats
/// as the image width.
///
/// NOTE(review): ONNX documents image tensors as N x C x H x W, where axis 2
/// is the height. Confirm that reading the width from axis 2 is intentional.
template <class ShapedType>
inline auto getImageWidth(const ShapedType& shapedType) {
  constexpr int kImageWidthDim = 2;
  return shapedType.getDimSize(kImageWidthDim);
}

/// Returns the size of dimension 3 of `shapedType`, which this header treats
/// as the image height.
///
/// NOTE(review): ONNX documents image tensors as N x C x H x W, where axis 3
/// is the width. Confirm that reading the height from axis 3 is intentional.
template <class ShapedType>
inline auto getImageHeight(const ShapedType& shapedType) {
  constexpr int kImageHeightDim = 3;
  return shapedType.getDimSize(kImageHeightDim);
}

/// Returns the size of dimension 1 of `shapedType`, which this header treats
/// as the image channel count.
template <class ShapedType>
inline auto getImageChannel(const ShapedType& shapedType) {
  constexpr int kImageChannelDim = 1;
  return shapedType.getDimSize(kImageChannelDim);
}

/// Returns the size of dimension 0 of `shapedType`, which this header treats
/// as the image "N" (leading) dimension.
template <class ShapedType>
inline auto getImageN(const ShapedType& shapedType) {
  constexpr int kImageNDim = 0;
  return shapedType.getDimSize(kImageNDim);
}

/// Returns the size of dimension 2 of `shapedType`, which this header treats
/// as the kernel width.
///
/// NOTE(review): ONNX documents Conv weights as M x C/group x kH x kW, where
/// axis 2 is the kernel height. Confirm that axis 2 is the intended width.
template <class ShapedType>
inline auto getKernelWidth(const ShapedType& shapedType) {
  constexpr int kKernelWidthDim = 2;
  return shapedType.getDimSize(kKernelWidthDim);
}

/// Returns the size of dimension 3 of `shapedType`, which this header treats
/// as the kernel height.
///
/// NOTE(review): ONNX documents Conv weights as M x C/group x kH x kW, where
/// axis 3 is the kernel width. Confirm that axis 3 is the intended height.
template <class ShapedType>
inline auto getKernelHeight(const ShapedType& shapedType) {
  constexpr int kKernelHeightDim = 3;
  return shapedType.getDimSize(kKernelHeightDim);
}

/// Returns the size of dimension 0 of `shapedType`, which this header treats
/// as the number of filters.
template <class ShapedType>
inline auto getFilterCount(const ShapedType& shapedType) {
  constexpr int kFilterCountDim = 0;
  return shapedType.getDimSize(kFilterCountDim);
}

/// Key type of the outer map returned by `tileMatrix`.
/// Presumably the index of a horizontal slice — confirm against the definition.
using HSliceId = size_t;
/// Key type of the per-core maps returned by `sliceVectorPerCrossbarPerCore`
/// and `tileMatrix`. Presumably the index of a core — confirm against the
/// definitions.
using CoreId = size_t;

/// Computes ceil(a / b) using integer arithmetic only.
///
/// Both operands are converted to `C` (by default their common type) before
/// dividing. The result is derived from the truncated quotient and the
/// remainder, so it is also correct for `a == 0` (yields 0, with no unsigned
/// wrap-around) and for operands of opposite sign. The `1 + (a - 1) / b`
/// shortcut gets those cases wrong.
///
/// \param a Dividend.
/// \param b Divisor; must be non-zero.
/// \return The smallest integer that is not less than the exact quotient a / b.
template <class A, class B, class C = std::common_type_t<A, B>>
constexpr C ceilIntegerDivide(A a, B b) {
  static_assert(std::is_integral_v<A>, "A must be an integer type");
  static_assert(std::is_integral_v<B>, "B must be an integer type");
  const C ac = static_cast<C>(a);
  const C bc = static_cast<C>(b);
  assert(bc != 0 && "ceilIntegerDivide: divisor must be non-zero");
  const C quotient = ac / bc;
  const C remainder = ac % bc;
  // Integer division truncates toward zero. The truncated quotient is below
  // the exact one only when the division is inexact and the exact quotient is
  // positive, i.e. when the remainder has the same sign as the divisor.
  const bool roundUp = remainder != 0 && ((remainder > 0) == (bc > 0));
  return static_cast<C>(quotient + (roundUp ? 1 : 0));
}

/// Returns the rounded-up quotient of `a / b` together with `a % b`.
///
/// Both operands are first converted to `C` (by default their common type).
/// The first element of the pair is `ceilIntegerDivide` applied to the
/// converted operands; the second is the plain `%` remainder of the converted
/// operands.
///
/// \param a Dividend.
/// \param b Divisor.
/// \return `{ceilIntegerDivide(a, b), a % b}` computed in type `C`.
template <class A, class B, class C = std::common_type_t<A, B>>
constexpr std::pair<C, C> ceilIntegerDivideWithRemainder(A a, B b) {
  static_assert(std::is_integral_v<A>, "A must be an integer type");
  static_assert(std::is_integral_v<B>, "B must be an integer type");
  const C dividend = static_cast<C>(a);
  const C divisor = static_cast<C>(b);
  const C roundedUpQuotient = ceilIntegerDivide(dividend, divisor);
  const C remainder = dividend % divisor;
  return std::pair<C, C>(roundedUpQuotient, remainder);
}

/// Returns true when `shape` has exactly two dimensions and at least one of
/// them equals 1.
template <class T>
bool isVectorShape(mlir::ArrayRef<T> shape) {
  if (shape.size() != 2)
    return false;
  return shape[0] == 1 || shape[1] == 1;
}

/// Returns true when `shape` has exactly two dimensions. The dimension sizes
/// themselves are not inspected.
template <class T>
bool isMatrixShape(mlir::ArrayRef<T> shape) {
  const bool hasTwoDims = shape.size() == 2;
  return hasTwoDims;
}

/// Returns true when `shape` has exactly two dimensions and the first one
/// equals 1 (a 1 x N shape).
template <class T>
bool isHVectorShape(mlir::ArrayRef<T> shape) {
  if (shape.size() != 2)
    return false;
  return shape[0] == 1;
}

/// Returns true when `shape` has exactly two dimensions and the second one
/// equals 1 (an N x 1 shape).
template <class T>
bool isVVectorShape(mlir::ArrayRef<T> shape) {
  if (shape.size() != 2)
    return false;
  return shape[1] == 1;
}

/// Returns the length of a vector-shaped `shape`.
///
/// `shape` must satisfy `isVectorShape` (checked with `assert`). The result is
/// the first dimension unless it equals 1, in which case the second dimension
/// is returned; a 1 x 1 shape therefore yields 1.
template <class T>
T getVectorLength(mlir::ArrayRef<T> shape) {
  assert(isVectorShape(shape));
  if (shape[0] == 1)
    return shape[1];
  return shape[0];
}

/// Returns the shape of `tensor`.
///
/// The value's type is converted with `mlir::cast<mlir::RankedTensorType>`,
/// so callers must pass a value whose type is a ranked tensor.
inline auto getTensorShape(mlir::Value tensor) {
  auto rankedType = mlir::cast<mlir::RankedTensorType>(tensor.getType());
  return rankedType.getShape();
}

/// Returns true only when both values are ranked tensors with fully static
/// shapes and those shapes compare equal.
///
/// Any value that is not a ranked tensor, or whose shape is not static, makes
/// the result false.
inline bool haveSameStaticShape(mlir::Value lhs, mlir::Value rhs) {
  auto lhsRanked = mlir::dyn_cast<mlir::RankedTensorType>(lhs.getType());
  if (!lhsRanked || !lhsRanked.hasStaticShape())
    return false;

  auto rhsRanked = mlir::dyn_cast<mlir::RankedTensorType>(rhs.getType());
  if (!rhsRanked || !rhsRanked.hasStaticShape())
    return false;

  return lhsRanked.getShape() == rhsRanked.getShape();
}

/// Slices a statically shaped tensor along one axis into contiguous pieces of
/// at most `sliceSize` elements.
///
/// Only the declaration is visible in this header; the notes below follow the
/// signature and the summary above and should be checked against the
/// definition.
///
/// \param tensorToSlice Tensor value to slice.
/// \param axis Axis along which the tensor is cut.
/// \param sliceSize Upper bound on the extent of each piece along `axis`.
/// \param rewriter Conversion rewriter; presumably used to create the ops that
///        produce the slices — confirm in the definition.
/// \param loc Location; presumably attached to the created ops — confirm.
/// \return The resulting slices as a list of values.
llvm::SmallVector<mlir::Value> sliceTensor(const mlir::Value& tensorToSlice,
                                           size_t axis,
                                           int64_t sliceSize,
                                           mlir::ConversionPatternRewriter& rewriter,
                                           mlir::Location loc);

/// Slices a vector value into a list of values.
///
/// Only the declaration is visible in this header. By analogy with
/// `sliceTensor`, this presumably cuts `vectorToSlice` into contiguous pieces
/// of at most `sliceSize` elements — confirm against the definition.
///
/// \param vectorToSlice Value to slice.
/// \param sliceSize Slice size; presumably an upper bound per piece — confirm.
/// \param rewriter Conversion rewriter passed through to the implementation.
/// \param loc Location passed through to the implementation.
/// \return The resulting slices as a list of values.
llvm::SmallVector<mlir::Value> sliceVector(const mlir::Value& vectorToSlice,
                                           int64_t sliceSize,
                                           mlir::ConversionPatternRewriter& rewriter,
                                           mlir::Location loc);

/// Partitions one logical vector into per-core crossbar-sized slices using the
/// current PIM target geometry.
///
/// Only the declaration is visible in this header. No slice size is passed
/// in, so the sizes must come from state that is not visible here.
///
/// \param vectorToSlice Value to partition.
/// \param rewriter Conversion rewriter passed through to the implementation.
/// \param loc Location passed through to the implementation.
/// \return A map from `CoreId` to the list of slice values for that key.
llvm::DenseMap<CoreId, llvm::SmallVector<mlir::Value>> sliceVectorPerCrossbarPerCore(
  const mlir::Value& vectorToSlice, mlir::ConversionPatternRewriter& rewriter, mlir::Location loc);

/// Tiles a matrix first across output columns and then across input rows so it
/// can be assigned to crossbars grouped by core.
///
/// Only the declaration is visible in this header.
///
/// NOTE(review): unlike the sibling slicing helpers, `matrixToTile` and `loc`
/// are taken by non-const reference, so temporaries cannot be passed. Whether
/// the implementation actually modifies them cannot be seen here — confirm
/// before relaxing the signature.
///
/// \param matrixToTile Matrix value to tile.
/// \param hSliceSize Slice size for the tiling step keyed by `HSliceId`;
///        presumably a number of columns — confirm.
/// \param vSliceSize Slice size for the other tiling step; presumably a number
///        of rows — confirm.
/// \param rewriter Conversion rewriter passed through to the implementation.
/// \param loc Location passed through to the implementation.
/// \return A two-level map: `HSliceId` -> `CoreId` -> list of tile values.
llvm::DenseMap<HSliceId, llvm::DenseMap<CoreId, llvm::SmallVector<mlir::Value>>>
tileMatrix(mlir::Value& matrixToTile,
           int64_t hSliceSize,
           int64_t vSliceSize,
           mlir::ConversionPatternRewriter& rewriter,
           mlir::Location& loc);

/// Broadcasts a scalar value to a vector and returns the `tensor.splat` op
/// that represents it.
///
/// Only the declaration is visible in this header; the exact result shape
/// (for example 1 x `length` versus `length` x 1) cannot be determined from
/// here — confirm against the definition.
///
/// \param scalarToBroadcast Scalar value to replicate.
/// \param length Presumably the number of elements in the resulting vector —
///        confirm.
/// \param rewriter Conversion rewriter passed through to the implementation.
/// \param loc Location passed through to the implementation.
/// \return The created `mlir::tensor::SplatOp`.
mlir::tensor::SplatOp broadcastToVector(mlir::Value scalarToBroadcast,
                                        int64_t length,
                                        mlir::ConversionPatternRewriter& rewriter,
                                        mlir::Location loc);

} // namespace onnx_mlir