#pragma once

#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/DialectConversion.h"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"

#include "llvm/Support/LogicalResult.h"

// Expands to the given operation name followed by a comma.
// NOTE(review): presumably meant for building enumerator/initializer lists
// from a list of op names; no use is visible in this header — confirm.
#define DEFINE_MAP_OP(opname) opname,

// Dimension accessors for shaped types. The argument only needs to provide
// `getDimSize(index)`. Image tensors are documented in this header as NxCxWxH,
// hence N = dim 0, channel = dim 1, width = dim 2 and height = dim 3.
// Kernel accessors read the filter count from dim 0 and width/height from
// dims 2/3.
//
// The macro argument is parenthesized so that arbitrary expressions (e.g. a
// conditional expression) can be passed without precedence surprises.
#define GET_IMAGE_WIDTH(shapedType) (shapedType).getDimSize(2)
#define GET_IMAGE_HEIGHT(shapedType) (shapedType).getDimSize(3)
#define GET_IMAGE_CHANNEL(shapedType) (shapedType).getDimSize(1)
#define GET_IMAGE_N(shapedType) (shapedType).getDimSize(0)
#define GET_KERNEL_WIDTH(shapedType) (shapedType).getDimSize(2)
#define GET_KERNEL_HEIGHT(shapedType) (shapedType).getDimSize(3)
#define GET_FILTER_COUNT(shapedType) (shapedType).getDimSize(0)

using namespace mlir;

namespace onnx_mlir {

// Attribute name string "replication_factor".
// Declared `inline constexpr` so that every translation unit including this
// header shares a single, constant-initialized definition instead of getting
// its own internal-linkage copy.
inline constexpr StringRef REPLICATION_ATTR_NAME = "replication_factor";

// Key type of the outer map returned by tileMatrix (declared later in this header).
using HSliceId = size_t;
// Key type of the per-core maps returned by sliceVectorPerCrossbarPerCore and
// tileMatrix (declared later in this header).
using CoreId = size_t;

/**
 * Selector accepted by createMapOperation (declared later in this header).
 *
 * Enumerators keep their implicit values, starting at 0 for `None`.
 * NOTE(review): the names mirror ONNX op class names; the exact operation each
 * one produces is decided by createMapOperation's implementation — confirm
 * there.
 */
enum class MapOperations {
  None,            // = 0
  ONNXSoftmaxOp,   // = 1
  ONNXReluOp,      // = 2
  ONNXLeakyReluOp, // = 3
  ONNXExpOp,       // = 4
};

/**
 * Integer division rounded towards positive infinity.
 *
 * Both operands are converted to their common type `C` before dividing.
 * Unlike the `1 + (a - 1) / b` formulation, this is correct for `a == 0`
 * (result 0, also for unsigned types where `a - 1` would wrap around) and for
 * negative operands.
 *
 * @param a The dividend.
 * @param b The divisor. Must not be zero (division by zero is undefined).
 * @return ceil(a / b) expressed in the common type of `A` and `B`.
 */
template <class A, class B, class C = std::common_type_t<A, B>>
constexpr C ceilIntegerDivide(A a, B b) {
  static_assert(std::is_integral_v<A>, "A must be an integer type");
  static_assert(std::is_integral_v<B>, "B must be an integer type");
  const C ac = static_cast<C>(a);
  const C bc = static_cast<C>(b);
  const C quotient = ac / bc;
  const C remainder = ac % bc;
  // C++ division truncates towards zero, which already equals the ceiling when
  // the exact quotient is negative. Round up only when there is a remainder
  // and the exact quotient is positive, i.e. the remainder (which carries the
  // sign of the dividend) and the divisor have the same sign.
  const bool roundUp = remainder != 0 && ((remainder > 0) == (bc > 0));
  return roundUp ? static_cast<C>(quotient + 1) : quotient;
}

/**
 * Rounded-up integer division together with the plain remainder.
 *
 * Both operands are converted to their common type `C`. The first element of
 * the result is `ceilIntegerDivide(a, b)`, the second is `a % b` computed in
 * `C`.
 *
 * @param a The dividend.
 * @param b The divisor (must not be zero).
 * @return The pair {rounded-up quotient, remainder}.
 */
template <class A, class B, class C = std::common_type_t<A, B>>
constexpr std::pair<C, C> ceilIntegerDivideWithRemainder(A a, B b) {
  static_assert(std::is_integral_v<A>, "A must be an integer type");
  static_assert(std::is_integral_v<B>, "B must be an integer type");
  const C dividend = static_cast<C>(a);
  const C divisor = static_cast<C>(b);
  const C roundedUpQuotient = ceilIntegerDivide(dividend, divisor);
  const C remainder = dividend % divisor;
  return std::pair<C, C>(roundedUpQuotient, remainder);
}

/**
 * Checks whether `shape` describes a vector stored as a 2-D shape.
 *
 * @return true when `shape` has exactly two dimensions and at least one of
 * them equals 1.
 */
template <class T>
bool isVectorShape(const ArrayRef<T> shape) {
  if (shape.size() != 2)
    return false;
  const bool singleRow = shape[0] == 1;
  return singleRow || shape[1] == 1;
}

/**
 * Checks whether `shape` has exactly two dimensions.
 */
template <class T>
bool isMatrixShape(const ArrayRef<T> shape) {
  const auto rank = shape.size();
  return rank == 2;
}

/**
 * Checks whether `shape` is two-dimensional with a first dimension of 1
 * (a 1xN shape).
 */
template <class T>
bool isHVectorShape(const ArrayRef<T> shape) {
  if (shape.size() != 2)
    return false;
  return shape[0] == 1;
}

/**
 * Checks whether `shape` is two-dimensional with a second dimension of 1
 * (an Nx1 shape).
 */
template <class T>
bool isVVectorShape(const ArrayRef<T> shape) {
  if (shape.size() != 2)
    return false;
  return shape[1] == 1;
}

/**
 * Returns the length of a vector stored as a 2-D shape.
 *
 * Asserts that `shape` satisfies isVectorShape. The result is the first
 * dimension unless it equals 1, in which case the second dimension is
 * returned (so a 1x1 shape yields 1).
 */
template <class T>
T getVectorLength(const ArrayRef<T> shape) {
  assert(isVectorShape(shape));
  if (shape[0] == 1)
    return shape[1];
  return shape[0];
}

/**
 * Returns the shape of `tensor`, whose type is cast to RankedTensorType.
 * The cast is unconditional, so the value is expected to be a ranked tensor.
 */
inline auto getTensorShape(const Value tensor) {
  const auto rankedType = cast<RankedTensorType>(tensor.getType());
  return rankedType.getShape();
}

/**
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): presumably splits `tensorToSlice` along `axis` into pieces of
 * `sliceSize` elements and returns them in order — confirm against the
 * implementation, including how a trailing partial slice is handled.
 */
SmallVector<Value> sliceTensor(
  const Value& tensorToSlice, size_t axis, int64_t sliceSize, ConversionPatternRewriter& rewriter, Location loc);

/**
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): presumably the vector counterpart of sliceTensor, cutting
 * `vectorToSlice` into pieces of `sliceSize` elements — confirm against the
 * implementation.
 */
SmallVector<Value>
sliceVector(const Value& vectorToSlice, int64_t sliceSize, ConversionPatternRewriter& rewriter, Location loc);

/**
 * Declaration only; the definition is not part of this header.
 *
 * Returns a map from CoreId to a list of Values.
 * NOTE(review): presumably each list holds the crossbar-sized slices of
 * `vectorToSlice` assigned to that core; the slice size is not a parameter,
 * so it must come from configuration elsewhere — confirm.
 */
DenseMap<CoreId, SmallVector<Value>>
sliceVectorPerCrossbarPerCore(const Value& vectorToSlice, ConversionPatternRewriter& rewriter, Location loc);

/**
 * Declaration only; the definition is not part of this header.
 *
 * Returns a two-level map: HSliceId -> CoreId -> list of Values.
 * NOTE(review): presumably tiles `matrixToTile` using `hSliceSize` and
 * `vSliceSize` as the tile extents — confirm which axis each size applies to
 * and why `matrixToTile` and `loc` are taken by non-const reference.
 */
DenseMap<HSliceId, DenseMap<CoreId, SmallVector<Value>>> tileMatrix(
  Value& matrixToTile, int64_t hSliceSize, int64_t vSliceSize, ConversionPatternRewriter& rewriter, Location& loc);

/**
 * Declaration only; the definition is not part of this header.
 *
 * Returns a tensor::SplatOp.
 * NOTE(review): presumably the splat replicates `scalarToBroadcast` into a
 * vector of `length` elements — confirm the resulting shape (1xN vs Nx1)
 * against the implementation.
 */
tensor::SplatOp
broadcastToVector(Value scalarToBroadcast, int64_t length, ConversionPatternRewriter& rewriter, Location loc);

/**
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): presumably emits operations that add all `tensors` together
 * and returns the final sum — confirm the behavior for an empty or
 * single-element list and which Location is used (none is passed in).
 */
Value sumTensors(ArrayRef<Value> tensors, ConversionPatternRewriter& rewriter);

/**
 * Declaration only; the definition is not part of this header.
 *
 * NOTE(review): presumably creates the operation selected by `mapOp` applied
 * to `input` and returns its result — confirm what is returned for
 * `MapOperations::None`.
 */
Value createMapOperation(PatternRewriter& rewriter, MapOperations mapOp, const Value& input);

/**
 * Unpacks an optional pair vector into two size_t values.
 *
 * @param valuesArray The optional `mlir::ArrayAttr` containing the pair of
 * values.
 * @param value1      Output: receives the first unpacked value.
 * @param value2      Output: receives the second unpacked value.
 *
 * NOTE(review): what `value1`/`value2` are set to when `valuesArray` is empty
 * is defined by the implementation, which is not visible here — confirm
 * before relying on a default.
 */
void unpackOptionalPairVector(std::optional<mlir::ArrayAttr> valuesArray, size_t& value1, size_t& value2);

/**
 * Unpacks the optional pads vector.
 *
 * @param valuesArray The optional array attribute containing the values.
 * @param pad_x Output: receives the value of pad_x.
 * @param pad_y Output: receives the value of pad_y.
 *
 * @return std::optional<Twine> holding the error message if the pads are
 * invalid; presumably empty on success — confirm against the implementation.
 *
 * NOTE(review): llvm::Twine only references its operands and is documented as
 * unsafe to store or return when built from temporaries. Confirm the
 * implementation only returns Twines over string literals or other long-lived
 * storage.
 */
std::optional<Twine> unpackOptionalPadsVector(std::optional<mlir::ArrayAttr> valuesArray, size_t& pad_x, size_t& pad_y);

/**
 * Tiles the image tensor by channel.
 *
 * This function takes an image tensor and tiles it into smaller tiles based on
 * the channel dimension. The size of each tile is specified by the tileSize
 * parameter.
 *
 * @param imageTensor The input image tensor (NxCxWxH) to be tiled.
 * @param tiles Output: the vector that receives the tiled image tensors.
 * NOTE(review): presumably indexed as [channelTile][x][y] like the tiles of
 * resolveImgInputTiles, and expected to be pre-sized by the caller — confirm.
 * @param tileSize The size of each tile (presumably in channels — confirm).
 * @param rewriter The ConversionPatternRewriter used for creating operations.
 */
void tileImageTensorByChannel(Value imageTensor,
                              SmallVector<SmallVector<SmallVector<Value>>>& tiles,
                              size_t tileSize,
                              ConversionPatternRewriter& rewriter);

/**
 * Creates an ImgConcatOp based on the given tiles.
 *
 * This function takes a 3-dimensional vector `outputTiles` representing the
 * tiles to concatenate. The tiles are indexed by [tile][x][y].
 *
 * @param outputTiles The tiles to concatenate.
 * @param rewriter The ConversionPatternRewriter used for creating the
 * ImgConcatOp.
 * @param loc The location of the operation.
 * @param outputType The type of the output tensor.
 *
 * @return A Value for the created ImgConcatOp (presumably its result —
 * confirm against the implementation).
 */
Value createImgConcatOp(SmallVector<SmallVector<SmallVector<Value>>>& outputTiles,
                        ConversionPatternRewriter& rewriter,
                        Location& loc,
                        Type outputType);

/**
 * @brief Verifies if the given input coordinates and padding values are within
 * the bounds of the input tensor.
 *
 * Note that the coordinates are signed (`int`) while the sizes and paddings
 * are unsigned (`size_t`).
 * NOTE(review): presumably negative coordinates denote positions inside the
 * padding region — confirm against the implementation.
 *
 * @param input_w The width of the input tensor.
 * @param input_h The height of the input tensor.
 * @param inX The X-coordinate of the input.
 * @param inY The Y-coordinate of the input.
 * @param pad_x The padding value in the X-direction.
 * @param pad_y The padding value in the Y-direction.
 * @return LogicalResult Returns success if the coordinates and padding are
 * within bounds, failure otherwise.
 */
LogicalResult
verifyWithinBoundsAndPaddings(size_t input_w, size_t input_h, int inX, int inY, size_t pad_x, size_t pad_y);

/**
 * Resolves the tiling of the input tensor into smaller tiles.
 *
 * This function takes a whole input tensor and tiles it into smaller tiles
 * using the provided parameters. The resulting tiles are stored in the
 * `inputTiles` vector.
 * Input tiles need to be indexed by:
 *    a. Channel Tile
 *    b. Pixel `x` position
 *    c. Pixel `y` position
 * For example: inputTiles[channelTile][x][y]
 *
 * @param wholeInputTensor The whole input tensor to be tiled.
 * @param inputTiles A vector of vectors of vectors of Values representing the
 *                   tiles of the input tensor, indexed as
 *                   [channelTile][x][y] (see above).
 *                   NOTE(review): an earlier revision of this comment called
 *                   the middle level "rows" and the innermost "columns";
 *                   confirm how x/y map to rows/columns.
 * @param channelTileCount The number of tiles for the `channel` axis.
 * @param channelTileRest The size of the last channelTile. Set as 0 if tiles
 * fit exactly
 * @param input_w The width of the input tensor.
 * @param input_h The height of the input tensor.
 * @param rewriter The ConversionPatternRewriter used for creating operations.
 *
 * @return std::optional<Twine> An error message if the input tensor could
 * not be resolved into tiles; presumably empty on success — confirm.
 *
 * NOTE(review): llvm::Twine only references its operands and is documented as
 * unsafe to store or return when built from temporaries. Confirm the
 * implementation only returns Twines over long-lived storage.
 */
std::optional<Twine> resolveImgInputTiles(Value wholeInputTensor,
                                          SmallVector<SmallVector<SmallVector<Value>>>& inputTiles,
                                          size_t channelTileCount,
                                          size_t channelTileRest,
                                          size_t input_w,
                                          size_t input_h,
                                          mlir::ConversionPatternRewriter& rewriter);

/**
 * Computes the boundaries of an image kernel application.
 *
 * All inputs are signed 64-bit values while the result is a pair of unsigned
 * `size_t` values.
 * NOTE(review): how positions that fall inside the padding (which would be
 * negative before conversion) are handled is defined by the implementation —
 * confirm there.
 *
 * @param out_pos The position of the output element.
 * @param input_width The width of the input image.
 * @param krn_width The width of the kernel.
 * @param stride The stride value.
 * @param dilation The dilation value.
 * @param pad The padding value.
 * @return A pair of size_t values representing the start and end positions of
 * the kernel application.
 */
std::pair<size_t, size_t> kernel_get_start_and_end(
  int64_t out_pos, int64_t input_width, int64_t krn_width, int64_t stride, int64_t dilation, int64_t pad);

/**
 * @brief Increment the `operandSegmentSizes` in the WeightedCompute operation
 * for the `inputs` operand.
 *
 * This function increments the size of the `inputs` operand segment in the
 * `operandSegmentSizes` of the given WeightedCompute operation by the specified
 * increment. This is necessary when new operands are programmatically added to
 * the WeightedCompute operation.
 *
 * @param wcomputeOp The WeightedCompute operation whose `operandSegmentSizes`
 * is to be incremented.
 * @param increment The value by which to increment the `inputs` operand segment
 * size. The type is a signed `int`.
 * NOTE(review): whether negative values are supported is not visible here —
 * confirm.
 */
void incrementWeightedComputeInputsSegmentSize(spatial::SpatWeightedCompute wcomputeOp, int increment);

/**
 * @brief Finds the result index of the given operation that produces the
 * specified value.
 *
 * This function takes an operation and a value, and returns the index of the
 * result of the operation that corresponds to the given value.
 *
 * @param op Operation whose result index is to be found.
 * @param v The value for which the result index is to be determined.
 * @return The index of the result of the operation that produces the specified
 * value.
 * NOTE(review): the behavior when `v` is not a result of `op` (sentinel value
 * vs. assertion) is defined by the implementation — confirm before relying on
 * it.
 */
int getResultIndex(Operation* op, Value v);

}; // namespace onnx_mlir