From 2d5b03c08f6ad0f16fde383e21b55b0534d4deeb Mon Sep 17 00:00:00 2001 From: NiccoloN Date: Fri, 29 May 2026 19:21:37 +0200 Subject: [PATCH] automatic code reformat --- src/PIM/Common/IR/ConstantUtils.hpp | 12 ++-- src/PIM/Common/IR/WeightUtils.cpp | 2 +- src/PIM/Common/Support/DebugDump.cpp | 2 +- .../ONNXToSpatial/Common/AttributeUtils.cpp | 4 +- .../Common/ComputeRegionBuilder.hpp | 10 ++- .../ONNXToSpatial/Common/IndexingUtils.cpp | 6 +- .../ONNXToSpatial/Common/IndexingUtils.hpp | 6 +- .../ONNXToSpatial/Common/ShapeTilingUtils.cpp | 22 +++--- .../ONNXToSpatial/Common/ShapeTilingUtils.hpp | 8 +-- .../ONNXToSpatial/ONNXToSpatialPass.cpp | 2 +- .../ONNXToSpatial/ONNXToSpatialVerifier.cpp | 22 +++--- src/PIM/Conversion/ONNXToSpatial/Patterns.cpp | 4 +- .../ONNXToSpatial/Patterns/Math/Conv.cpp | 42 +++++------ .../ONNXToSpatial/Patterns/Math/Gemm.cpp | 58 +++++++++------ .../Patterns/Math/ReduceMean.cpp | 71 ++++++++++--------- .../ONNXToSpatial/Patterns/NN/Softmax.cpp | 6 +- .../ONNXToSpatial/Patterns/Post.cpp | 23 ++++-- .../ONNXToSpatial/Patterns/Tensor/Reshape.cpp | 3 +- .../ONNXToSpatial/Patterns/Tensor/Split.cpp | 3 +- .../Patterns/Tensor/Transpose.cpp | 5 +- .../BatchCoreLoweringPatterns.cpp | 2 +- src/PIM/Conversion/SpatialToPim/Patterns.cpp | 2 +- .../SpatialToPim/Patterns/ChannelLowering.cpp | 2 +- .../SpatialToPim/SpatialToPimPass.cpp | 4 +- .../MaterializeMergeSchedule.cpp | 9 ++- .../MaterializeHostConstantsPass.cpp | 21 +++--- 26 files changed, 183 insertions(+), 168 deletions(-) diff --git a/src/PIM/Common/IR/ConstantUtils.hpp b/src/PIM/Common/IR/ConstantUtils.hpp index e496a20..ae87f4c 100644 --- a/src/PIM/Common/IR/ConstantUtils.hpp +++ b/src/PIM/Common/IR/ConstantUtils.hpp @@ -10,15 +10,11 @@ namespace onnx_mlir { mlir::Block* getConstantInsertionBlock(mlir::Operation* anchorOp); -mlir::Value getOrCreateConstant(mlir::OperationFolder& folder, - mlir::Operation* anchorOp, - mlir::Attribute value, - mlir::Type type); +mlir::Value +getOrCreateConstant(mlir::OperationFolder& folder, mlir::Operation* anchorOp, mlir::Attribute value, mlir::Type type); -mlir::Value getOrCreateConstant(mlir::RewriterBase& rewriter, - mlir::Operation* anchorOp, - mlir::Attribute value, - mlir::Type type); +mlir::Value +getOrCreateConstant(mlir::RewriterBase& rewriter, mlir::Operation* anchorOp, mlir::Attribute value, mlir::Type type); mlir::Value getOrCreateConstantLike(mlir::OperationFolder& folder, mlir::arith::ConstantOp constantOp); diff --git a/src/PIM/Common/IR/WeightUtils.cpp b/src/PIM/Common/IR/WeightUtils.cpp index 4b3d678..916dddf 100644 --- a/src/PIM/Common/IR/WeightUtils.cpp +++ b/src/PIM/Common/IR/WeightUtils.cpp @@ -1,5 +1,5 @@ -#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" diff --git a/src/PIM/Common/Support/DebugDump.cpp b/src/PIM/Common/Support/DebugDump.cpp index 36c26df..fb47f93 100644 --- a/src/PIM/Common/Support/DebugDump.cpp +++ b/src/PIM/Common/Support/DebugDump.cpp @@ -18,7 +18,7 @@ void dumpModule(mlir::ModuleOp moduleOp, const std::string& name) { std::fstream file(dialectsDir + "/" + name + ".mlir", std::ios::out); llvm::raw_os_ostream os(file); mlir::OpPrintingFlags flags; - flags.elideLargeElementsAttrs().enableDebugInfo(true,false); + flags.elideLargeElementsAttrs().enableDebugInfo(true, false); moduleOp.print(os, flags); os.flush(); file.close(); diff --git a/src/PIM/Conversion/ONNXToSpatial/Common/AttributeUtils.cpp b/src/PIM/Conversion/ONNXToSpatial/Common/AttributeUtils.cpp index 63e7817..8c7e4e5 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common/AttributeUtils.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common/AttributeUtils.cpp @@ -1,7 +1,7 @@ -#include "AttributeUtils.hpp" - #include "mlir/IR/BuiltinAttributes.h" +#include "AttributeUtils.hpp" + using namespace mlir; namespace onnx_mlir { diff --git a/src/PIM/Conversion/ONNXToSpatial/Common/ComputeRegionBuilder.hpp b/src/PIM/Conversion/ONNXToSpatial/Common/ComputeRegionBuilder.hpp index e503484..bb5ba07 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common/ComputeRegionBuilder.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common/ComputeRegionBuilder.hpp @@ -1,5 +1,6 @@ #pragma once +#include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/Block.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/ValueRange.h" @@ -11,8 +12,6 @@ #include #include -#include "mlir/Dialect/Tensor/IR/Tensor.h" - #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" @@ -209,8 +208,7 @@ auto createSpatComputeBatch(RewriterT& rewriter, block->getArgument(0), mlir::ValueRange(block->getArguments()).slice(1, weights.size()), mlir::ValueRange(block->getArguments()).slice(1 + weights.size(), inputs.size()), - mlir::ValueRange(block->getArguments()).drop_front(1 + weights.size() + inputs.size()) - }; + mlir::ValueRange(block->getArguments()).drop_front(1 + weights.size() + inputs.size())}; using BodyResult = std::invoke_result_t; if constexpr (std::is_same_v) { @@ -252,8 +250,8 @@ mlir::Value materializeOrComputeUnary(mlir::Value input, if (isCompileTimeComputable(input)) return buildFn(input); - auto computeOp = - createSpatCompute<1>(rewriter, loc, mlir::TypeRange {resultType}, {}, mlir::ValueRange {input}, [&](mlir::Value computeInput) { + auto computeOp = createSpatCompute<1>( + rewriter, loc, mlir::TypeRange {resultType}, {}, mlir::ValueRange {input}, [&](mlir::Value computeInput) { mlir::Value result = buildFn(computeInput); spatial::SpatYieldOp::create(rewriter, loc, result); }); diff --git a/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.cpp b/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.cpp index 0c24977..400fc81 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.cpp @@ -1,11 +1,10 @@ -#include "IndexingUtils.hpp" - #include "mlir/Dialect/Arith/IR/Arith.h" #include "llvm/ADT/APInt.h" #include +#include "IndexingUtils.hpp" #include "src/Accelerators/PIM/Common/IR/ConstantUtils.hpp" using namespace mlir; @@ -85,7 +84,8 @@ Value floorDivIndexByConstant(PatternRewriter& rewriter, Location loc, Value val Value getOrMaterializeIndexValue(PatternRewriter& rewriter, OpFoldResult value) { if (auto attr = dyn_cast(value)) - return getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), cast(attr).getInt()); + return getOrCreateIndexConstant( + rewriter, rewriter.getInsertionBlock()->getParentOp(), cast(attr).getInt()); return cast(value); } diff --git a/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.hpp b/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.hpp index 2d90496..143be3a 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common/IndexingUtils.hpp @@ -26,8 +26,10 @@ mlir::Value createAffineApplyOrFoldedConstant(mlir::PatternRewriter& rewriter, mlir::AffineExpr expr, mlir::ValueRange operands); -mlir::Value -multiplyIndexByConstant(mlir::PatternRewriter& rewriter, mlir::Operation* anchorOp, mlir::Value value, int64_t multiplier); +mlir::Value multiplyIndexByConstant(mlir::PatternRewriter& rewriter, + mlir::Operation* anchorOp, + mlir::Value value, + int64_t multiplier); mlir::Value modIndexByConstant(mlir::PatternRewriter& rewriter, mlir::Location loc, mlir::Value value, int64_t divisor); diff --git a/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.cpp b/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.cpp index ae510a6..2b9c6ba 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.cpp @@ -8,8 +8,8 @@ #include #include -#include "ShapeTilingUtils.hpp" #include "IndexingUtils.hpp" +#include "ShapeTilingUtils.hpp" #include "src/Accelerators/PIM/Common/IR/ConstantUtils.hpp" #include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp" @@ -53,7 +53,9 @@ bool hasStaticPositiveShape(ArrayRef shape) { return llvm::all_of(shape, [](int64_t dim) { return dim > 0; }); } -bool hasStaticPositiveShape(RankedTensorType type) { return type.hasStaticShape() && hasStaticPositiveShape(type.getShape()); } +bool hasStaticPositiveShape(RankedTensorType type) { + return type.hasStaticShape() && hasStaticPositiveShape(type.getShape()); +} int64_t getStaticShapeElementCount(ArrayRef shape) { return std::accumulate(shape.begin(), shape.end(), int64_t {1}, std::multiplies {}); @@ -98,11 +100,8 @@ FailureOr> getTransposePermutationChecked(std::optional permutation, - PatternRewriter& rewriter, - Location loc) { +Value transposeMaybeInCompute( + Value value, RankedTensorType resultType, ArrayRef permutation, PatternRewriter& rewriter, Location loc) { auto buildTranspose = [&](Value input) -> Value { return ONNXTransposeOp::create(rewriter, loc, resultType, input, rewriter.getI64ArrayAttr(permutation)).getResult(); }; @@ -127,7 +126,8 @@ SmallVector getStaticSizes(PatternRewriter& rewriter, ArrayRef strides) { auto sourceType = dyn_cast(source.getType()); - if (!sourceType || !sourceType.hasStaticShape() || !resultType.hasStaticShape() || sourceType.getRank() != resultType.getRank()) + if (!sourceType || !sourceType.hasStaticShape() || !resultType.hasStaticShape() + || sourceType.getRank() != resultType.getRank()) return false; for (OpFoldResult stride : strides) { @@ -290,7 +290,8 @@ Value materializeContiguousTensorSlice(Value source, } Value lower = zeroIndices[dim]; - Value upper = getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), resultType.getDimSize(dim)); + Value upper = + getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), resultType.getDimSize(dim)); Value step = getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 1); auto loop = scf::ForOp::create(rewriter, loc, lower, upper, step, ValueRange {accumulator}); rewriter.setInsertionPointToStart(loop.getBody()); @@ -316,7 +317,8 @@ Value extractAxisSlice( SmallVector sizes = getStaticSizes(rewriter, sourceType.getShape()); offsets[axis] = rewriter.getIndexAttr(offset); sizes[axis] = rewriter.getIndexAttr(size); - return tensor::ExtractSliceOp::create(rewriter, loc, resultType, source, offsets, sizes, getUnitStrides(rewriter, sourceType.getRank())) + return tensor::ExtractSliceOp::create( + rewriter, loc, resultType, source, offsets, sizes, getUnitStrides(rewriter, sourceType.getRank())) .getResult(); } diff --git a/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.hpp b/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.hpp index f0367bd..785c906 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.hpp +++ b/src/PIM/Conversion/ONNXToSpatial/Common/ShapeTilingUtils.hpp @@ -115,12 +115,8 @@ mlir::Value materializeContiguousTensorSlice(mlir::Value source, mlir::ConversionPatternRewriter& rewriter, mlir::Location loc); -mlir::Value extractAxisSlice(mlir::PatternRewriter& rewriter, - mlir::Location loc, - mlir::Value source, - int64_t axis, - int64_t offset, - int64_t size); +mlir::Value extractAxisSlice( + mlir::PatternRewriter& rewriter, mlir::Location loc, mlir::Value source, int64_t axis, int64_t offset, int64_t size); mlir::Value insertStaticSlice(mlir::PatternRewriter& rewriter, mlir::Location loc, diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp index 5869c21..2ea2215 100644 --- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialPass.cpp @@ -15,8 +15,8 @@ #include "Common/Common.hpp" #include "Common/PimCommon.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" -#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ONNXToSpatialVerifier.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Patterns.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" diff --git a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialVerifier.cpp b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialVerifier.cpp index 0ebdaeb..e191093 100644 --- a/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialVerifier.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/ONNXToSpatialVerifier.cpp @@ -69,11 +69,11 @@ LogicalResult verifyComputeLikeInputs(Operation* computeLikeOp, continue; diagnostics.report(computeLikeOp, [&](Operation* illegalOp) { - InFlightDiagnostic diagnostic = illegalOp->emitOpError() << kind << " input #" << currentInputIndex - << (allowChannelReceiveInputs - ? " must come from the host or an explicit " - "spat.channel_receive" - : " must come from the host"); + InFlightDiagnostic diagnostic = illegalOp->emitOpError() + << kind << " input #" << currentInputIndex + << (allowChannelReceiveInputs ? " must come from the host or an explicit " + "spat.channel_receive" + : " must come from the host"); if (definingOp) diagnostic.attachNote(definingOp->getLoc()) << "illegal Spatial producer is " << definingOp->getName(); }); @@ -135,17 +135,17 @@ LogicalResult verifySpatialCommunicationInvariants(func::FuncOp funcOp) { pim::CappedDiagnosticReporter diagnostics; for (auto computeOp : funcOp.getOps()) { - (void)verifyComputeLikeInputs( + (void) verifyComputeLikeInputs( computeOp.getOperation(), computeOp.getInputs(), /*allowChannelReceiveInputs=*/true, "spat.compute", diagnostics); verifyNoExternalTensorCaptures(computeOp.getOperation(), computeOp.getBody(), "spat.compute", diagnostics); } for (auto computeBatchOp : funcOp.getOps()) { - (void)verifyComputeLikeInputs(computeBatchOp.getOperation(), - computeBatchOp.getInputs(), - /*allowChannelReceiveInputs=*/false, - "spat.compute_batch", - diagnostics); + (void) verifyComputeLikeInputs(computeBatchOp.getOperation(), + computeBatchOp.getInputs(), + /*allowChannelReceiveInputs=*/false, + "spat.compute_batch", + diagnostics); verifyNoExternalTensorCaptures( computeBatchOp.getOperation(), computeBatchOp.getBody(), "spat.compute_batch", diagnostics); } diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp index c77118e..bfe02c6 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns.cpp @@ -5,9 +5,7 @@ using namespace mlir; namespace onnx_mlir { -void populatePrePatterns(RewritePatternSet& patterns, MLIRContext* ctx) { - populateGeneratedPrePatterns(patterns, ctx); -} +void populatePrePatterns(RewritePatternSet& patterns, MLIRContext* ctx) { populateGeneratedPrePatterns(patterns, ctx); } void populateConversionPatterns(RewritePatternSet& patterns, MLIRContext* ctx) { populateGeneratedConversionPatterns(patterns, ctx); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Conv.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Conv.cpp index 3fa2a59..bda761c 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Conv.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Conv.cpp @@ -51,8 +51,8 @@ static Value createPaddedRows(Value tensorValue, if (tensorType.getDimSize(0) == paddedRows) return tensorValue; - auto paddedType = - RankedTensorType::get({paddedRows, tensorType.getDimSize(1)}, tensorType.getElementType(), tensorType.getEncoding()); + auto paddedType = RankedTensorType::get( + {paddedRows, tensorType.getDimSize(1)}, tensorType.getElementType(), tensorType.getEncoding()); SmallVector lowPads = {rewriter.getIndexAttr(0), rewriter.getIndexAttr(0)}; SmallVector highPads = {rewriter.getIndexAttr(paddedRows - tensorType.getDimSize(0)), rewriter.getIndexAttr(0)}; @@ -62,20 +62,15 @@ static Value createPaddedRows(Value tensorValue, padBlock->addArgument(rewriter.getIndexType(), loc); padOp.getRegion().push_back(padBlock); rewriter.setInsertionPointToStart(padBlock); - auto zero = getOrCreateConstant(rewriter, - padOp.getOperation(), - rewriter.getZeroAttr(tensorType.getElementType()), - tensorType.getElementType()); + auto zero = getOrCreateConstant( + rewriter, padOp.getOperation(), rewriter.getZeroAttr(tensorType.getElementType()), tensorType.getElementType()); tensor::YieldOp::create(rewriter, loc, zero); rewriter.setInsertionPointAfter(padOp); return padOp.getResult(); } -static Value packRowsForParallelGemm(Value rows, - RankedTensorType rowsType, - int64_t packFactor, - ConversionPatternRewriter& rewriter, - Location loc) { +static Value packRowsForParallelGemm( + Value rows, RankedTensorType rowsType, int64_t packFactor, ConversionPatternRewriter& rewriter, Location loc) { if (packFactor == 1) return rows; @@ -118,10 +113,8 @@ static Value unpackRowsFromParallelGemm(Value packedRows, const int64_t packedNumRows = packedRowsType.getDimSize(0); const int64_t paddedNumRows = packedNumRows * packFactor; - auto expandedType = - RankedTensorType::get({packedNumRows, packFactor, rowWidth}, - packedRowsType.getElementType(), - packedRowsType.getEncoding()); + auto expandedType = RankedTensorType::get( + {packedNumRows, packFactor, rowWidth}, packedRowsType.getElementType(), packedRowsType.getEncoding()); auto paddedType = RankedTensorType::get({paddedNumRows, rowWidth}, packedRowsType.getElementType(), packedRowsType.getEncoding()); auto unpackedType = @@ -193,11 +186,8 @@ static Value buildPackedWeight(DenseElementsAttr wDenseAttr, return getOrCreateConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), packedAttr, packedWeightType); } -static Value createConvWeightMatrix(Value w, - RankedTensorType wFlatType, - RankedTensorType wTransType, - ConversionPatternRewriter& rewriter, - Location loc) { +static Value createConvWeightMatrix( + Value w, RankedTensorType wFlatType, RankedTensorType wTransType, ConversionPatternRewriter& rewriter, Location loc) { auto buildWeightMatrix = [&](Value weight) -> Value { Value wFlat = tensor::CollapseShapeOp::create(rewriter, loc, @@ -360,9 +350,8 @@ static Value createIm2colRowComputes(Value x, Value im2col = im2colLoop.getResult(0); Value gemmInputRows = im2col; - if (packFactor != 1) { + if (packFactor != 1) gemmInputRows = packRowsForParallelGemm(im2col, im2colType, packFactor, rewriter, loc); - } spatial::SpatYieldOp::create(rewriter, loc, gemmInputRows); }); @@ -387,8 +376,13 @@ static Value createCollectedConvOutput(ValueRange gemmRows, } else { Value packedOutput = createSpatConcat(rewriter, loc, /*axis=*/0, gemmRowArgs); - gemmOut = unpackRowsFromParallelGemm( - packedOutput, cast(packedOutput.getType()), numPatches, numChannelsOut, packFactor, rewriter, loc); + gemmOut = unpackRowsFromParallelGemm(packedOutput, + cast(packedOutput.getType()), + numPatches, + numChannelsOut, + packFactor, + rewriter, + loc); } // Restore to NCHW layout: diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp index 78bd79b..7c6e294 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp @@ -252,7 +252,13 @@ static spatial::SpatComputeBatch createVmmBatch(Value a, Location loc) { const int64_t laneCount = partialPiecesType.getDimSize(0); auto batchOp = createSpatComputeBatch( - rewriter, loc, TypeRange {partialPiecesType}, laneCount, ValueRange {b}, ValueRange {a}, [&](detail::SpatComputeBatchBodyArgs args) { + rewriter, + loc, + TypeRange {partialPiecesType}, + laneCount, + ValueRange {b}, + ValueRange {a}, + [&](detail::SpatComputeBatchBodyArgs args) { Value row = onnx_mlir::modIndexByConstant(rewriter, loc, args.lane, numOutRows); Value kOffset = createGemmBatchKOffset(args.lane, numOutRows, numKSlices, rewriter, loc); Value hOffset = createGemmBatchHOffset(args.lane, numOutRows, numKSlices, numOutHSlices, rewriter, loc); @@ -284,8 +290,8 @@ static spatial::SpatComputeBatch createVmmBatch(Value a, return *batchOp; } -static Value createDynamicGemmBatchRow( - Value lane, int64_t numOutCols, ConversionPatternRewriter& rewriter, Location loc) { +static Value +createDynamicGemmBatchRow(Value lane, int64_t numOutCols, ConversionPatternRewriter& rewriter, Location loc) { if (numOutCols == 1) return lane; @@ -294,17 +300,21 @@ static Value createDynamicGemmBatchRow( return createAffineApplyOrFoldedConstant(rewriter, loc, d0.floorDiv(numOutCols), ValueRange {lane}); } -static Value -extractDynamicGemmBColumn(Value matrix, Value column, RankedTensorType vectorType, ConversionPatternRewriter& rewriter, Location loc) { +static Value extractDynamicGemmBColumn( + Value matrix, Value column, RankedTensorType vectorType, ConversionPatternRewriter& rewriter, Location loc) { SmallVector offsets {rewriter.getIndexAttr(0), column}; SmallVector strides {rewriter.getIndexAttr(1), rewriter.getIndexAttr(1)}; auto columnSliceType = RankedTensorType::get({vectorType.getDimSize(1), 1}, vectorType.getElementType()); Value columnSlice = materializeContiguousTensorSlice(matrix, columnSliceType, offsets, strides, rewriter, loc); - SmallVector collapseReassociation {ReassociationIndices {0, 1}}; + SmallVector collapseReassociation { + ReassociationIndices {0, 1} + }; auto collapsedType = RankedTensorType::get({vectorType.getDimSize(1)}, vectorType.getElementType()); Value collapsed = tensor::CollapseShapeOp::create(rewriter, loc, collapsedType, columnSlice, collapseReassociation).getResult(); - SmallVector expandReassociation {ReassociationIndices {0, 1}}; + SmallVector expandReassociation { + ReassociationIndices {0, 1} + }; return tensor::ExpandShapeOp::create(rewriter, loc, vectorType, collapsed, expandReassociation).getResult(); } @@ -371,13 +381,15 @@ static Value createBroadcastedBiasScalar(Value bias, Location loc) { SmallVector unitStrides(biasType.getRank(), rewriter.getIndexAttr(1)); if (biasType.getRank() == 1) { - SmallVector offsets { - biasType.getDimSize(0) == 1 ? OpFoldResult(rewriter.getIndexAttr(0)) : OpFoldResult(column)}; + SmallVector offsets {biasType.getDimSize(0) == 1 ? OpFoldResult(rewriter.getIndexAttr(0)) + : OpFoldResult(column)}; SmallVector sizes {rewriter.getIndexAttr(1)}; auto vectorType = RankedTensorType::get({1}, scalarType.getElementType()); - Value vector = tensor::ExtractSliceOp::create(rewriter, loc, vectorType, bias, offsets, sizes, unitStrides) - .getResult(); - SmallVector reassociation {ReassociationIndices {0, 1}}; + Value vector = + tensor::ExtractSliceOp::create(rewriter, loc, vectorType, bias, offsets, sizes, unitStrides).getResult(); + SmallVector reassociation { + ReassociationIndices {0, 1} + }; return tensor::ExpandShapeOp::create(rewriter, loc, scalarType, vector, reassociation).getResult(); } @@ -407,16 +419,21 @@ static spatial::SpatComputeBatch createVvdmulBatch(Value a, const int64_t reductionSize = aType.getDimSize(1); const int64_t laneCount = numOutRows * numOutCols; auto batchOp = createSpatComputeBatch( - rewriter, loc, TypeRange {scalarPiecesType}, laneCount, ValueRange {}, ValueRange {a, b}, [&](detail::SpatComputeBatchBodyArgs args) { + rewriter, + loc, + TypeRange {scalarPiecesType}, + laneCount, + ValueRange {}, + ValueRange {a, b}, + [&](detail::SpatComputeBatchBodyArgs args) { Value row = createDynamicGemmBatchRow(args.lane, numOutCols, rewriter, loc); Value column = onnx_mlir::modIndexByConstant(rewriter, loc, args.lane, numOutCols); auto vectorType = RankedTensorType::get({1, reductionSize}, aType.getElementType()); auto scalarType = RankedTensorType::get({1, 1}, outType.getElementType()); Value aVector = extractDynamicGemmRowVector(args.inputs[0], row, vectorType, rewriter, loc); - Value bVector = bAlreadyTransposed - ? extractTransposedBRow(args.inputs[1], column, vectorType, rewriter, loc) - : extractDynamicGemmBColumn(args.inputs[1], column, vectorType, rewriter, loc); + Value bVector = bAlreadyTransposed ? extractTransposedBRow(args.inputs[1], column, vectorType, rewriter, loc) + : extractDynamicGemmBColumn(args.inputs[1], column, vectorType, rewriter, loc); Value scalar = spatial::SpatVVDMulOp::create(rewriter, loc, scalarType, aVector, bVector).getResult(); SmallVector outputOffsets {args.lane, rewriter.getIndexAttr(0)}; @@ -578,9 +595,8 @@ static spatial::SpatCompute createReductionCompute(Value partialPieces, auto buildOutputSlice = [&](Value outputAcc, Value hSlice) -> Value { Value reduced = reducePartialPiecesForHSlice(partialPiecesArg, hSlice, pieceType, numKSlices, numOutRows, rewriter, loc); - Value hOffset = - onnx_mlir::multiplyIndexByConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), hSlice, - crossbarSize.getValue()); + Value hOffset = onnx_mlir::multiplyIndexByConstant( + rewriter, rewriter.getInsertionBlock()->getParentOp(), hSlice, crossbarSize.getValue()); if (biasArg) { SmallVector biasOffsets {rewriter.getIndexAttr(0), hOffset}; Value biasSlice = @@ -721,8 +737,8 @@ LogicalResult GemmToSpatialComputes::matchAndRewrite(ONNXGemmOp gemmOp, } auto scalarPiecesType = RankedTensorType::get({laneCount64, 1}, outType.getElementType()); - auto batchOp = createVvdmulBatch( - a, b, aType, bType, scalarPiecesType, outType, gemmOpAdaptor.getTransB(), rewriter, loc); + auto batchOp = + createVvdmulBatch(a, b, aType, bType, scalarPiecesType, outType, gemmOpAdaptor.getTransB(), rewriter, loc); auto outputCompute = createDynamicGemmOutputCompute( batchOp.getResult(0), hasC ? c : Value(), scalarPiecesType, biasType, outType, alpha, beta, rewriter, loc); rewriter.replaceOp(gemmOp, outputCompute.getResults()); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp index dfd8535..e89d7c2 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp @@ -70,11 +70,8 @@ static SmallVector getKeptAxes(ArrayRef reducedAxes) { return keptAxes; } -static Value computeLaneIndex(Value lane, - int64_t stride, - int64_t dimSize, - ConversionPatternRewriter& rewriter, - Location loc) { +static Value +computeLaneIndex(Value lane, int64_t stride, int64_t dimSize, ConversionPatternRewriter& rewriter, Location loc) { if (dimSize == 1) return getOrCreateIndexConstant(rewriter, rewriter.getInsertionBlock()->getParentOp(), 0); @@ -119,35 +116,41 @@ static FailureOr buildReduceMeanKeepdimsBatch(Value input, sliceSizes.reserve(inputType.getRank()); insertOffsets.reserve(inputType.getRank()); - auto batchOp = createSpatComputeBatch( - rewriter, loc, TypeRange {batchType}, laneCount, {}, ValueRange {input}, [&](detail::SpatComputeBatchBodyArgs args) { - size_t keptAxisIndex = 0; - sliceOffsets.clear(); - sliceSizes.clear(); - insertOffsets.clear(); - for (auto [axis, isReduced] : llvm::enumerate(reducedAxes)) { - if (isReduced) { - sliceOffsets.push_back(rewriter.getIndexAttr(0)); - sliceSizes.push_back(rewriter.getIndexAttr(inputType.getDimSize(axis))); - continue; - } + auto batchOp = + createSpatComputeBatch(rewriter, + loc, + TypeRange {batchType}, + laneCount, + {}, + ValueRange {input}, + [&](detail::SpatComputeBatchBodyArgs args) { + size_t keptAxisIndex = 0; + sliceOffsets.clear(); + sliceSizes.clear(); + insertOffsets.clear(); + for (auto [axis, isReduced] : llvm::enumerate(reducedAxes)) { + if (isReduced) { + sliceOffsets.push_back(rewriter.getIndexAttr(0)); + sliceSizes.push_back(rewriter.getIndexAttr(inputType.getDimSize(axis))); + continue; + } - Value axisIndex = - computeLaneIndex(args.lane, keptAxisStrides[keptAxisIndex], inputType.getDimSize(axis), rewriter, loc); - ++keptAxisIndex; - sliceOffsets.push_back(axisIndex); - sliceSizes.push_back(rewriter.getIndexAttr(1)); - } + Value axisIndex = computeLaneIndex( + args.lane, keptAxisStrides[keptAxisIndex], inputType.getDimSize(axis), rewriter, loc); + ++keptAxisIndex; + sliceOffsets.push_back(axisIndex); + sliceSizes.push_back(rewriter.getIndexAttr(1)); + } - insertOffsets.push_back(args.lane); - insertOffsets.append(inputType.getRank() - 1, rewriter.getIndexAttr(0)); + insertOffsets.push_back(args.lane); + insertOffsets.append(inputType.getRank() - 1, rewriter.getIndexAttr(0)); - Value slice = - tensor::ExtractSliceOp::create(rewriter, loc, sliceType, args.inputs.front(), sliceOffsets, sliceSizes, unitStrides); - Value reduced = spatial::SpatVAvgOp::create(rewriter, loc, leafType, slice).getResult(); - createParallelInsertSliceIntoBatchOutput( - rewriter, loc, reduced, args.outputs.front(), insertOffsets, insertSizes, unitStrides); - }); + Value slice = tensor::ExtractSliceOp::create( + rewriter, loc, sliceType, args.inputs.front(), sliceOffsets, sliceSizes, unitStrides); + Value reduced = spatial::SpatVAvgOp::create(rewriter, loc, leafType, slice).getResult(); + createParallelInsertSliceIntoBatchOutput( + rewriter, loc, reduced, args.outputs.front(), insertOffsets, insertSizes, unitStrides); + }); if (failed(batchOp)) return failure(); return (*batchOp).getResult(0); @@ -193,15 +196,15 @@ static Value buildKeepdimsFromLanePackedBatch(Value batchValue, auto reshapeCompute = createSpatCompute<1>(rewriter, loc, TypeRange {keepdimsType}, {}, ValueRange {batchValue}, [&](Value input) { - auto flatType = RankedTensorType::get({batchType.getDimSize(0)}, batchType.getElementType(), batchType.getEncoding()); + auto flatType = + RankedTensorType::get({batchType.getDimSize(0)}, batchType.getElementType(), batchType.getEncoding()); Value flat = tensor::CollapseShapeOp::create(rewriter, loc, flatType, input, collapseToFlat); Value compact = flat; if (compactKeptType != flatType) compact = tensor::ExpandShapeOp::create(rewriter, loc, compactKeptType, flat, expandFlatToCompact); Value keepdims = compact; if (keepdimsType != compactKeptType) - keepdims = - tensor::ExpandShapeOp::create(rewriter, loc, keepdimsType, compact, expandCompactToKeepdims); + keepdims = tensor::ExpandShapeOp::create(rewriter, loc, keepdimsType, compact, expandCompactToKeepdims); spatial::SpatYieldOp::create(rewriter, loc, keepdims); }); return reshapeCompute.getResult(0); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Softmax.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Softmax.cpp index 3e9f3ee..7657ea7 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Softmax.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/NN/Softmax.cpp @@ -121,11 +121,9 @@ struct SoftmaxToSpatialCompute : OpConversionPattern { auto transposedType = RankedTensorType::get( permuteShape(inputType.getShape(), permutation), inputType.getElementType(), inputType.getEncoding()); - Value transposedInput = - transposeMaybeInCompute(input, transposedType, permutation, rewriter, softmaxOp.getLoc()); + Value transposedInput = transposeMaybeInCompute(input, transposedType, permutation, rewriter, softmaxOp.getLoc()); Value transposedResult = createLoopSoftmaxCompute(transposedInput, rewriter, softmaxOp.getLoc()); - result = transposeMaybeInCompute( - transposedResult, inputType, inversePermutation, rewriter, softmaxOp.getLoc()); + result = transposeMaybeInCompute(transposedResult, inputType, inversePermutation, rewriter, softmaxOp.getLoc()); } rewriter.replaceOp(softmaxOp, result); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Post.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Post.cpp index d66524b..f42f4d3 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Post.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Post.cpp @@ -77,7 +77,7 @@ static FailureOr computePromotedOperands(ComputeOpTy compute) needsRewrite = true; continue; - keep_input: +keep_input: promoted.newInputs.push_back(input); promoted.newInputTypes.push_back(input.getType()); promoted.newInputLocs.push_back(input.getLoc()); @@ -127,8 +127,8 @@ struct PromoteWeightLikeComputeInputsPattern : OpRewritePatternnewWeights, promoted->newInputs); + auto newCompute = spatial::SpatCompute::create( + rewriter, compute.getLoc(), compute.getResultTypes(), promoted->newWeights, promoted->newInputs); SmallVector newBlockArgTypes; SmallVector newBlockArgLocs; for (Value weight : promoted->newWeights) { @@ -155,7 +155,12 @@ struct PromoteWeightLikeComputeInputsPattern : OpRewritePattern newBlockArgTypes; SmallVector newBlockArgLocs; - newBlockArgTypes.reserve(1 + promoted->newWeights.size() + promoted->newInputTypes.size() + compute.getNumResults()); + newBlockArgTypes.reserve(1 + promoted->newWeights.size() + promoted->newInputTypes.size() + + compute.getNumResults()); newBlockArgLocs.reserve(1 + promoted->newWeights.size() + promoted->newInputLocs.size() + compute.getNumResults()); newBlockArgTypes.push_back(laneArg->getType()); newBlockArgLocs.push_back(laneArg->getLoc()); @@ -239,7 +245,12 @@ struct PromoteWeightLikeComputeBatchInputsPattern : OpRewritePattern(0, compute.getNumResults())) { auto outputArg = compute.getOutputArgument(resultIndex); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp index d75bc9f..3a1ff14 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp @@ -111,7 +111,8 @@ struct Reshape : OpConversionPattern { } auto replaceWithReshape = [&](auto buildReshape) -> LogicalResult { - Value reshaped = materializeOrComputeUnary(adaptor.getData(), resultType, rewriter, reshapeOp.getLoc(), buildReshape); + Value reshaped = + materializeOrComputeUnary(adaptor.getData(), resultType, rewriter, reshapeOp.getLoc(), buildReshape); rewriter.replaceOp(reshapeOp, reshaped); return success(); }; diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp index db82465..60f7219 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp @@ -44,8 +44,7 @@ struct Split : OpConversionPattern { if (isCompileTimeComputable(adaptor.getInput())) { for (int64_t sliceSize : sliceSizes) { - outputs.push_back( - extractAxisSlice(rewriter, splitOp.getLoc(), adaptor.getInput(), *axis, offset, sliceSize)); + outputs.push_back(extractAxisSlice(rewriter, splitOp.getLoc(), adaptor.getInput(), *axis, offset, sliceSize)); offset += sliceSize; } rewriter.replaceOp(splitOp, outputs); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Transpose.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Transpose.cpp index c928093..fe733f7 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Transpose.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Transpose.cpp @@ -1,5 +1,5 @@ -#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Transforms/DialectConversion.h" @@ -104,8 +104,7 @@ struct TransposeToLinalgTranspose : OpConversionPattern { } Value init = createTransposeInit(adaptor.getData(), resultType, *permutation, rewriter, transposeOp.getLoc()); Value transposed = - linalg::TransposeOp::create(rewriter, transposeOp.getLoc(), adaptor.getData(), init, *permutation) - .getResult()[0]; + linalg::TransposeOp::create(rewriter, transposeOp.getLoc(), adaptor.getData(), init, *permutation).getResult()[0]; rewriter.replaceOp(transposeOp, transposed); return success(); } diff --git a/src/PIM/Conversion/SpatialToPim/BatchCoreLoweringPatterns.cpp b/src/PIM/Conversion/SpatialToPim/BatchCoreLoweringPatterns.cpp index bd2e9e2..87e6c8c 100644 --- a/src/PIM/Conversion/SpatialToPim/BatchCoreLoweringPatterns.cpp +++ b/src/PIM/Conversion/SpatialToPim/BatchCoreLoweringPatterns.cpp @@ -7,8 +7,8 @@ #include "mlir/IR/Matchers.h" #include "Conversion/ONNXToSpatial/Common/Common.hpp" -#include "src/Accelerators/PIM/Common/IR/BatchCoreUtils.hpp" #include "Conversion/SpatialToPim/SpatialToPimPass.hpp" +#include "src/Accelerators/PIM/Common/IR/BatchCoreUtils.hpp" #include "src/Accelerators/PIM/Common/PimCommon.hpp" #include "src/Accelerators/PIM/Conversion/SpatialToPim/Common.hpp" #include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp" diff --git a/src/PIM/Conversion/SpatialToPim/Patterns.cpp b/src/PIM/Conversion/SpatialToPim/Patterns.cpp index 8a98192..e0452e7 100644 --- a/src/PIM/Conversion/SpatialToPim/Patterns.cpp +++ b/src/PIM/Conversion/SpatialToPim/Patterns.cpp @@ -1,5 +1,5 @@ -#include "src/Accelerators/PIM/Conversion/SpatialToPim/Patterns.hpp" #include "src/Accelerators/PIM/Conversion/SpatialToPim/Common.hpp" +#include "src/Accelerators/PIM/Conversion/SpatialToPim/Patterns.hpp" #include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp" using namespace mlir; diff --git a/src/PIM/Conversion/SpatialToPim/Patterns/ChannelLowering.cpp b/src/PIM/Conversion/SpatialToPim/Patterns/ChannelLowering.cpp index a938bd8..996add3 100644 --- a/src/PIM/Conversion/SpatialToPim/Patterns/ChannelLowering.cpp +++ b/src/PIM/Conversion/SpatialToPim/Patterns/ChannelLowering.cpp @@ -1,7 +1,7 @@ #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "src/Accelerators/PIM/Conversion/SpatialToPim/Patterns.hpp" #include "src/Accelerators/PIM/Conversion/SpatialToPim/Common.hpp" +#include "src/Accelerators/PIM/Conversion/SpatialToPim/Patterns.hpp" #include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" diff --git a/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp b/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp index 56bda03..7e44e45 100644 --- a/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp +++ b/src/PIM/Conversion/SpatialToPim/SpatialToPimPass.cpp @@ -334,8 +334,8 @@ LogicalResult raptor::SpatialToPimPass::allocateAndInitializeCoreLocalVariables( loc, tensorType, getOrCreateIndexConstant(constantFolder, deviceTensor.getOperation(), 0), - getOrCreateIndexConstant(constantFolder, - deviceTensor.getOperation(), static_cast(elementsOffset * elementByteSize) ), + getOrCreateIndexConstant( + constantFolder, deviceTensor.getOperation(), static_cast(elementsOffset * elementByteSize)), deviceTensor, inputTensor, rewriter.getI32IntegerAttr(static_cast(tensorType.getNumElements() * elementByteSize))); diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp index 80e9517..5e4f8b3 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp @@ -1482,7 +1482,8 @@ void appendScalarSendLoop(MaterializerState& state, state.rewriter.setInsertionPoint(sourceClass.body->getTerminator()); Value lowerBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 0); - Value upperBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, static_cast(channelIds.size())); + Value upperBound = + getOrCreateIndexConstant(state.constantFolder, sourceClass.op, static_cast(channelIds.size())); Value step = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 1); auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {}); @@ -1577,7 +1578,8 @@ void appendProjectedScalarSendLoop(MaterializerState& state, } Value lowerBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 0); - Value upperBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, static_cast(channelIds.size())); + Value upperBound = + getOrCreateIndexConstant(state.constantFolder, sourceClass.op, static_cast(channelIds.size())); Value step = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 1); auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {}); @@ -2342,7 +2344,8 @@ FailureOr insertPackedScalarRunIntoWholeBatch(MaterializerState& state, } Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0); - Value upperBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast(run.slots.size())); + Value upperBound = + getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast(run.slots.size())); Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1); state.rewriter.setInsertionPoint(targetClass.body->getTerminator()); diff --git a/src/PIM/Pass/PimCodegen/MaterializeHostConstantsPass.cpp b/src/PIM/Pass/PimCodegen/MaterializeHostConstantsPass.cpp index 7882ddc..3aa45be 100644 --- a/src/PIM/Pass/PimCodegen/MaterializeHostConstantsPass.cpp +++ b/src/PIM/Pass/PimCodegen/MaterializeHostConstantsPass.cpp @@ -99,17 +99,16 @@ static void materializeHostConstantsInCore(CoreOpTy coreOp, .getOutput(); } else { - copiedValue = - pim::PimMemCopyHostToDevOp::create( - rewriter, - op->getLoc(), - originalType, - getOrCreateIndexConstant(constantFolder, op, 0), - getOrCreateIndexConstant(constantFolder, op, static_cast(resolvedAddress->byteOffset) ), - deviceDst, - getGlobalOp.getResult(), - rewriter.getI32IntegerAttr(static_cast(totalBytes))) - .getOutput(); + copiedValue = pim::PimMemCopyHostToDevOp::create( + rewriter, + op->getLoc(), + originalType, + getOrCreateIndexConstant(constantFolder, op, 0), + getOrCreateIndexConstant(constantFolder, op, static_cast(resolvedAddress->byteOffset)), + deviceDst, + getGlobalOp.getResult(), + rewriter.getI32IntegerAttr(static_cast(totalBytes))) + .getOutput(); } cachedByType[originalType] = copiedValue;