From 874a2f53e6e0059548f92f2cf4cfeb65ec97569b Mon Sep 17 00:00:00 2001 From: NiccoloN Date: Wed, 27 May 2026 16:39:56 +0200 Subject: [PATCH] automatic code reformat --- src/PIM/Common/IR/AddressAnalysis.cpp | 68 ++++----- src/PIM/Common/IR/AddressAnalysis.hpp | 7 +- src/PIM/Common/IR/BatchCoreUtils.cpp | 4 +- src/PIM/Common/IR/BatchCoreUtils.hpp | 3 +- src/PIM/Common/IR/CoreBlockUtils.hpp | 9 +- src/PIM/Common/IR/WeightUtils.hpp | 2 +- src/PIM/Compiler/PimCodeGen.cpp | 141 +++++++----------- src/PIM/Compiler/PimCodeGen.hpp | 12 +- src/PIM/Compiler/PimWeightEmitter.cpp | 4 +- .../Conversion/ONNXToSpatial/CompileTime.cpp | 16 +- .../ONNXToSpatial/Patterns/Math/Gemm.cpp | 15 +- .../ONNXToSpatial/Patterns/Math/MatMul.cpp | 2 +- .../Patterns/Math/ReduceMean.cpp | 2 +- .../ONNXToSpatial/Patterns/Tensor/Reshape.cpp | 2 +- .../ONNXToSpatial/Patterns/Tensor/Split.cpp | 2 +- .../SpatialToPim/ChannelLoweringPatterns.cpp | 1 + .../SpatialToPim/CoreLoweringPatterns.cpp | 1 - .../SpatialToPim/ReturnPathNormalization.cpp | 1 - src/PIM/Dialect/Spatial/SpatialOps.cpp | 2 +- .../MergeComputeNodesPass.cpp | 8 +- .../Scheduling/ComputeGraph.cpp | 26 ++-- .../MergeComputeNodes/Scheduling/Utils.hpp | 1 - src/PIM/Pass/PimCodegen/VerificationPass.cpp | 5 +- 23 files changed, 136 insertions(+), 198 deletions(-) diff --git a/src/PIM/Common/IR/AddressAnalysis.cpp b/src/PIM/Common/IR/AddressAnalysis.cpp index 9aa664c..d687415 100644 --- a/src/PIM/Common/IR/AddressAnalysis.cpp +++ b/src/PIM/Common/IR/AddressAnalysis.cpp @@ -111,39 +111,29 @@ static llvm::FailureOr resolveConstantGlobalLoad(mlir::memref::LoadOp l static bool evaluateCmpPredicate(mlir::arith::CmpIPredicate predicate, int64_t lhs, int64_t rhs) { switch (predicate) { - case mlir::arith::CmpIPredicate::eq: - return lhs == rhs; - case mlir::arith::CmpIPredicate::ne: - return lhs != rhs; - case mlir::arith::CmpIPredicate::slt: - return lhs < rhs; - case mlir::arith::CmpIPredicate::sle: - return lhs <= rhs; - case mlir::arith::CmpIPredicate::sgt: - return lhs > rhs; - case mlir::arith::CmpIPredicate::sge: - return lhs >= rhs; - case mlir::arith::CmpIPredicate::ult: - return static_cast(lhs) < static_cast(rhs); - case mlir::arith::CmpIPredicate::ule: - return static_cast(lhs) <= static_cast(rhs); - case mlir::arith::CmpIPredicate::ugt: - return static_cast(lhs) > static_cast(rhs); - case mlir::arith::CmpIPredicate::uge: - return static_cast(lhs) >= static_cast(rhs); + case mlir::arith::CmpIPredicate::eq: return lhs == rhs; + case mlir::arith::CmpIPredicate::ne: return lhs != rhs; + case mlir::arith::CmpIPredicate::slt: return lhs < rhs; + case mlir::arith::CmpIPredicate::sle: return lhs <= rhs; + case mlir::arith::CmpIPredicate::sgt: return lhs > rhs; + case mlir::arith::CmpIPredicate::sge: return lhs >= rhs; + case mlir::arith::CmpIPredicate::ult: return static_cast(lhs) < static_cast(rhs); + case mlir::arith::CmpIPredicate::ule: return static_cast(lhs) <= static_cast(rhs); + case mlir::arith::CmpIPredicate::ugt: return static_cast(lhs) > static_cast(rhs); + case mlir::arith::CmpIPredicate::uge: return static_cast(lhs) >= static_cast(rhs); } llvm_unreachable("unknown cmpi predicate"); } -llvm::FailureOr evaluateCompiledIndexExpr(const CompiledIndexExpr& expr, const StaticValueKnowledge& knowledge) { +llvm::FailureOr evaluateCompiledIndexExpr(const CompiledIndexExpr& expr, + const StaticValueKnowledge& knowledge) { if (!expr.node) return mlir::failure(); switch (expr.node->kind) { - case CompiledIndexExprNode::Kind::Constant: - return expr.node->constant; - case CompiledIndexExprNode::Kind::Symbol: { + case CompiledIndexExprNode::Kind::Constant: return expr.node->constant; + case CompiledIndexExprNode::Kind::Symbol: { auto value = resolveAlias(expr.node->symbol, &knowledge); auto iter = knowledge.indexValues.find(value); if (iter != knowledge.indexValues.end()) @@ -158,19 +148,16 @@ llvm::FailureOr evaluateCompiledIndexExpr(const CompiledIndexExpr& expr case CompiledIndexExprNode::Kind::RemUI: case CompiledIndexExprNode::Kind::RemSI: case CompiledIndexExprNode::Kind::MinUI: - case CompiledIndexExprNode::Kind::CmpI: { + case CompiledIndexExprNode::Kind::CmpI: { auto lhs = evaluateCompiledIndexExpr(expr.node->operands[0], knowledge); auto rhs = evaluateCompiledIndexExpr(expr.node->operands[1], knowledge); if (failed(lhs) || failed(rhs)) return mlir::failure(); switch (expr.node->kind) { - case CompiledIndexExprNode::Kind::Add: - return *lhs + *rhs; - case CompiledIndexExprNode::Kind::Sub: - return *lhs - *rhs; - case CompiledIndexExprNode::Kind::Mul: - return *lhs * *rhs; + case CompiledIndexExprNode::Kind::Add: return *lhs + *rhs; + case CompiledIndexExprNode::Kind::Sub: return *lhs - *rhs; + case CompiledIndexExprNode::Kind::Mul: return *lhs * *rhs; case CompiledIndexExprNode::Kind::DivUI: if (*rhs == 0) return mlir::failure(); @@ -191,10 +178,8 @@ llvm::FailureOr evaluateCompiledIndexExpr(const CompiledIndexExpr& expr return *lhs % *rhs; case CompiledIndexExprNode::Kind::MinUI: return static_cast(std::min(static_cast(*lhs), static_cast(*rhs))); - case CompiledIndexExprNode::Kind::CmpI: - return evaluateCmpPredicate(expr.node->predicate, *lhs, *rhs) ? 1 : 0; - default: - llvm_unreachable("unexpected binary compiled index kind"); + case CompiledIndexExprNode::Kind::CmpI: return evaluateCmpPredicate(expr.node->predicate, *lhs, *rhs) ? 1 : 0; + default: llvm_unreachable("unexpected binary compiled index kind"); } } case CompiledIndexExprNode::Kind::Select: { @@ -639,24 +624,21 @@ llvm::FailureOr compileContiguousAddressExprImpl(mlir::Valu staticStrides.reserve(subviewOp.getMixedStrides().size()); bool allStatic = true; - for (mlir::OpFoldResult offset : subviewOp.getMixedOffsets()) { + for (mlir::OpFoldResult offset : subviewOp.getMixedOffsets()) if (auto attr = mlir::dyn_cast(offset)) staticOffsets.push_back(mlir::cast(attr).getInt()); else allStatic = false; - } - for (mlir::OpFoldResult size : subviewOp.getMixedSizes()) { + for (mlir::OpFoldResult size : subviewOp.getMixedSizes()) if (auto attr = mlir::dyn_cast(size)) staticSizes.push_back(mlir::cast(attr).getInt()); else allStatic = false; - } - for (mlir::OpFoldResult stride : subviewOp.getMixedStrides()) { + for (mlir::OpFoldResult stride : subviewOp.getMixedStrides()) if (auto attr = mlir::dyn_cast(stride)) staticStrides.push_back(mlir::cast(attr).getInt()); else allStatic = false; - } if (allStatic) { if (!isMemoryContiguous(sourceType.getShape(), staticOffsets, staticSizes, staticStrides)) @@ -796,8 +778,8 @@ llvm::FailureOr CompiledIndexExpr::evaluate(const StaticValueKnowledge& return evaluateCompiledIndexExpr(*this, knowledge); } -llvm::FailureOr -CompiledAddressExpr::evaluate(const StaticValueKnowledge& knowledge, std::optional lane) const { +llvm::FailureOr CompiledAddressExpr::evaluate(const StaticValueKnowledge& knowledge, + std::optional lane) const { (void) lane; auto resolvedOffset = byteOffset.evaluate(knowledge); if (failed(resolvedOffset)) diff --git a/src/PIM/Common/IR/AddressAnalysis.hpp b/src/PIM/Common/IR/AddressAnalysis.hpp index 9b73e6e..d2fead4 100644 --- a/src/PIM/Common/IR/AddressAnalysis.hpp +++ b/src/PIM/Common/IR/AddressAnalysis.hpp @@ -33,7 +33,8 @@ struct CompiledIndexExpr { std::shared_ptr node; CompiledIndexExpr() = default; - explicit CompiledIndexExpr(std::shared_ptr node) : node(std::move(node)) {} + explicit CompiledIndexExpr(std::shared_ptr node) + : node(std::move(node)) {} llvm::FailureOr evaluate(const StaticValueKnowledge& knowledge) const; }; @@ -68,8 +69,8 @@ struct CompiledAddressExpr { mlir::Value base; CompiledIndexExpr byteOffset; - llvm::FailureOr - evaluate(const StaticValueKnowledge& knowledge, std::optional lane) const; + llvm::FailureOr evaluate(const StaticValueKnowledge& knowledge, + std::optional lane) const; }; mlir::memref::GlobalOp lookupGlobalForGetGlobal(mlir::ModuleOp moduleOp, mlir::memref::GetGlobalOp getGlobalOp); diff --git a/src/PIM/Common/IR/BatchCoreUtils.cpp b/src/PIM/Common/IR/BatchCoreUtils.cpp index bc1a837..0baf7fc 100644 --- a/src/PIM/Common/IR/BatchCoreUtils.cpp +++ b/src/PIM/Common/IR/BatchCoreUtils.cpp @@ -1,5 +1,4 @@ #include "src/Accelerators/PIM/Common/IR/BatchCoreUtils.hpp" - #include "src/Accelerators/PIM/Common/PimCommon.hpp" namespace onnx_mlir { @@ -10,8 +9,7 @@ llvm::SmallVector getBatchCoreIds(pim::PimCoreBatchOp coreBatchOp) { return llvm::SmallVector(coreIdsAttr.asArrayRef().begin(), coreIdsAttr.asArrayRef().end()); } -llvm::SmallVector -getLaneChunkCoreIds(llvm::ArrayRef coreIds, size_t laneCount, unsigned lane) { +llvm::SmallVector getLaneChunkCoreIds(llvm::ArrayRef coreIds, size_t laneCount, unsigned lane) { llvm::SmallVector laneCoreIds; laneCoreIds.reserve(coreIds.size() / laneCount); for (size_t chunkIndex = 0; chunkIndex < coreIds.size() / laneCount; ++chunkIndex) diff --git a/src/PIM/Common/IR/BatchCoreUtils.hpp b/src/PIM/Common/IR/BatchCoreUtils.hpp index 0b92644..41351d6 100644 --- a/src/PIM/Common/IR/BatchCoreUtils.hpp +++ b/src/PIM/Common/IR/BatchCoreUtils.hpp @@ -9,7 +9,6 @@ namespace onnx_mlir { llvm::SmallVector getBatchCoreIds(pim::PimCoreBatchOp coreBatchOp); -llvm::SmallVector -getLaneChunkCoreIds(llvm::ArrayRef coreIds, size_t laneCount, unsigned lane); +llvm::SmallVector getLaneChunkCoreIds(llvm::ArrayRef coreIds, size_t laneCount, unsigned lane); } // namespace onnx_mlir diff --git a/src/PIM/Common/IR/CoreBlockUtils.hpp b/src/PIM/Common/IR/CoreBlockUtils.hpp index a002098..3a9c56e 100644 --- a/src/PIM/Common/IR/CoreBlockUtils.hpp +++ b/src/PIM/Common/IR/CoreBlockUtils.hpp @@ -24,10 +24,9 @@ walkPimCoreBlock(mlir::Block& block, /// Walks a `pim.core`-like body structurally for verification without /// enumerating full loop trip counts. Loop bounds must still be statically /// evaluable so address resolution remains well-defined. -mlir::LogicalResult -walkPimCoreBlockStructurally(mlir::Block& block, - const StaticValueKnowledge& knowledge, - llvm::function_ref - callback); +mlir::LogicalResult walkPimCoreBlockStructurally( + mlir::Block& block, + const StaticValueKnowledge& knowledge, + llvm::function_ref callback); } // namespace onnx_mlir diff --git a/src/PIM/Common/IR/WeightUtils.hpp b/src/PIM/Common/IR/WeightUtils.hpp index b14fc71..be4cbb1 100644 --- a/src/PIM/Common/IR/WeightUtils.hpp +++ b/src/PIM/Common/IR/WeightUtils.hpp @@ -4,9 +4,9 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Value.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include diff --git a/src/PIM/Compiler/PimCodeGen.cpp b/src/PIM/Compiler/PimCodeGen.cpp index ff5e2e9..a758a8f 100644 --- a/src/PIM/Compiler/PimCodeGen.cpp +++ b/src/PIM/Compiler/PimCodeGen.cpp @@ -199,22 +199,20 @@ MemoryReportRow PimMemory::getReportRow() const { } void PimMemory::remove(mlir::Value val) { - for (auto it = ownedMemEntriesMap.begin(); it != ownedMemEntriesMap.end();) { + for (auto it = ownedMemEntriesMap.begin(); it != ownedMemEntriesMap.end();) if (it->first.value == val) { auto eraseIt = it++; ownedMemEntriesMap.erase(eraseIt); } else ++it; - } - for (auto it = globalMemEntriesMap.begin(); it != globalMemEntriesMap.end();) { + for (auto it = globalMemEntriesMap.begin(); it != globalMemEntriesMap.end();) if (it->first.value == val) { auto eraseIt = it++; globalMemEntriesMap.erase(eraseIt); } else ++it; - } } MemEntry PimMemory::getMemEntry(const MemoryValueKey& key) const { @@ -275,7 +273,8 @@ size_t PimAcceleratorMemory::getValueAddress(mlir::Value value, return iter->second.address + resolvedAddress->byteOffset; } -llvm::FailureOr PimAcceleratorMemory::getIndexValue(mlir::Value value, const StaticValueKnowledge& knowledge) const { +llvm::FailureOr PimAcceleratorMemory::getIndexValue(mlir::Value value, + const StaticValueKnowledge& knowledge) const { value = resolveCachedAlias(value, knowledge); auto compiledIt = compiledIndexExprs.find(value); if (compiledIt == compiledIndexExprs.end()) { @@ -826,7 +825,8 @@ class ScopedMapBindings { llvm::SmallVector>, 8> savedEntries; public: - explicit ScopedMapBindings(MapTy& map) : map(map) {} + explicit ScopedMapBindings(MapTy& map) + : map(map) {} void bind(const KeyTy& key, const ValueTy& value) { auto it = map.find(key); @@ -838,12 +838,11 @@ public: } ~ScopedMapBindings() { - for (auto it = savedEntries.rbegin(); it != savedEntries.rend(); ++it) { + for (auto it = savedEntries.rbegin(); it != savedEntries.rend(); ++it) if (it->second) map[it->first] = *it->second; else map.erase(it->first); - } } }; @@ -929,9 +928,8 @@ static FailureOr classifyCompiledCoreOpKind(Operation& op) { return failure(); } -static LogicalResult compileCoreEmissionPlan(Block& block, - Operation* weightOwner, - llvm::SmallVectorImpl& plan) { +static LogicalResult +compileCoreEmissionPlan(Block& block, Operation* weightOwner, llvm::SmallVectorImpl& plan) { for (Operation& op : block) { if (isa(op) || isCoreStaticAddressOp(&op)) continue; @@ -982,15 +980,14 @@ static LogicalResult compileCoreEmissionPlan(Block& block, return success(); } -static LogicalResult executeCompiledCorePlan(const llvm::SmallVectorImpl& plan, - PimCodeGen& coreCodeGen, - StaticValueKnowledge& knowledge, - llvm::function_ref(pim::PimVMMOp, - const StaticValueKnowledge&)> - resolveWeightSlot, - size_t& processedOperations, - std::optional batchLane = std::nullopt, - std::optional batchLaneCount = std::nullopt) { +static LogicalResult executeCompiledCorePlan( + const llvm::SmallVectorImpl& plan, + PimCodeGen& coreCodeGen, + StaticValueKnowledge& knowledge, + llvm::function_ref(pim::PimVMMOp, const StaticValueKnowledge&)> resolveWeightSlot, + size_t& processedOperations, + std::optional batchLane = std::nullopt, + std::optional batchLaneCount = std::nullopt) { for (const CompiledCoreNode& node : plan) { if (node.kind == CompiledCoreNode::Kind::Loop) { auto lowerBound = node.lowerBound.evaluate(knowledge); @@ -1010,8 +1007,13 @@ static LogicalResult executeCompiledCorePlan(const llvm::SmallVectorImpl(forOp.getRegion().front().getTerminator()); @@ -1031,18 +1033,10 @@ static LogicalResult executeCompiledCorePlan(const llvm::SmallVectorImpl(node.op), knowledge); break; - case CompiledCoreOpKind::Lmv: - coreCodeGen.codeGenLmvOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::Receive: - coreCodeGen.codeGenReceiveOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::Send: - coreCodeGen.codeGenSendOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::Concat: - coreCodeGen.codeGenConcatOp(cast(node.op), knowledge); - break; + case CompiledCoreOpKind::Lmv: coreCodeGen.codeGenLmvOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::Receive: coreCodeGen.codeGenReceiveOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::Send: coreCodeGen.codeGenSendOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::Concat: coreCodeGen.codeGenConcatOp(cast(node.op), knowledge); break; case CompiledCoreOpKind::Vmm: if (auto weightSlot = resolveWeightSlot(cast(node.op), knowledge); succeeded(weightSlot)) coreCodeGen.codeGenMVMLikeOp(*weightSlot, cast(node.op), true, knowledge); @@ -1052,33 +1046,15 @@ static LogicalResult executeCompiledCorePlan(const llvm::SmallVectorImpl(node.op), knowledge); break; - case CompiledCoreOpKind::VVAdd: - coreCodeGen.codeGenVVAddOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VVSub: - coreCodeGen.codeGenVVSubOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VVMul: - coreCodeGen.codeGenVVMulOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VVMax: - coreCodeGen.codeGenVVMaxOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VVDMul: - coreCodeGen.codeGenVVDMulOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VAvg: - coreCodeGen.codeGenVAvgOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VRelu: - coreCodeGen.codeGenVReluOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VTanh: - coreCodeGen.codeGenVTanhOp(cast(node.op), knowledge); - break; - case CompiledCoreOpKind::VSigm: - coreCodeGen.codeGenVSigmOp(cast(node.op), knowledge); - break; + case CompiledCoreOpKind::VVAdd: coreCodeGen.codeGenVVAddOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VVSub: coreCodeGen.codeGenVVSubOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VVMul: coreCodeGen.codeGenVVMulOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VVMax: coreCodeGen.codeGenVVMaxOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VVDMul: coreCodeGen.codeGenVVDMulOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VAvg: coreCodeGen.codeGenVAvgOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VRelu: coreCodeGen.codeGenVReluOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VTanh: coreCodeGen.codeGenVTanhOp(cast(node.op), knowledge); break; + case CompiledCoreOpKind::VSigm: coreCodeGen.codeGenVSigmOp(cast(node.op), knowledge); break; case CompiledCoreOpKind::VSoftmax: coreCodeGen.codeGenVSoftmaxOp(cast(node.op), knowledge); break; @@ -1131,23 +1107,22 @@ static void aliasMaterializedHostGlobals(CoreLikeOpTy coreLikeOp, /// scf.for loops are statically unrolled via walkPimCoreBlock so that addressing is /// fully resolved before the JSON instructions are emitted. /// Returns the number of emitted instructions, or -1 on failure. -static int64_t codeGenCoreOps(Block& block, - PimCodeGen& coreCodeGen, - const StaticValueKnowledge& initialKnowledge, - Operation* weightOwner, - llvm::function_ref(pim::PimVMMOp, - const StaticValueKnowledge&)> - resolveWeightSlot, - std::optional batchLane = std::nullopt, - std::optional batchLaneCount = std::nullopt) { +static int64_t codeGenCoreOps( + Block& block, + PimCodeGen& coreCodeGen, + const StaticValueKnowledge& initialKnowledge, + Operation* weightOwner, + llvm::function_ref(pim::PimVMMOp, const StaticValueKnowledge&)> resolveWeightSlot, + std::optional batchLane = std::nullopt, + std::optional batchLaneCount = std::nullopt) { llvm::SmallVector plan; if (failed(compileCoreEmissionPlan(block, weightOwner, plan))) return -1; size_t processedOperations = 0; StaticValueKnowledge knowledge = initialKnowledge; - auto result = - executeCompiledCorePlan(plan, coreCodeGen, knowledge, resolveWeightSlot, processedOperations, batchLane, batchLaneCount); + auto result = executeCompiledCorePlan( + plan, coreCodeGen, knowledge, resolveWeightSlot, processedOperations, batchLane, batchLaneCount); return failed(result) ? -1 : static_cast(processedOperations); } @@ -1219,9 +1194,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std:: SmallVector jobIndices; SmallVector orderedOriginalCoreIds = llvm::to_vector(lanesByCoreId.keys()); - llvm::sort(orderedOriginalCoreIds, [&](size_t lhs, size_t rhs) { - return emittedCoreIds.lookup(lhs) < emittedCoreIds.lookup(rhs); - }); + llvm::sort(orderedOriginalCoreIds, + [&](size_t lhs, size_t rhs) { return emittedCoreIds.lookup(lhs) < emittedCoreIds.lookup(rhs); }); for (size_t originalCoreId : orderedOriginalCoreIds) { CoreEmissionJob job; job.coreLikeOp = coreBatchOp; @@ -1236,9 +1210,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std:: ++nextBatchReportId; } - auto linkCoreWeights = [&](size_t coreId, - ArrayRef weightFiles, - json::Array& xbarsPerGroup) -> OnnxMlirCompilerErrorCodes { + auto linkCoreWeights = + [&](size_t coreId, ArrayRef weightFiles, json::Array& xbarsPerGroup) -> OnnxMlirCompilerErrorCodes { auto coreWeightsDirPath = outputDirPath + "/core_" + std::to_string(coreId); if (auto error = sys::fs::create_directory(coreWeightsDirPath)) { errs() << "Error creating core directory: " << coreWeightsDirPath << ": " << error.message() << '\n'; @@ -1250,8 +1223,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std:: if (auto error = sys::fs::create_link(outputDirPath + "/weights/" + fileName, coreWeightsDirPath + "/crossbar_" + std::to_string(slot) + ".bin")) { errs() << "Error creating link file: " << (outputDirPath + "/weights/" + fileName) << " to " - << (coreWeightsDirPath + "/crossbar_" + std::to_string(slot) + ".bin") - << "\nError:" << error.message() << '\n'; + << (coreWeightsDirPath + "/crossbar_" + std::to_string(slot) + ".bin") << "\nError:" << error.message() + << '\n'; return InvalidOutputFileAccess; } } @@ -1294,8 +1267,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std:: errorCode = std::error_code(); coreJsonStream = std::make_unique(outputCoreJsonPath, errorCode); if (errorCode) { - errs() << "Error while opening core json file `" << outputCoreJsonPath << "`: " << errorCode.message() - << '\n'; + errs() << "Error while opening core json file `" << outputCoreJsonPath << "`: " << errorCode.message() << '\n'; result.status = InvalidOutputFileAccess; return result; } @@ -1364,9 +1336,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std:: }; std::vector jobResults(jobs.size()); - mlir::parallelFor(moduleOp.getContext(), 0, jobs.size(), [&](size_t index) { - jobResults[index] = emitJob(jobs[index]); - }); + mlir::parallelFor( + moduleOp.getContext(), 0, jobs.size(), [&](size_t index) { jobResults[index] = emitJob(jobs[index]); }); for (size_t jobIndex = 0; jobIndex < jobs.size(); ++jobIndex) if (jobResults[jobIndex].status != CompilerSuccess) diff --git a/src/PIM/Compiler/PimCodeGen.hpp b/src/PIM/Compiler/PimCodeGen.hpp index a9d2dac..981435f 100644 --- a/src/PIM/Compiler/PimCodeGen.hpp +++ b/src/PIM/Compiler/PimCodeGen.hpp @@ -101,7 +101,9 @@ public: PimAcceleratorMemory() : hostMem(memEntriesMap), fileReport(openReportFile("memory_report")) {} PimAcceleratorMemory(const llvm::SmallDenseMap& initialMemEntries, bool enableReport) - : memEntriesMap(initialMemEntries), hostMem(memEntriesMap), fileReport(enableReport ? openReportFile("memory_report") : std::fstream()) {} + : memEntriesMap(initialMemEntries), + hostMem(memEntriesMap), + fileReport(enableReport ? openReportFile("memory_report") : std::fstream()) {} PimMemory& getOrCreateDeviceMem(size_t id); @@ -206,13 +208,9 @@ namespace llvm { template <> struct DenseMapInfo { - static onnx_mlir::MemoryValueKey getEmptyKey() { - return {DenseMapInfo::getEmptyKey(), 0}; - } + static onnx_mlir::MemoryValueKey getEmptyKey() { return {DenseMapInfo::getEmptyKey(), 0}; } - static onnx_mlir::MemoryValueKey getTombstoneKey() { - return {DenseMapInfo::getTombstoneKey(), 0}; - } + static onnx_mlir::MemoryValueKey getTombstoneKey() { return {DenseMapInfo::getTombstoneKey(), 0}; } static unsigned getHashValue(const onnx_mlir::MemoryValueKey& key) { return hash_combine(key.value, key.lane.value_or(std::numeric_limits::max())); diff --git a/src/PIM/Compiler/PimWeightEmitter.cpp b/src/PIM/Compiler/PimWeightEmitter.cpp index 719e52a..f16c320 100644 --- a/src/PIM/Compiler/PimWeightEmitter.cpp +++ b/src/PIM/Compiler/PimWeightEmitter.cpp @@ -16,9 +16,7 @@ using namespace llvm; using namespace mlir; namespace onnx_mlir { -namespace { - -} // namespace +namespace {} // namespace llvm::DenseMap> createAndPopulateWeightFolder(ArrayRef requests, StringRef outputDirPath) { diff --git a/src/PIM/Conversion/ONNXToSpatial/CompileTime.cpp b/src/PIM/Conversion/ONNXToSpatial/CompileTime.cpp index f96061e..45ffe57 100644 --- a/src/PIM/Conversion/ONNXToSpatial/CompileTime.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/CompileTime.cpp @@ -198,7 +198,6 @@ static DenseElementsAttr getHostConstantDenseElementsAttrImpl(Value value, llvm: return nullptr; } - static std::optional getCompileTimeSourceImpl(Operation* op, llvm::SmallPtrSetImpl& visited, size_t chainLength = 0) { if (!op) @@ -217,7 +216,9 @@ getCompileTimeSourceImpl(Operation* op, llvm::SmallPtrSetImpl& visit chainLength += 1; if (auto extractOp = dyn_cast(op)) - return hasConstantIndices(extractOp) ? getCompileTimeSourceImpl(extractOp.getTensor().getDefiningOp(), visited, chainLength) : std::nullopt; + return hasConstantIndices(extractOp) + ? getCompileTimeSourceImpl(extractOp.getTensor().getDefiningOp(), visited, chainLength) + : std::nullopt; if (!isStaticTensorResult(op)) return std::nullopt; @@ -232,8 +233,9 @@ getCompileTimeSourceImpl(Operation* op, llvm::SmallPtrSetImpl& visit return getCompileTimeSourceImpl(expandShapeOp.getSrc().getDefiningOp(), visited, chainLength); if (auto extractSliceOp = dyn_cast(op)) - return hasStaticUnitStrides(extractSliceOp) ? getCompileTimeSourceImpl(extractSliceOp.getSource().getDefiningOp(), visited, chainLength) - : std::nullopt; + return hasStaticUnitStrides(extractSliceOp) + ? getCompileTimeSourceImpl(extractSliceOp.getSource().getDefiningOp(), visited, chainLength) + : std::nullopt; if (auto splatOp = dyn_cast(op)) return getCompileTimeSourceImpl(splatOp.getInput().getDefiningOp(), visited, chainLength); @@ -252,9 +254,8 @@ getCompileTimeSourceImpl(Operation* op, llvm::SmallPtrSetImpl& visit res = partialRes; continue; } - if(res->chainLength < partialRes->chainLength){ + if (res->chainLength < partialRes->chainLength) res = partialRes; - } } return res; } @@ -264,8 +265,7 @@ getCompileTimeSourceImpl(Operation* op, llvm::SmallPtrSetImpl& visit } // namespace - - std::optional getCompileTimeSource(Operation* op) { +std::optional getCompileTimeSource(Operation* op) { llvm::SmallPtrSet visited; return getCompileTimeSourceImpl(op, visited); } diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp index b189bd0..ce9973e 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/Gemm.cpp @@ -2,9 +2,9 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/IR/Matchers.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" @@ -143,13 +143,12 @@ static Value createGemmBatchKOffset( rewriter, loc, (d0.floorDiv(numOutRows) % numKSlices) * crossbarSize.getValue(), ValueRange {lane}); } -static Value createGemmBatchHOffset( - Value lane, - int64_t numOutRows, - int64_t numKSlices, - int64_t numOutHSlices, - ConversionPatternRewriter& rewriter, - Location loc) { +static Value createGemmBatchHOffset(Value lane, + int64_t numOutRows, + int64_t numKSlices, + int64_t numOutHSlices, + ConversionPatternRewriter& rewriter, + Location loc) { if (numOutHSlices == 1) return createIndexConstant(rewriter, 0); diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/MatMul.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/MatMul.cpp index 7609b91..9859ad2 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/MatMul.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/MatMul.cpp @@ -9,8 +9,8 @@ #include #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp" -#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp index b2f8381..1218481 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Math/ReduceMean.cpp @@ -6,8 +6,8 @@ #include #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp" -#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp index 4c1131f..901d1ca 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Reshape.cpp @@ -4,8 +4,8 @@ #include "llvm/ADT/SmallVector.h" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp" -#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" diff --git a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp index 8683481..2f154c9 100644 --- a/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp +++ b/src/PIM/Conversion/ONNXToSpatial/Patterns/Tensor/Split.cpp @@ -2,8 +2,8 @@ #include "mlir/Transforms/DialectConversion.h" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp" -#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Conversion/ONNXToSpatial/CompileTime.hpp" +#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/ConversionPatterns.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" diff --git a/src/PIM/Conversion/SpatialToPim/ChannelLoweringPatterns.cpp b/src/PIM/Conversion/SpatialToPim/ChannelLoweringPatterns.cpp index 33cdc24..042e473 100644 --- a/src/PIM/Conversion/SpatialToPim/ChannelLoweringPatterns.cpp +++ b/src/PIM/Conversion/SpatialToPim/ChannelLoweringPatterns.cpp @@ -1,4 +1,5 @@ #include "mlir/Dialect/Tensor/IR/Tensor.h" + #include "src/Accelerators/PIM/Conversion/SpatialToPim/ChannelLoweringPatterns.hpp" #include "src/Accelerators/PIM/Conversion/SpatialToPim/Common.hpp" #include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp" diff --git a/src/PIM/Conversion/SpatialToPim/CoreLoweringPatterns.cpp b/src/PIM/Conversion/SpatialToPim/CoreLoweringPatterns.cpp index 345a459..f62bfc8 100644 --- a/src/PIM/Conversion/SpatialToPim/CoreLoweringPatterns.cpp +++ b/src/PIM/Conversion/SpatialToPim/CoreLoweringPatterns.cpp @@ -171,7 +171,6 @@ LogicalResult raptor::SpatialToPimPass::lowerComputeOp(spatial::SpatCompute comp markOpToRemove(receiveOp); continue; } - } if (computeOp.getNumResults() != yieldOp.getNumOperands()) diff --git a/src/PIM/Conversion/SpatialToPim/ReturnPathNormalization.cpp b/src/PIM/Conversion/SpatialToPim/ReturnPathNormalization.cpp index 6efa6b5..26cd27f 100644 --- a/src/PIM/Conversion/SpatialToPim/ReturnPathNormalization.cpp +++ b/src/PIM/Conversion/SpatialToPim/ReturnPathNormalization.cpp @@ -606,7 +606,6 @@ void raptor::SpatialToPimPass::replaceReturnWithOutputBuffers(func::ReturnOp ret markOpToRemove(receiveOp); return; } - }; SmallVector originalOperands(returnOp.getOperands().begin(), returnOp.getOperands().end()); diff --git a/src/PIM/Dialect/Spatial/SpatialOps.cpp b/src/PIM/Dialect/Spatial/SpatialOps.cpp index c315219..5c7fdf1 100644 --- a/src/PIM/Dialect/Spatial/SpatialOps.cpp +++ b/src/PIM/Dialect/Spatial/SpatialOps.cpp @@ -1,5 +1,5 @@ -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp index cc5ff13..3c143b9 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp @@ -122,9 +122,8 @@ void emitMergeIrCounts(StringRef phaseName, func::FuncOp funcOp) { llvm::errs() << "[merge-profile] " << phaseName << " counts:" << " compute=" << counts.topLevelComputeCount << " compute_batch=" << counts.topLevelComputeBatchCount << " scalar_send=" << counts.scalarChannelSendCount - << " scalar_recv=" << counts.scalarChannelReceiveCount - << " wvmm=" << counts.wvmmCount << " vadd=" << counts.vaddCount - << " scf_for=" << counts.scfForCount << "\n"; + << " scalar_recv=" << counts.scalarChannelReceiveCount << " wvmm=" << counts.wvmmCount + << " vadd=" << counts.vaddCount << " scf_for=" << counts.scfForCount << "\n"; } static std::optional getComputeCoreId(SpatCompute compute) { @@ -514,7 +513,8 @@ void generateReport(func::FuncOp funcOp, const std::string& name, size_t usedCpu SmallVector coreIds; if (auto coreIdsAttr = batch->getAttrOfType(onnx_mlir::kCoreIdsAttrName)) llvm::append_range(coreIds, coreIdsAttr.asArrayRef()); - collectedData.push_back({nextBatchId++, logicalCount, perInstanceCrossbarCount * logicalCount, numInst, true, coreIds}); + collectedData.push_back( + {nextBatchId++, logicalCount, perInstanceCrossbarCount * logicalCount, numInst, true, coreIds}); totalComputeOps += 1; totalLogicalComputes += logicalCount; totalBatchComputeOps += 1; diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeGraph.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeGraph.cpp index 184c309..e193168 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeGraph.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeGraph.cpp @@ -206,10 +206,8 @@ static FailureOr evaluateIndexLike(OpFoldResult value, return evaluateIndexLike(llvm::cast(value), bindings, lane, laneArg); } -static FailureOr evaluateIndexLike(Value value, - const DenseMap& bindings, - std::optional lane, - Value laneArg) { +static FailureOr +evaluateIndexLike(Value value, const DenseMap& bindings, std::optional lane, Value laneArg) { if (lane && value == laneArg) return *lane; if (auto it = bindings.find(value); it != bindings.end()) @@ -260,11 +258,10 @@ static FailureOr evaluateIndexLike(Value value, return evaluateAffineExpr(map.getResult(0), dims, symbols); } -static FailureOr> -evaluateIndexList(ArrayRef values, - const DenseMap& bindings, - std::optional lane, - Value laneArg) { +static FailureOr> evaluateIndexList(ArrayRef values, + const DenseMap& bindings, + std::optional lane, + Value laneArg) { SmallVector result; result.reserve(values.size()); for (OpFoldResult value : values) { @@ -308,12 +305,11 @@ static CrossbarWeight completeCrossbarWeight(Value root, return weight; } -static FailureOr -getStaticCrossbarWeight(Operation* owner, - Value value, - const DenseMap& bindings, - std::optional lane, - Value laneArg) { +static FailureOr getStaticCrossbarWeight(Operation* owner, + Value value, + const DenseMap& bindings, + std::optional lane, + Value laneArg) { if (auto extract = value.getDefiningOp()) { FailureOr sourceWeight = getStaticCrossbarWeight(owner, extract.getSource(), bindings, lane, laneArg); diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/Utils.hpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/Utils.hpp index 00edd35..3737a44 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/Utils.hpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/Utils.hpp @@ -19,7 +19,6 @@ using CPU = int; using Cost = unsigned long long; using Time = unsigned long long; - template inline T checkedAdd(T lhs, T rhs) { static_assert(std::is_unsigned_v, "checkedAdd only supports unsigned types"); diff --git a/src/PIM/Pass/PimCodegen/VerificationPass.cpp b/src/PIM/Pass/PimCodegen/VerificationPass.cpp index 9ac56c9..94dec33 100644 --- a/src/PIM/Pass/PimCodegen/VerificationPass.cpp +++ b/src/PIM/Pass/PimCodegen/VerificationPass.cpp @@ -327,9 +327,8 @@ private: static LogicalResult verifyCoreLikeOperands(CoreLikeOpTy coreLikeOp, const StaticValueKnowledge& initialKnowledge, pim::CappedDiagnosticReporter& diagnostics) { - return walkPimCoreBlockStructurally(coreLikeOp.getBody().front(), - initialKnowledge, - [&](Operation& op, const StaticValueKnowledge& knowledge) { + return walkPimCoreBlockStructurally( + coreLikeOp.getBody().front(), initialKnowledge, [&](Operation& op, const StaticValueKnowledge& knowledge) { bool hasFailure = false; if (!isSupportedCoreInstructionOp(&op)) { diagnostics.report(&op, [](Operation* illegalOp) {