From bdacb9871d24ecb99bc3558b1a806b9ae591789c Mon Sep 17 00:00:00 2001 From: NiccoloN Date: Mon, 4 May 2026 15:58:14 +0200 Subject: [PATCH] fix dcp merge bug --- src/PIM/Dialect/Spatial/SpatialOpsVerify.cpp | 5 +++++ .../MergeComputeNodes/DCPGraph/DCPAnalysis.cpp | 4 +--- .../MergeComputeNodes/MergeComputeNodesPass.cpp | 16 ++++++++++++---- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/PIM/Dialect/Spatial/SpatialOpsVerify.cpp b/src/PIM/Dialect/Spatial/SpatialOpsVerify.cpp index 04d0ccf..b4d8657 100644 --- a/src/PIM/Dialect/Spatial/SpatialOpsVerify.cpp +++ b/src/PIM/Dialect/Spatial/SpatialOpsVerify.cpp @@ -5,6 +5,7 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/LogicalResult.h" #include "src/Accelerators/PIM/Common/PimCommon.hpp" @@ -412,6 +413,10 @@ LogicalResult SpatComputeBatch::verify() { return emitError("compute_batch core_id array length must match laneCount"); if (llvm::any_of(coreIdsAttr.asArrayRef(), [](int32_t coreId) { return coreId <= 0; })) return emitError("compute_batch core_id values must be positive"); + llvm::SmallDenseSet seenCoreIds; + for (int32_t coreId : coreIdsAttr.asArrayRef()) + if (!seenCoreIds.insert(coreId).second) + return emitError("compute_batch core_id values must be distinct"); } Block& block = getBody().front(); diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/DCPAnalysis.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/DCPAnalysis.cpp index 3d47ea7..35534a4 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/DCPAnalysis.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/DCPAnalysis.cpp @@ -56,12 +56,10 @@ struct WindowScheduleResult { size_t maxMergeGroupSize = 0; }; -constexpr CPU kDefaultMaxCpuCount = 1000; - size_t getSchedulingCpuBudget() { if (coresCount.getValue() > 0) return static_cast(coresCount.getValue()); - return static_cast(kDefaultMaxCpuCount); + return std::numeric_limits::max(); } size_t getBatchChunkTargetCount(int32_t laneCount) { diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp index 1309c3b..4dc9713 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp @@ -54,10 +54,9 @@ struct ProducerValueRef { std::optional getProducerValueRef(Value value); static size_t getFastPathCpuBudget() { - constexpr size_t kDefaultMaxCpuCount = 1000; if (coresCount.getValue() > 0) return static_cast(coresCount.getValue()); - return kDefaultMaxCpuCount; + return std::numeric_limits::max(); } static size_t getBatchChunkTargetCount(int32_t laneCount) { @@ -670,6 +669,9 @@ void rebatchEquivalentComputes(func::FuncOp funcOp, int64_t& nextChannelId) { continue; SmallVector group {anchor}; + llvm::SmallDenseSet usedCoreIds; + if (auto coreId = getComputeCoreId(anchor)) + usedCoreIds.insert(*coreId); if (!anchor.getResults().empty()) continue; for (size_t candidateIndex = index + 1; candidateIndex < computes.size(); ++candidateIndex) { @@ -680,8 +682,14 @@ void rebatchEquivalentComputes(func::FuncOp funcOp, int64_t& nextChannelId) { continue; if (!candidate.getResults().empty()) continue; - if (areEquivalentForRebatch(anchor, candidate)) - group.push_back(candidate); + if (!areEquivalentForRebatch(anchor, candidate)) + continue; + + if (auto coreId = getComputeCoreId(candidate)) + if (!usedCoreIds.insert(*coreId).second) + continue; + + group.push_back(candidate); } if (group.size() <= 1)