From a103ba328b46f1149590d46cf2737980856b3626 Mon Sep 17 00:00:00 2001 From: NiccoloN Date: Tue, 19 May 2026 12:23:01 +0200 Subject: [PATCH] remove dead logic --- .../MaterializeMergeSchedule.cpp | 63 +++++-------------- .../MergeComputeNodesPass.cpp | 9 +-- .../Scheduling/ComputeInstanceUtils.cpp | 29 ++------- .../Scheduling/ComputeInstanceUtils.hpp | 1 - 4 files changed, 21 insertions(+), 81 deletions(-) diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp index 9c6fd46..1cd985d 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MaterializeMergeSchedule.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -54,7 +53,6 @@ public: replaceExternalUses(); if (failed(eraseOldScheduledOps())) return failure(); - moveExternalUsersBeforeReturn(); return success(); } @@ -97,18 +95,6 @@ private: | static_cast(channelInfo.targetCoreId); } - void collectExternalUsers(Operation* op) { - if (!externalUsersToMove.insert(op).second) - return; - for (Value result : op->getResults()) { - for (Operation* user : result.getUsers()) { - if (oldComputeOps.contains(user) || isa(user)) - continue; - collectExternalUsers(user); - } - } - } - void collectScheduledTasks() { for (ComputeInstance scheduledInstance : schedule->dominanceOrderCompute) { oldComputeOps.insert(scheduledInstance.op); @@ -151,25 +137,22 @@ private: auto& remoteInputs = remoteInputsByTask[task.computeInstance]; remoteInputs.resize(taskInputs.size()); for (auto [inputIndex, input] : llvm::enumerate(taskInputs)) { - auto producerRef = getProducerValueRef(input); - if (producerRef) { + if (auto producerRef = getProducerValueRef(input)) { auto producerIt = taskByComputeInstance.find(producerRef->instance); - if (producerIt != taskByComputeInstance.end()) { - if (producerIt->second.cpu != cpu) { - ChannelInfo info { - (*nextChannelId)++, - static_cast(producerIt->second.cpu), - static_cast(cpu), - }; - remoteInputs[inputIndex] = info; - auto& perResultChannels = remoteSendsByTask[producerRef->instance]; - if (perResultChannels.empty()) - perResultChannels.resize(getComputeInstanceOutputTypes(producerIt->second.computeInstance).size()); - perResultChannels[producerRef->resultIndex].push_back( - {info, task.computeInstance, inputIndex, task.orderWithinCpu, 0}); - } - continue; + if (producerIt->second.cpu != cpu) { + ChannelInfo info { + (*nextChannelId)++, + static_cast(producerIt->second.cpu), + static_cast(cpu), + }; + remoteInputs[inputIndex] = info; + auto& perResultChannels = remoteSendsByTask[producerRef->instance]; + if (perResultChannels.empty()) + perResultChannels.resize(getComputeInstanceOutputTypes(producerIt->second.computeInstance).size()); + perResultChannels[producerRef->resultIndex].push_back( + {info, task.computeInstance, inputIndex, task.orderWithinCpu, 0}); } + continue; } if (seenExternalInputsByCpu[cpu].insert(input).second) cpuExternalInputs[cpu].push_back(input); @@ -183,8 +166,6 @@ private: if (oldComputeOps.contains(useOwner)) continue; hasExternalUser = true; - if (!isa(useOwner)) - collectExternalUsers(useOwner); } if (hasExternalUser) cpuExternalOutputs[cpu].push_back({task.computeInstance, resultIndex}); @@ -407,7 +388,8 @@ private: if (producerIt->second.cpu == cpu) { auto producedIt = producedValuesByTask.find(producerRef->instance); if (producedIt == producedValuesByTask.end() || producedIt->second.size() <= producerRef->resultIndex) { - task.computeInstance.op->emitOpError("missing local producer value during per-cpu merge materialization") + task.computeInstance.op->emitOpError( + "missing local producer value during per-cpu merge materialization") << " consumerCpu=" << cpu << " producerCpu=" << producerIt->second.cpu << " producerLaneStart=" << producerRef->instance.laneStart << " producerLaneCount=" << producerRef->instance.laneCount; @@ -586,18 +568,6 @@ private: return success(); } - void moveExternalUsersBeforeReturn() { - SmallVector orderedUsersToMove; - for (Operation& op : func.getBody().front()) { - if (&op == returnOp.getOperation()) - break; - if (externalUsersToMove.contains(&op)) - orderedUsersToMove.push_back(&op); - } - for (Operation* op : orderedUsersToMove) - op->moveBefore(returnOp); - } - func::FuncOp func; const MergeScheduleResult* schedule = nullptr; int64_t* nextChannelId = nullptr; @@ -610,7 +580,6 @@ private: DenseMap> tasksByCpu; SmallVector orderedCpus; DenseSet seenCpus; - DenseSet externalUsersToMove; DenseMap>> remoteSendsByTask; DenseMap>> remoteInputsByTask; DenseMap> cpuExternalInputs; diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp index b465955..92016c2 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp @@ -13,7 +13,6 @@ #include "mlir/Support/LLVM.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -28,9 +27,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -39,13 +36,11 @@ #include "MaterializeMergeSchedule.hpp" #include "PostMergeCompaction.hpp" -#include "RegularOpCompaction.hpp" #include "Scheduling/ComputeInstanceUtils.hpp" #include "Scheduling/MergeSchedulingAnalysis.hpp" #include "src/Accelerators/PIM/Common/IR/CompactAsmUtils.hpp" #include "src/Accelerators/PIM/Common/PimCommon.hpp" #include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp" -#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" using namespace mlir; @@ -53,10 +48,8 @@ using namespace mlir; namespace onnx_mlir { namespace { using namespace onnx_mlir::compact_asm; -using ProducerValueRef = spatial::ProducerValueRef; using SpatCompute = spatial::SpatCompute; using SpatComputeBatch = spatial::SpatComputeBatch; -using spatial::getOriginalSpatCompute; using spatial::getProducerValueRef; bool isMergeProfilingEnabled() { return std::getenv("RAPTOR_PROFILE_MERGE") != nullptr; } @@ -303,7 +296,7 @@ void emitMotifProfile(func::FuncOp funcOp) { } for (Value input : compute.getInputs()) { - auto parent = getOriginalSpatCompute(input.getDefiningOp()); + auto parent = dyn_cast(input.getDefiningOp()); if (!parent || parent == compute) continue; auto parentIt = computeToIndex.find(parent); diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.cpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.cpp index 4e53be2..d583249 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.cpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.cpp @@ -22,7 +22,7 @@ size_t getBatchChunkTargetCount(int32_t laneCount) { } ComputeInstance getBatchChunkForIndex(SpatComputeBatch batch, size_t chunkIndex) { - size_t totalLanes = static_cast(batch.getLaneCount()); + size_t totalLanes = batch.getLaneCount(); size_t chunkCount = getBatchChunkTargetCount(batch.getLaneCount()); size_t baseChunkSize = totalLanes / chunkCount; size_t largeChunkCount = totalLanes % chunkCount; @@ -33,7 +33,7 @@ ComputeInstance getBatchChunkForIndex(SpatComputeBatch batch, size_t chunkIndex) } ComputeInstance getBatchChunkForLane(SpatComputeBatch batch, uint32_t lane) { - size_t totalLanes = static_cast(batch.getLaneCount()); + size_t totalLanes = batch.getLaneCount(); size_t chunkCount = getBatchChunkTargetCount(batch.getLaneCount()); size_t baseChunkSize = totalLanes / chunkCount; size_t largeChunkCount = totalLanes % chunkCount; @@ -47,32 +47,11 @@ ComputeInstance getBatchChunkForLane(SpatComputeBatch batch, uint32_t lane) { return getBatchChunkForIndex(batch, chunkIndex); } -SpatCompute getOriginalSpatCompute(Operation *op) { - if (!op) - return {}; - - while (auto extract = dyn_cast(op)) { - op = extract.getSource().getDefiningOp(); - if (!op) - return {}; - } - - return dyn_cast(op); -} - std::optional getProducerValueRef(Value value) { Operation *op = value.getDefiningOp(); if (!op) return std::nullopt; - //TODO Extract Slice is not the only global non compute operation. There are other legal op - while (auto extract = dyn_cast(op)) { - value = extract.getSource(); - op = value.getDefiningOp(); - if (!op) - return std::nullopt; - } - if (auto compute = dyn_cast(op)) { return ProducerValueRef { ComputeInstance {compute.getOperation(), 0, 1}, @@ -81,9 +60,9 @@ std::optional getProducerValueRef(Value value) { } if (auto batch = dyn_cast(op)) { - uint32_t lane = static_cast(cast(value).getResultNumber()); + uint32_t lane = cast(value).getResultNumber(); ComputeInstance instance = getBatchChunkForLane(batch, lane); - size_t resultIndex = static_cast(lane - instance.laneStart); + size_t resultIndex = lane - instance.laneStart; return ProducerValueRef {instance, resultIndex}; } diff --git a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.hpp b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.hpp index b75a00e..91f3e39 100644 --- a/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.hpp +++ b/src/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/Scheduling/ComputeInstanceUtils.hpp @@ -26,7 +26,6 @@ size_t getBatchChunkTargetCount(int32_t laneCount); ComputeInstance getBatchChunkForIndex(SpatComputeBatch batch, size_t chunkIndex); ComputeInstance getBatchChunkForLane(SpatComputeBatch batch, uint32_t lane); -SpatCompute getOriginalSpatCompute(mlir::Operation *op); std::optional getProducerValueRef(mlir::Value value); std::optional getComputeProducerInstance(mlir::Value value);