clean up unused channel operations and related logic

2026-05-25 20:58:51 +02:00
parent bdc4ca33f3
commit 0f240af271
15 changed files with 3 additions and 1182 deletions
@@ -519,61 +519,12 @@ void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp, const StaticValue
  emitCommunicationOp("recv", addressOf(receiveOp.getOutputBuffer(), knowledge), *sourceCoreId, receiveOp.getSize());
 }

-void PimCodeGen::codeGenReceiveTensorOp(pim::PimReceiveTensorOp receiveTensorOp,
-                                        const StaticValueKnowledge& knowledge) const {
-  size_t outputAddr = addressOf(receiveTensorOp.getOutputBuffer(), knowledge);
-  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(receiveTensorOp.getOutputBuffer().getType()))
-                   / receiveTensorOp.getSourceCoreIds().size();
-  for (auto [chunkIndex, sourceCoreId] : llvm::enumerate(receiveTensorOp.getSourceCoreIds()))
-    emitCommunicationOp("recv", outputAddr + chunkIndex * chunkSize, sourceCoreId, chunkSize);
-}
-
-void PimCodeGen::codeGenReceiveBatchOp(pim::PimReceiveBatchOp receiveOp,
-                                       unsigned lane,
-                                       const StaticValueKnowledge& knowledge) const {
-  emitCommunicationOp(
-    "recv", addressOf(receiveOp.getOutputBuffer(), knowledge), receiveOp.getSourceCoreIds()[lane], receiveOp.getSize());
-}
-
-void PimCodeGen::codeGenReceiveTensorBatchOp(pim::PimReceiveTensorBatchOp receiveOp,
-                                             ArrayRef<int32_t> laneCoreIds,
-                                             const StaticValueKnowledge& knowledge) const {
-  size_t outputAddr = addressOf(receiveOp.getOutputBuffer(), knowledge);
-  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(receiveOp.getOutputBuffer().getType()))
-                   / laneCoreIds.size();
-  for (auto [chunkIndex, sourceCoreId] : llvm::enumerate(laneCoreIds))
-    emitCommunicationOp("recv", outputAddr + chunkIndex * chunkSize, sourceCoreId, chunkSize);
-}
-
 void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp, const StaticValueKnowledge& knowledge) const {
  auto targetCoreId = indexOf(sendOp.getTargetCoreId(), knowledge);
  assert(succeeded(targetCoreId) && "pim.send target core id must be statically resolvable during codegen");
  emitCommunicationOp("send", addressOf(sendOp.getInput(), knowledge), *targetCoreId, sendOp.getSize());
 }

-void PimCodeGen::codeGenSendTensorOp(pim::PimSendTensorOp sendTensorOp, const StaticValueKnowledge& knowledge) const {
-  size_t inputAddr = addressOf(sendTensorOp.getInput(), knowledge);
-  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(sendTensorOp.getInput().getType()))
-                   / sendTensorOp.getTargetCoreIds().size();
-  for (auto [chunkIndex, targetCoreId] : llvm::enumerate(sendTensorOp.getTargetCoreIds()))
-    emitCommunicationOp("send", inputAddr + chunkIndex * chunkSize, targetCoreId, chunkSize);
-}
-
-void PimCodeGen::codeGenSendBatchOp(pim::PimSendBatchOp sendOp,
-                                    unsigned lane,
-                                    const StaticValueKnowledge& knowledge) const {
-  emitCommunicationOp("send", addressOf(sendOp.getInput(), knowledge), sendOp.getTargetCoreIds()[lane], sendOp.getSize());
-}
-
-void PimCodeGen::codeGenSendTensorBatchOp(pim::PimSendTensorBatchOp sendOp,
-                                          ArrayRef<int32_t> laneCoreIds,
-                                          const StaticValueKnowledge& knowledge) const {
-  size_t inputAddr = addressOf(sendOp.getInput(), knowledge);
-  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(sendOp.getInput().getType())) / laneCoreIds.size();
-  for (auto [chunkIndex, targetCoreId] : llvm::enumerate(laneCoreIds))
-    emitCommunicationOp("send", inputAddr + chunkIndex * chunkSize, targetCoreId, chunkSize);
-}
-
 void PimCodeGen::codeGenConcatOp(pim::PimConcatOp concatOp, const StaticValueKnowledge& knowledge) const {
  auto outputType = cast<ShapedType>(concatOp.getOutputBuffer().getType());
  assert(outputType.hasStaticShape() && "concat codegen requires static output shape");
@@ -902,13 +853,7 @@ enum class CompiledCoreOpKind : uint8_t {
  Store,
  Lmv,
  Receive,
-  ReceiveBatch,
-  ReceiveTensor,
-  ReceiveTensorBatch,
  Send,
-  SendBatch,
-  SendTensor,
-  SendTensorBatch,
  Concat,
  Vmm,
  Transpose,
@@ -952,20 +897,8 @@ static FailureOr<CompiledCoreOpKind> classifyCompiledCoreOpKind(Operation& op) {
    return CompiledCoreOpKind::Lmv;
  if (isa<pim::PimReceiveOp>(op))
    return CompiledCoreOpKind::Receive;
-  if (isa<pim::PimReceiveBatchOp>(op))
-    return CompiledCoreOpKind::ReceiveBatch;
-  if (isa<pim::PimReceiveTensorOp>(op))
-    return CompiledCoreOpKind::ReceiveTensor;
-  if (isa<pim::PimReceiveTensorBatchOp>(op))
-    return CompiledCoreOpKind::ReceiveTensorBatch;
  if (isa<pim::PimSendOp>(op))
    return CompiledCoreOpKind::Send;
-  if (isa<pim::PimSendBatchOp>(op))
-    return CompiledCoreOpKind::SendBatch;
-  if (isa<pim::PimSendTensorOp>(op))
-    return CompiledCoreOpKind::SendTensor;
-  if (isa<pim::PimSendTensorBatchOp>(op))
-    return CompiledCoreOpKind::SendTensorBatch;
  if (isa<pim::PimConcatOp>(op))
    return CompiledCoreOpKind::Concat;
  if (isa<pim::PimVMMOp>(op))
@@ -1108,43 +1041,9 @@ static LogicalResult executeCompiledCorePlan(const llvm::SmallVectorImpl<Compile
    case CompiledCoreOpKind::Receive:
      coreCodeGen.codeGenReceiveOp(cast<pim::PimReceiveOp>(node.op), knowledge);
      break;
-    case CompiledCoreOpKind::ReceiveBatch:
-      if (!batchLane)
-        return failure();
-      coreCodeGen.codeGenReceiveBatchOp(cast<pim::PimReceiveBatchOp>(node.op), *batchLane, knowledge);
-      break;
-    case CompiledCoreOpKind::ReceiveTensor:
-      coreCodeGen.codeGenReceiveTensorOp(cast<pim::PimReceiveTensorOp>(node.op), knowledge);
-      break;
-    case CompiledCoreOpKind::ReceiveTensorBatch:
-      if (!batchLane || !batchLaneCount)
-        return failure();
-      coreCodeGen.codeGenReceiveTensorBatchOp(cast<pim::PimReceiveTensorBatchOp>(node.op),
-                                              getLaneChunkCoreIds(cast<pim::PimReceiveTensorBatchOp>(node.op).getSourceCoreIds(),
-                                                                  *batchLaneCount,
-                                                                  *batchLane),
-                                              knowledge);
-      break;
    case CompiledCoreOpKind::Send:
      coreCodeGen.codeGenSendOp(cast<pim::PimSendOp>(node.op), knowledge);
      break;
-    case CompiledCoreOpKind::SendBatch:
-      if (!batchLane)
-        return failure();
-      coreCodeGen.codeGenSendBatchOp(cast<pim::PimSendBatchOp>(node.op), *batchLane, knowledge);
-      break;
-    case CompiledCoreOpKind::SendTensor:
-      coreCodeGen.codeGenSendTensorOp(cast<pim::PimSendTensorOp>(node.op), knowledge);
-      break;
-    case CompiledCoreOpKind::SendTensorBatch:
-      if (!batchLane || !batchLaneCount)
-        return failure();
-      coreCodeGen.codeGenSendTensorBatchOp(cast<pim::PimSendTensorBatchOp>(node.op),
-                                           getLaneChunkCoreIds(cast<pim::PimSendTensorBatchOp>(node.op).getTargetCoreIds(),
-                                                               *batchLaneCount,
-                                                               *batchLane),
-                                           knowledge);
-      break;
    case CompiledCoreOpKind::Concat:
      coreCodeGen.codeGenConcatOp(cast<pim::PimConcatOp>(node.op), knowledge);
      break;