cleanup unused channel operations and related logic
Validate Operations / validate-operations (push) Has been cancelled

This commit is contained in:
NiccoloN
2026-05-25 20:58:51 +02:00
parent bdc4ca33f3
commit 0f240af271
15 changed files with 3 additions and 1182 deletions
-101
View File
@@ -519,61 +519,12 @@ void PimCodeGen::codeGenReceiveOp(pim::PimReceiveOp receiveOp, const StaticValue
emitCommunicationOp("recv", addressOf(receiveOp.getOutputBuffer(), knowledge), *sourceCoreId, receiveOp.getSize());
}
/// Emits one "recv" communication op per source core of a tensor receive.
///
/// The size reported by getShapedTypeSizeInBytes for the output buffer type is
/// split with integer division across the op's source core ids. Chunk i is
/// received from the i-th source core at `outputAddr + i * chunkSize`; any
/// remainder of the division is not covered by a chunk.
///
/// @param receiveTensorOp Op providing the output buffer and source core ids.
/// @param knowledge       Static value knowledge used to resolve the buffer address.
void PimCodeGen::codeGenReceiveTensorOp(pim::PimReceiveTensorOp receiveTensorOp,
                                        const StaticValueKnowledge& knowledge) const {
  auto sourceCoreIds = receiveTensorOp.getSourceCoreIds();
  // An empty core-id list would make the chunk-size division below divide by zero.
  assert(sourceCoreIds.size() != 0 && "tensor receive requires at least one source core id");
  // With assertions disabled, emit nothing rather than divide by zero.
  if (sourceCoreIds.size() == 0)
    return;

  size_t outputAddr = addressOf(receiveTensorOp.getOutputBuffer(), knowledge);
  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(receiveTensorOp.getOutputBuffer().getType()))
                   / sourceCoreIds.size();
  for (auto [chunkIndex, sourceCoreId] : llvm::enumerate(sourceCoreIds))
    emitCommunicationOp("recv", outputAddr + chunkIndex * chunkSize, sourceCoreId, chunkSize);
}
/// Emits the "recv" communication op for a single lane of a batched receive.
///
/// The source core is the `lane`-th entry of the op's source core id list, the
/// destination is the resolved address of the op's output buffer, and the
/// transfer size is taken from the op's size.
///
/// @param receiveOp Batched receive op being lowered.
/// @param lane      Index into the op's source core id list.
/// @param knowledge Static value knowledge used to resolve the buffer address.
void PimCodeGen::codeGenReceiveBatchOp(pim::PimReceiveBatchOp receiveOp,
                                       unsigned lane,
                                       const StaticValueKnowledge& knowledge) const {
  const auto destinationAddr = addressOf(receiveOp.getOutputBuffer(), knowledge);
  const auto laneSourceCoreId = receiveOp.getSourceCoreIds()[lane];
  emitCommunicationOp("recv", destinationAddr, laneSourceCoreId, receiveOp.getSize());
}
/// Emits one "recv" communication op per core id of one lane of a batched
/// tensor receive.
///
/// The size reported by getShapedTypeSizeInBytes for the output buffer type is
/// split with integer division across `laneCoreIds`. Chunk i is received from
/// `laneCoreIds[i]` at `outputAddr + i * chunkSize`; any remainder of the
/// division is not covered by a chunk.
///
/// @param receiveOp   Op providing the output buffer.
/// @param laneCoreIds Source core ids for the lane being generated.
/// @param knowledge   Static value knowledge used to resolve the buffer address.
void PimCodeGen::codeGenReceiveTensorBatchOp(pim::PimReceiveTensorBatchOp receiveOp,
                                             ArrayRef<int32_t> laneCoreIds,
                                             const StaticValueKnowledge& knowledge) const {
  // An empty core-id list would make the chunk-size division below divide by zero.
  assert(!laneCoreIds.empty() && "batched tensor receive requires at least one source core id per lane");
  // With assertions disabled, emit nothing rather than divide by zero.
  if (laneCoreIds.empty())
    return;

  size_t outputAddr = addressOf(receiveOp.getOutputBuffer(), knowledge);
  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(receiveOp.getOutputBuffer().getType()))
                   / laneCoreIds.size();
  for (auto [chunkIndex, sourceCoreId] : llvm::enumerate(laneCoreIds))
    emitCommunicationOp("recv", outputAddr + chunkIndex * chunkSize, sourceCoreId, chunkSize);
}
/// Emits the "send" communication op for a single send.
///
/// The target core id is resolved through indexOf with the given knowledge;
/// an assertion fires if that resolution did not succeed. The source address is
/// the resolved address of the op's input and the transfer size is the op's size.
///
/// @param sendOp    Send op being lowered.
/// @param knowledge Static value knowledge used to resolve the core id and address.
void PimCodeGen::codeGenSendOp(pim::PimSendOp sendOp, const StaticValueKnowledge& knowledge) const {
  auto resolvedTarget = indexOf(sendOp.getTargetCoreId(), knowledge);
  assert(succeeded(resolvedTarget) && "pim.send target core id must be statically resolvable during codegen");

  const auto payloadAddr = addressOf(sendOp.getInput(), knowledge);
  emitCommunicationOp("send", payloadAddr, *resolvedTarget, sendOp.getSize());
}
/// Emits one "send" communication op per target core of a tensor send.
///
/// The size reported by getShapedTypeSizeInBytes for the input type is split
/// with integer division across the op's target core ids. Chunk i is sent to
/// the i-th target core from `inputAddr + i * chunkSize`; any remainder of the
/// division is not covered by a chunk.
///
/// @param sendTensorOp Op providing the input and target core ids.
/// @param knowledge    Static value knowledge used to resolve the input address.
void PimCodeGen::codeGenSendTensorOp(pim::PimSendTensorOp sendTensorOp, const StaticValueKnowledge& knowledge) const {
  auto targetCoreIds = sendTensorOp.getTargetCoreIds();
  // An empty core-id list would make the chunk-size division below divide by zero.
  assert(targetCoreIds.size() != 0 && "tensor send requires at least one target core id");
  // With assertions disabled, emit nothing rather than divide by zero.
  if (targetCoreIds.size() == 0)
    return;

  size_t inputAddr = addressOf(sendTensorOp.getInput(), knowledge);
  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(sendTensorOp.getInput().getType()))
                   / targetCoreIds.size();
  for (auto [chunkIndex, targetCoreId] : llvm::enumerate(targetCoreIds))
    emitCommunicationOp("send", inputAddr + chunkIndex * chunkSize, targetCoreId, chunkSize);
}
/// Emits the "send" communication op for a single lane of a batched send.
///
/// The target core is the `lane`-th entry of the op's target core id list, the
/// source is the resolved address of the op's input, and the transfer size is
/// taken from the op's size.
///
/// @param sendOp    Batched send op being lowered.
/// @param lane      Index into the op's target core id list.
/// @param knowledge Static value knowledge used to resolve the input address.
void PimCodeGen::codeGenSendBatchOp(pim::PimSendBatchOp sendOp,
                                    unsigned lane,
                                    const StaticValueKnowledge& knowledge) const {
  const auto payloadAddr = addressOf(sendOp.getInput(), knowledge);
  const auto laneTargetCoreId = sendOp.getTargetCoreIds()[lane];
  emitCommunicationOp("send", payloadAddr, laneTargetCoreId, sendOp.getSize());
}
/// Emits one "send" communication op per core id of one lane of a batched
/// tensor send.
///
/// The size reported by getShapedTypeSizeInBytes for the input type is split
/// with integer division across `laneCoreIds`. Chunk i is sent to
/// `laneCoreIds[i]` from `inputAddr + i * chunkSize`; any remainder of the
/// division is not covered by a chunk.
///
/// @param sendOp      Op providing the input.
/// @param laneCoreIds Target core ids for the lane being generated.
/// @param knowledge   Static value knowledge used to resolve the input address.
void PimCodeGen::codeGenSendTensorBatchOp(pim::PimSendTensorBatchOp sendOp,
                                          ArrayRef<int32_t> laneCoreIds,
                                          const StaticValueKnowledge& knowledge) const {
  // An empty core-id list would make the chunk-size division below divide by zero.
  assert(!laneCoreIds.empty() && "batched tensor send requires at least one target core id per lane");
  // With assertions disabled, emit nothing rather than divide by zero.
  if (laneCoreIds.empty())
    return;

  size_t inputAddr = addressOf(sendOp.getInput(), knowledge);
  size_t chunkSize = getShapedTypeSizeInBytes(cast<ShapedType>(sendOp.getInput().getType())) / laneCoreIds.size();
  for (auto [chunkIndex, targetCoreId] : llvm::enumerate(laneCoreIds))
    emitCommunicationOp("send", inputAddr + chunkIndex * chunkSize, targetCoreId, chunkSize);
}
void PimCodeGen::codeGenConcatOp(pim::PimConcatOp concatOp, const StaticValueKnowledge& knowledge) const {
auto outputType = cast<ShapedType>(concatOp.getOutputBuffer().getType());
assert(outputType.hasStaticShape() && "concat codegen requires static output shape");
@@ -902,13 +853,7 @@ enum class CompiledCoreOpKind : uint8_t {
Store,
Lmv,
Receive,
ReceiveBatch,
ReceiveTensor,
ReceiveTensorBatch,
Send,
SendBatch,
SendTensor,
SendTensorBatch,
Concat,
Vmm,
Transpose,
@@ -952,20 +897,8 @@ static FailureOr<CompiledCoreOpKind> classifyCompiledCoreOpKind(Operation& op) {
return CompiledCoreOpKind::Lmv;
if (isa<pim::PimReceiveOp>(op))
return CompiledCoreOpKind::Receive;
if (isa<pim::PimReceiveBatchOp>(op))
return CompiledCoreOpKind::ReceiveBatch;
if (isa<pim::PimReceiveTensorOp>(op))
return CompiledCoreOpKind::ReceiveTensor;
if (isa<pim::PimReceiveTensorBatchOp>(op))
return CompiledCoreOpKind::ReceiveTensorBatch;
if (isa<pim::PimSendOp>(op))
return CompiledCoreOpKind::Send;
if (isa<pim::PimSendBatchOp>(op))
return CompiledCoreOpKind::SendBatch;
if (isa<pim::PimSendTensorOp>(op))
return CompiledCoreOpKind::SendTensor;
if (isa<pim::PimSendTensorBatchOp>(op))
return CompiledCoreOpKind::SendTensorBatch;
if (isa<pim::PimConcatOp>(op))
return CompiledCoreOpKind::Concat;
if (isa<pim::PimVMMOp>(op))
@@ -1108,43 +1041,9 @@ static LogicalResult executeCompiledCorePlan(const llvm::SmallVectorImpl<Compile
case CompiledCoreOpKind::Receive:
coreCodeGen.codeGenReceiveOp(cast<pim::PimReceiveOp>(node.op), knowledge);
break;
case CompiledCoreOpKind::ReceiveBatch:
if (!batchLane)
return failure();
coreCodeGen.codeGenReceiveBatchOp(cast<pim::PimReceiveBatchOp>(node.op), *batchLane, knowledge);
break;
case CompiledCoreOpKind::ReceiveTensor:
coreCodeGen.codeGenReceiveTensorOp(cast<pim::PimReceiveTensorOp>(node.op), knowledge);
break;
case CompiledCoreOpKind::ReceiveTensorBatch:
if (!batchLane || !batchLaneCount)
return failure();
coreCodeGen.codeGenReceiveTensorBatchOp(cast<pim::PimReceiveTensorBatchOp>(node.op),
getLaneChunkCoreIds(cast<pim::PimReceiveTensorBatchOp>(node.op).getSourceCoreIds(),
*batchLaneCount,
*batchLane),
knowledge);
break;
case CompiledCoreOpKind::Send:
coreCodeGen.codeGenSendOp(cast<pim::PimSendOp>(node.op), knowledge);
break;
case CompiledCoreOpKind::SendBatch:
if (!batchLane)
return failure();
coreCodeGen.codeGenSendBatchOp(cast<pim::PimSendBatchOp>(node.op), *batchLane, knowledge);
break;
case CompiledCoreOpKind::SendTensor:
coreCodeGen.codeGenSendTensorOp(cast<pim::PimSendTensorOp>(node.op), knowledge);
break;
case CompiledCoreOpKind::SendTensorBatch:
if (!batchLane || !batchLaneCount)
return failure();
coreCodeGen.codeGenSendTensorBatchOp(cast<pim::PimSendTensorBatchOp>(node.op),
getLaneChunkCoreIds(cast<pim::PimSendTensorBatchOp>(node.op).getTargetCoreIds(),
*batchLaneCount,
*batchLane),
knowledge);
break;
case CompiledCoreOpKind::Concat:
coreCodeGen.codeGenConcatOp(cast<pim::PimConcatOp>(node.op), knowledge);
break;