fast pim bufferization using tensors

2026-05-08 14:21:45 +02:00
parent 58e6587697
commit b1272d2283
7 changed files with 541 additions and 81 deletions
@@ -133,6 +133,18 @@ def PimSendManyOp : PimOp<"send_many", []> {
  let hasCustomAssemblyFormat = 1;
 }

+def PimSendTensorOp : PimOp<"send_tensor", []> {  // Empty trait list; this def declares no `results`.
+  let summary = "Send equal contiguous chunks of one tensor to target cores";
+
+  let arguments = (ins
+    PimTensor:$input,  // Tensor operand to be sent.
+    DenseI32ArrayAttr:$targetCoreIds  // i32 array of core ids; presumably one chunk per id -- TODO confirm in verify().
+  );
+
+  let hasVerifier = 1;  // ODS declares verify(); its C++ definition is not visible in this hunk.
+  let hasCustomAssemblyFormat = 1;  // ODS declares parse()/print(); their C++ definitions are not visible here.
+}
+
 def PimSendBatchOp : PimOp<"send_batch", []> {
  let summary = "Send a per-lane tensor to target cores from a batched core";

@@ -203,6 +215,28 @@ def PimReceiveManyOp : PimOp<"receive_many", [DestinationStyleOpInterface]> {
  let hasCustomAssemblyFormat = 1;
 }

+def PimReceiveTensorOp : PimOp<"receive_tensor", [DestinationStyleOpInterface]> {  // Destination-style op; $outputBuffer is its init operand.
+  let summary = "Receive equal contiguous chunks from source cores into one tensor";
+
+  let arguments = (ins
+    PimTensor:$outputBuffer,  // Destination operand; returned by getDpsInitsMutable() below.
+    DenseI32ArrayAttr:$sourceCoreIds  // i32 array of core ids; presumably one chunk per id -- TODO confirm in verify().
+  );
+
+  let results = (outs
+    PimTensor:$output  // Single tensor result of the op.
+  );
+
+  let extraClassDeclaration = [{
+    mlir::MutableOperandRange getDpsInitsMutable() {
+      return getOutputBufferMutable();
+    }
+  }];  // The C++ above supplies the DestinationStyleOpInterface hook, exposing $outputBuffer as the only init.
+
+  let hasVerifier = 1;  // ODS declares verify(); its C++ definition is not visible in this hunk.
+  let hasCustomAssemblyFormat = 1;  // ODS declares parse()/print(); their C++ definitions are not visible here.
+}
+
 def PimReceiveBatchOp : PimOp<"receive_batch", [DestinationStyleOpInterface]> {
  let summary = "Receive per-lane tensors from source cores into a batched core";