Use uniqued constant helpers everywhere; materialize transposed constants directly.
This commit is contained in:
@@ -9,6 +9,7 @@
|
||||
#include "llvm/Support/LogicalResult.h"
|
||||
|
||||
#include "src/Accelerators/PIM/Common/IR/AddressAnalysis.hpp"
|
||||
#include "src/Accelerators/PIM/Common/IR/BatchCoreUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
|
||||
|
||||
@@ -19,16 +20,6 @@ namespace pim {
|
||||
|
||||
namespace {
|
||||
|
||||
// Tells whether `operandIndex` is the single operand position this helper
// singles out for `op`:
//   - PimMemCopyHostToDevOp      -> operand 3
//   - PimMemCopyHostToDevBatchOp -> operand 1
//   - PimMemCopyDevToHostOp      -> operand 2
// Every other operation yields false for all operand indices.
// NOTE(review): presumably these positions are the host-side memory operands
// of the copy ops -- confirm against the Pim op definitions.
static bool isExplicitHostOperand(Operation* op, unsigned operandIndex) {
  return isa<PimMemCopyHostToDevOp>(op)        ? operandIndex == 3
         : isa<PimMemCopyHostToDevBatchOp>(op) ? operandIndex == 1
         : isa<PimMemCopyDevToHostOp>(op)      ? operandIndex == 2
                                               : false;
}
|
||||
|
||||
static Region* getParentRegion(Value value) {
|
||||
if (auto blockArgument = dyn_cast<BlockArgument>(value))
|
||||
return blockArgument.getParentRegion();
|
||||
@@ -63,7 +54,7 @@ static LogicalResult verifyOnlyConstantExternalValues(Operation* ownerOp, Region
|
||||
for (OpOperand& operand : op->getOpOperands()) {
|
||||
Value value = operand.get();
|
||||
if (isDefinedInsideRegion(value, region) || isConstantExternalValue(value)
|
||||
|| isExplicitHostOperand(op, operand.getOperandNumber()))
|
||||
|| isExplicitHostMemCopyOperand(op, operand.getOperandNumber()))
|
||||
continue;
|
||||
|
||||
InFlightDiagnostic diagnostic = ownerOp->emitOpError()
|
||||
|
||||
@@ -618,10 +618,6 @@ BlockArgument appendInput(MaterializerState& state, MaterializedClass& materiali
|
||||
llvm_unreachable("Cannot reach here");
|
||||
}
|
||||
|
||||
// Convenience wrapper: forwards `anchor` and `value`, together with the
// materializer's constant folder, to getOrCreateHostIndexConstant and returns
// its result unchanged.
// NOTE(review): presumably yields a uniqued index-typed constant placed
// relative to `anchor` -- confirm in getOrCreateHostIndexConstant.
Value createIndexConstant(MaterializerState& state, Operation* anchor, int64_t value) {
  auto& folder = state.constantFolder;
  return getOrCreateHostIndexConstant(folder, anchor, value);
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Tensor packing helpers.
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -681,7 +677,7 @@ Value scaleIndexByDim0Size(MaterializerState& state, Operation* anchor, Value in
|
||||
if (dim0Size == 1)
|
||||
return index;
|
||||
|
||||
Value dim0SizeValue = createIndexConstant(state, anchor, dim0Size);
|
||||
Value dim0SizeValue = getOrCreateIndexConstant(state.constantFolder, anchor, dim0Size);
|
||||
return arith::MulIOp::create(state.rewriter, loc, index, dim0SizeValue).getResult();
|
||||
}
|
||||
|
||||
@@ -731,7 +727,7 @@ std::optional<Value> extractPackedProducerSlice(MaterializerState& state,
|
||||
|
||||
state.rewriter.setInsertionPoint(materializedClass.body->getTerminator());
|
||||
|
||||
Value firstOffset = createIndexConstant(state, materializedClass.op, rowOffset);
|
||||
Value firstOffset = getOrCreateIndexConstant(state.constantFolder, materializedClass.op, rowOffset);
|
||||
return createDim0ExtractSlice(state, materializedClass.op->getLoc(), packed, firstOffset, rowCount);
|
||||
}
|
||||
|
||||
@@ -754,7 +750,7 @@ Value getPackedSliceForRunIndex(MaterializerState& state,
|
||||
size_t index,
|
||||
Location loc) {
|
||||
int64_t rowOffset = static_cast<int64_t>(index) * fragmentType.getDimSize(0);
|
||||
Value firstOffset = createIndexConstant(state, anchor, rowOffset);
|
||||
Value firstOffset = getOrCreateIndexConstant(state.constantFolder, anchor, rowOffset);
|
||||
return createDim0ExtractSlice(state, loc, packed, firstOffset, fragmentType.getDimSize(0));
|
||||
}
|
||||
|
||||
@@ -939,7 +935,7 @@ Value createIndexTensorConstant(MaterializerState& state, Operation* anchor, Arr
|
||||
|
||||
auto type = RankedTensorType::get({static_cast<int64_t>(values.size())}, state.rewriter.getIndexType());
|
||||
auto attr = DenseIntElementsAttr::get(type, elements);
|
||||
return getOrCreateHostConstant(state.constantFolder, anchor, attr, type);
|
||||
return getOrCreateConstant(state.constantFolder, anchor, attr, type);
|
||||
}
|
||||
|
||||
bool allEqual(ArrayRef<int64_t> values) {
|
||||
@@ -1041,7 +1037,7 @@ Value createIndexedIndexValue(
|
||||
assert(!values.empty() && "expected at least one indexed value");
|
||||
|
||||
if (allEqual(values))
|
||||
return createIndexConstant(state, anchor, values.front());
|
||||
return getOrCreateIndexConstant(state.constantFolder, anchor, values.front());
|
||||
|
||||
if (std::optional<IndexedIndexPattern> pattern = getIndexedIndexPattern(values))
|
||||
return createAffineIndexValue(state, *pattern, index, loc);
|
||||
@@ -1110,7 +1106,7 @@ Value createOriginalLaneValue(MaterializerState& state,
|
||||
Location loc) {
|
||||
assert(!peers.empty() && "expected at least one peer instance");
|
||||
if (!materializedClass.isBatch)
|
||||
return createIndexConstant(state, materializedClass.op, peers.front().laneStart);
|
||||
return getOrCreateIndexConstant(state.constantFolder, materializedClass.op, peers.front().laneStart);
|
||||
|
||||
auto batch = cast<SpatComputeBatch>(materializedClass.op);
|
||||
auto laneArg = batch.getLaneArgument();
|
||||
@@ -1465,9 +1461,9 @@ void appendScalarSend(MaterializerState& state,
|
||||
assert(!sourceClass.isBatch && "scalar send helper expects a scalar source class");
|
||||
|
||||
state.rewriter.setInsertionPoint(sourceClass.body->getTerminator());
|
||||
Value channelIdValue = createIndexConstant(state, sourceClass.op, channelId);
|
||||
Value sourceCoreIdValue = createIndexConstant(state, sourceClass.op, sourceCoreId);
|
||||
Value targetCoreIdValue = createIndexConstant(state, sourceClass.op, targetCoreId);
|
||||
Value channelIdValue = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, channelId);
|
||||
Value sourceCoreIdValue = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, sourceCoreId);
|
||||
Value targetCoreIdValue = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, targetCoreId);
|
||||
SpatChannelSendOp::create(state.rewriter, loc, channelIdValue, sourceCoreIdValue, targetCoreIdValue, payload);
|
||||
}
|
||||
|
||||
@@ -1485,9 +1481,9 @@ void appendScalarSendLoop(MaterializerState& state,
|
||||
|
||||
state.rewriter.setInsertionPoint(sourceClass.body->getTerminator());
|
||||
|
||||
Value lowerBound = createIndexConstant(state, sourceClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, sourceClass.op, static_cast<int64_t>(channelIds.size()));
|
||||
Value step = createIndexConstant(state, sourceClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, static_cast<int64_t>(channelIds.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 1);
|
||||
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {});
|
||||
|
||||
@@ -1514,9 +1510,9 @@ Value buildProjectedPackedPayload(MaterializerState& state,
|
||||
state.rewriter, loc, descriptor.payloadType.getShape(), descriptor.payloadType.getElementType())
|
||||
.getResult();
|
||||
|
||||
Value lowerBound = createIndexConstant(state, anchor, 0);
|
||||
Value upperBound = createIndexConstant(state, anchor, descriptor.fragmentsPerLane);
|
||||
Value step = createIndexConstant(state, anchor, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, anchor, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, anchor, descriptor.fragmentsPerLane);
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, anchor, 1);
|
||||
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {init});
|
||||
|
||||
@@ -1531,7 +1527,7 @@ Value buildProjectedPackedPayload(MaterializerState& state,
|
||||
Value fragmentIndex = loop.getInductionVar();
|
||||
Value acc = body->getArgument(1);
|
||||
|
||||
Value fragmentsPerLane = createIndexConstant(state, anchor, descriptor.fragmentsPerLane);
|
||||
Value fragmentsPerLane = getOrCreateIndexConstant(state.constantFolder, anchor, descriptor.fragmentsPerLane);
|
||||
Value flatBase = arith::MulIOp::create(state.rewriter, loc, laneIndex, fragmentsPerLane).getResult();
|
||||
Value flatIndex = arith::AddIOp::create(state.rewriter, loc, flatBase, fragmentIndex).getResult();
|
||||
|
||||
@@ -1562,13 +1558,14 @@ void appendProjectedScalarSendLoop(MaterializerState& state,
|
||||
state.rewriter.setInsertionPoint(sourceClass.body->getTerminator());
|
||||
|
||||
if (channelIds.size() == 1) {
|
||||
Value channelId = createIndexConstant(state, sourceClass.op, channelIds.front());
|
||||
Value sourceCoreId = createIndexConstant(state, sourceClass.op, sourceCoreIds.front());
|
||||
Value targetCoreId = createIndexConstant(state, sourceClass.op, targetCoreIds.front());
|
||||
Value laneIndex = createIndexConstant(state, sourceClass.op, 0);
|
||||
Value channelId = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, channelIds.front());
|
||||
Value sourceCoreId = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, sourceCoreIds.front());
|
||||
Value targetCoreId = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, targetCoreIds.front());
|
||||
Value laneIndex = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 0);
|
||||
Value sendPayload;
|
||||
if (descriptor.fragmentsPerLane == 1) {
|
||||
Value offset = createIndexConstant(state, sourceClass.op, descriptor.laneMajorSourceDim0Offsets.front());
|
||||
Value offset =
|
||||
getOrCreateIndexConstant(state.constantFolder, sourceClass.op, descriptor.laneMajorSourceDim0Offsets.front());
|
||||
sendPayload = createDim0ExtractSlice(state, loc, payload, offset, descriptor.fragmentType.getDimSize(0));
|
||||
}
|
||||
else {
|
||||
@@ -1579,9 +1576,9 @@ void appendProjectedScalarSendLoop(MaterializerState& state,
|
||||
return;
|
||||
}
|
||||
|
||||
Value lowerBound = createIndexConstant(state, sourceClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, sourceClass.op, static_cast<int64_t>(channelIds.size()));
|
||||
Value step = createIndexConstant(state, sourceClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, static_cast<int64_t>(channelIds.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, sourceClass.op, 1);
|
||||
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {});
|
||||
|
||||
@@ -1645,9 +1642,9 @@ Value appendScalarReceive(MaterializerState& state,
|
||||
assert(!targetClass.isBatch && "scalar receive helper expects a scalar target class");
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
Value channelIdValue = createIndexConstant(state, targetClass.op, channelId);
|
||||
Value sourceCoreIdValue = createIndexConstant(state, targetClass.op, sourceCoreId);
|
||||
Value targetCoreIdValue = createIndexConstant(state, targetClass.op, targetCoreId);
|
||||
Value channelIdValue = getOrCreateIndexConstant(state.constantFolder, targetClass.op, channelId);
|
||||
Value sourceCoreIdValue = getOrCreateIndexConstant(state.constantFolder, targetClass.op, sourceCoreId);
|
||||
Value targetCoreIdValue = getOrCreateIndexConstant(state.constantFolder, targetClass.op, targetCoreId);
|
||||
return SpatChannelReceiveOp::create(state.rewriter, loc, type, channelIdValue, sourceCoreIdValue, targetCoreIdValue)
|
||||
.getOutput();
|
||||
}
|
||||
@@ -2132,9 +2129,9 @@ FailureOr<Value> materializeDeferredLocalPackedScalarRunValue(MaterializerState&
|
||||
Value init =
|
||||
tensor::EmptyOp::create(state.rewriter, loc, packedType->getShape(), packedType->getElementType()).getResult();
|
||||
|
||||
Value lowerBound = createIndexConstant(state, targetClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, targetClass.op, static_cast<int64_t>(keys.size()));
|
||||
Value step = createIndexConstant(state, targetClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast<int64_t>(keys.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1);
|
||||
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {init});
|
||||
|
||||
@@ -2198,9 +2195,9 @@ FailureOr<Value> insertDeferredLocalPackedScalarRunIntoWholeBatch(MaterializerSt
|
||||
|
||||
SmallVector<size_t, 1> resultIndices {run.resultIndex};
|
||||
|
||||
Value lowerBound = createIndexConstant(state, targetClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, targetClass.op, static_cast<int64_t>(keys.size()));
|
||||
Value step = createIndexConstant(state, targetClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast<int64_t>(keys.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1);
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {destination});
|
||||
@@ -2262,9 +2259,10 @@ FailureOr<Value> insertDeferredPackedScalarRunIntoWholeBatch(MaterializerState&
|
||||
if (outputOffsets.size() != run.channelIds.size())
|
||||
return failure();
|
||||
|
||||
Value lowerBound = createIndexConstant(state, targetClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, targetClass.op, static_cast<int64_t>(run.channelIds.size()));
|
||||
Value step = createIndexConstant(state, targetClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0);
|
||||
Value upperBound =
|
||||
getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast<int64_t>(run.channelIds.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1);
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {destination});
|
||||
@@ -2343,9 +2341,9 @@ FailureOr<Value> insertPackedScalarRunIntoWholeBatch(MaterializerState& state,
|
||||
slotRowOffsets.push_back(static_cast<int64_t>(slotKey->instance.laneStart) * plan.rowsPerLane);
|
||||
}
|
||||
|
||||
Value lowerBound = createIndexConstant(state, targetClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, targetClass.op, static_cast<int64_t>(run.slots.size()));
|
||||
Value step = createIndexConstant(state, targetClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast<int64_t>(run.slots.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1);
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {destination});
|
||||
@@ -2507,7 +2505,7 @@ FailureOr<Value> emitWholeBatchAssemblyPlan(MaterializerState& state,
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
|
||||
int64_t rowOffset = static_cast<int64_t>(fragment.key.instance.laneStart) * plan.rowsPerLane;
|
||||
Value outputOffset = createIndexConstant(state, targetClass.op, rowOffset);
|
||||
Value outputOffset = getOrCreateIndexConstant(state.constantFolder, targetClass.op, rowOffset);
|
||||
result = insertFragmentIntoWholeBatch(state, fragment.fragment, result, outputOffset, loc);
|
||||
}
|
||||
|
||||
@@ -3050,7 +3048,7 @@ FailureOr<SmallVector<Value, 4>> materializeBatchOutputGroupLoop(MaterializerSta
|
||||
const ComputeInstance& instance = run.front().peers.front();
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
Value laneValue = createIndexConstant(state, targetClass.op, instance.laneStart);
|
||||
Value laneValue = getOrCreateIndexConstant(state.constantFolder, targetClass.op, instance.laneStart);
|
||||
return cloneBatchBodyForLane(state, targetClass, instance, laneValue, group.resultIndices);
|
||||
}
|
||||
|
||||
@@ -3087,9 +3085,9 @@ FailureOr<SmallVector<Value, 4>> materializeBatchOutputGroupLoop(MaterializerSta
|
||||
laneStarts.push_back(instance.laneStart);
|
||||
}
|
||||
|
||||
Value lowerBound = createIndexConstant(state, targetClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, targetClass.op, static_cast<int64_t>(run.size()));
|
||||
Value step = createIndexConstant(state, targetClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast<int64_t>(run.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1);
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange(initValues));
|
||||
@@ -3563,9 +3561,9 @@ LogicalResult materializeBatchClassRun(MaterializerState& state,
|
||||
if (failed(buildBatchRunSendPlans(state, targetClass, run, group, sendPlans)))
|
||||
return failure();
|
||||
|
||||
Value lowerBound = createIndexConstant(state, targetClass.op, 0);
|
||||
Value upperBound = createIndexConstant(state, targetClass.op, static_cast<int64_t>(run.size()));
|
||||
Value step = createIndexConstant(state, targetClass.op, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, targetClass.op, static_cast<int64_t>(run.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, targetClass.op, 1);
|
||||
|
||||
state.rewriter.setInsertionPoint(targetClass.body->getTerminator());
|
||||
auto loop = scf::ForOp::create(state.rewriter, loc, lowerBound, upperBound, step, ValueRange {});
|
||||
@@ -3669,9 +3667,9 @@ Value createReceiveConcatLoop(MaterializerState& state,
|
||||
assert(channelIds.size() == targetCoreIds.size() && "channel/target count mismatch");
|
||||
assert(!channelIds.empty() && "expected at least one receive");
|
||||
|
||||
Value lowerBound = createIndexConstant(state, anchor, 0);
|
||||
Value upperBound = createIndexConstant(state, anchor, static_cast<int64_t>(channelIds.size()));
|
||||
Value step = createIndexConstant(state, anchor, 1);
|
||||
Value lowerBound = getOrCreateIndexConstant(state.constantFolder, anchor, 0);
|
||||
Value upperBound = getOrCreateIndexConstant(state.constantFolder, anchor, static_cast<int64_t>(channelIds.size()));
|
||||
Value step = getOrCreateIndexConstant(state.constantFolder, anchor, 1);
|
||||
|
||||
state.rewriter.setInsertionPoint(insertionPoint);
|
||||
Value init =
|
||||
|
||||
Reference in New Issue
Block a user