Files
Raptor/src/PIM/Compiler/PimMemoryLiveness.cpp
T
NiccoloN 69021d56aa
Validate Operations / validate-operations (push) Has been cancelled
automatic code reformat
2026-06-03 19:43:56 +02:00

734 lines
29 KiB
C++

#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Value.h"
#include "mlir/Interfaces/DestinationStyleOpInterface.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>
#include <string>
#include <tuple>
#include <utility>
#include "Common/Support/CheckedArithmetic.hpp"
#include "Common/Support/ReportUtils.hpp"
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
#include "src/Accelerators/PIM/Compiler/PimMemoryLiveness.hpp"
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
using namespace llvm;
using namespace mlir;
using namespace onnx_mlir;
namespace {
static std::optional<unsigned> getLaneForMemoryValue(mlir::Value value, std::optional<unsigned> lane) {
if (!lane)
return std::nullopt;
auto allocOp = value.getDefiningOp<memref::AllocOp>();
if (!allocOp || !allocOp->getParentOfType<pim::PimCoreBatchOp>())
return std::nullopt;
return lane;
}
static MemoryValueKey getMemoryValueKey(mlir::Value value, std::optional<unsigned> lane = std::nullopt) {
return {value, getLaneForMemoryValue(value, lane)};
}
struct MemoryTouchInterval {
uint64_t start = 0;
uint64_t end = 0;
Operation* startOp = nullptr;
Operation* endOp = nullptr;
Operation* firstTouchOp = nullptr;
Operation* lastTouchOp = nullptr;
uint64_t firstTouchPosition = 0;
uint64_t lastTouchPosition = 0;
bool hasRuntimeUse = false;
bool startUsedAllocFallback = false;
bool endUsedFallback = false;
bool escapesLoop = false;
std::string fallbackReason;
llvm::SmallVector<std::string, 8> aliasesFollowed;
};
struct OperationOrdering {
llvm::DenseMap<Operation*, uint64_t> position;
llvm::DenseMap<Operation*, uint64_t> subtreeEnd;
uint64_t nextPosition = 0;
};
static std::string printValueToString(mlir::Value value) {
std::string text;
llvm::raw_string_ostream os(text);
value.print(os);
os.flush();
return text;
}
static std::string printOperationToString(Operation* op) {
if (!op)
return "<none>";
std::string text;
llvm::raw_string_ostream os(text);
op->print(os);
os.flush();
return text;
}
static std::string printLocationToString(Location loc) {
std::string text;
llvm::raw_string_ostream os(text);
loc.print(os);
os.flush();
return text;
}
static std::string collapseWhitespace(StringRef text) {
std::string out;
out.reserve(text.size());
bool lastWasSpace = false;
for (char c : text) {
bool isSpace = c == ' ' || c == '\n' || c == '\t' || c == '\r';
if (isSpace) {
if (!lastWasSpace && !out.empty())
out.push_back(' ');
lastWasSpace = true;
continue;
}
out.push_back(c);
lastWasSpace = false;
}
return out;
}
static std::string abbreviate(StringRef text, size_t maxLen) {
if (text.size() <= maxLen)
return text.str();
return (text.take_front(maxLen - 3) + "...").str();
}
static std::string summarizeValue(mlir::Value value, size_t maxLen = 72) {
return abbreviate(collapseWhitespace(printValueToString(value)), maxLen);
}
static std::string summarizeOperation(Operation* op, size_t maxLen = 96) {
if (!op)
return "<none>";
std::string prefix = op->getName().getStringRef().str();
std::string full = collapseWhitespace(printOperationToString(op));
if (full == prefix)
return prefix;
return abbreviate(prefix + " :: " + full, maxLen);
}
static std::string summarizeLocation(Location loc, size_t maxLen = 88) {
return abbreviate(collapseWhitespace(printLocationToString(loc)), maxLen);
}
static void assignOperationOrdering(Operation* op, OperationOrdering& ordering) {
uint64_t position = ordering.nextPosition++;
ordering.position[op] = position;
uint64_t end = position;
for (Region& region : op->getRegions())
for (Block& block : region)
for (Operation& nestedOp : block) {
assignOperationOrdering(&nestedOp, ordering);
end = std::max(end, ordering.subtreeEnd.lookup(&nestedOp));
}
ordering.subtreeEnd[op] = end;
}
static OperationOrdering buildOperationOrdering(Operation* coreLikeOp) {
OperationOrdering ordering;
if (!coreLikeOp || coreLikeOp->getNumRegions() != 1 || coreLikeOp->getRegion(0).empty())
return ordering;
for (Operation& op : coreLikeOp->getRegion(0).front())
assignOperationOrdering(&op, ordering);
return ordering;
}
static bool isSupportedAliasOp(Operation* op) {
return isa<memref::SubViewOp, memref::CastOp, memref::CollapseShapeOp, memref::ExpandShapeOp>(op);
}
static bool isRuntimeMemoryTouchOp(Operation* op) {
return isa<pim::PimMemCopyHostToDevOp,
pim::PimMemCopyDevToHostOp,
pim::PimMemCopyOp,
pim::PimReceiveOp,
pim::PimSendOp,
pim::PimConcatOp,
pim::PimVMMOp,
pim::PimTransposeOp,
pim::PimVVAddOp,
pim::PimVVSubOp,
pim::PimVVMulOp,
pim::PimVVMaxOp,
pim::PimVVDMulOp,
pim::PimVAvgOp,
pim::PimVReluOp,
pim::PimVTanhOp,
pim::PimVSigmOp,
pim::PimVSoftmaxOp>(op);
}
static bool isIgnoredLivenessUser(Operation* op) {
return isSupportedAliasOp(op) || isa<scf::ForOp, scf::YieldOp, memref::DeallocOp>(op) || isCoreStaticAddressOp(op);
}
static bool isWithin(mlir::Value value, Region* region) {
if (!region)
return false;
if (auto blockArg = dyn_cast<BlockArgument>(value))
return blockArg.getOwner()->getParent() == region;
if (Operation* definingOp = value.getDefiningOp())
return definingOp->getParentRegion() == region || region->isAncestor(definingOp->getParentRegion());
return false;
}
static bool isNestedAllocation(Operation* coreLikeOp, memref::AllocOp allocOp) {
if (!coreLikeOp || coreLikeOp->getNumRegions() != 1 || coreLikeOp->getRegion(0).empty())
return false;
return allocOp->getBlock() != &coreLikeOp->getRegion(0).front();
}
static void addFallbackReason(std::string& reason, StringRef newReason) {
if (newReason.empty())
return;
if (!reason.empty())
reason += "; ";
reason += newReason.str();
}
static void appendAliasDescription(llvm::SmallVectorImpl<std::string>& aliases, mlir::Value value) {
std::string text = printValueToString(value);
if (!llvm::is_contained(aliases, text))
aliases.push_back(std::move(text));
}
struct OrderedTouchRange {
uint64_t start = 0;
uint64_t end = 0;
Operation* startOp = nullptr;
Operation* endOp = nullptr;
bool escapedLoop = false;
};
static OrderedTouchRange
getEffectiveTouchRange(mlir::Value definingValue, Operation* user, const OperationOrdering& ordering) {
OrderedTouchRange range {ordering.position.lookup(user), ordering.position.lookup(user), user, user, false};
for (Operation* current = user; current; current = current->getParentOp()) {
auto forOp = dyn_cast<scf::ForOp>(current);
if (!forOp || isWithin(definingValue, &forOp.getRegion()))
continue;
range.start = std::min(range.start, ordering.position.lookup(forOp));
range.end = std::max(range.end, ordering.subtreeEnd.lookup(forOp));
range.startOp = forOp;
range.endOp = forOp;
range.escapedLoop = true;
}
return range;
}
static MemoryTouchInterval
computeMemoryTouchInterval(memref::AllocOp allocOp, const OperationOrdering& ordering, uint64_t fallbackEnd) {
MemoryTouchInterval interval;
interval.start = ordering.position.lookup(allocOp);
interval.end = interval.start;
interval.startOp = allocOp;
interval.endOp = allocOp;
SmallPtrSet<mlir::Value, 16> visitedValues;
SmallPtrSet<Operation*, 32> visitedUsers;
SmallVector<mlir::Value> pendingValues;
pendingValues.push_back(allocOp.getResult());
auto parentLoop = allocOp->getParentOfType<scf::ForOp>();
while (!pendingValues.empty()) {
mlir::Value value = pendingValues.pop_back_val();
if (!visitedValues.insert(value).second)
continue;
for (Operation* user : value.getUsers()) {
if (!visitedUsers.insert(user).second)
continue;
if (isSupportedAliasOp(user)) {
for (mlir::Value result : user->getResults()) {
pendingValues.push_back(result);
appendAliasDescription(interval.aliasesFollowed, result);
}
}
if (auto dpsOp = dyn_cast<DestinationStyleOpInterface>(user)) {
for (OpResult result : user->getResults()) {
OpOperand* tiedOperand = dpsOp.getTiedOpOperand(result);
if (!tiedOperand || tiedOperand->get() != value)
continue;
pendingValues.push_back(result);
appendAliasDescription(interval.aliasesFollowed, result);
}
}
if (auto forOp = dyn_cast<scf::ForOp>(user)) {
for (auto [index, initArg] : llvm::enumerate(forOp.getInitArgs())) {
if (initArg != value)
continue;
pendingValues.push_back(forOp.getRegionIterArgs()[index]);
pendingValues.push_back(forOp.getResult(index));
appendAliasDescription(interval.aliasesFollowed, forOp.getRegionIterArgs()[index]);
appendAliasDescription(interval.aliasesFollowed, forOp.getResult(index));
if (parentLoop && forOp != parentLoop)
interval.escapesLoop = true;
}
}
if (auto yieldOp = dyn_cast<scf::YieldOp>(user)) {
auto forOp = dyn_cast<scf::ForOp>(yieldOp->getParentOp());
if (!forOp) {
addFallbackReason(interval.fallbackReason, "yield without scf.for parent");
}
else {
for (auto [index, operand] : llvm::enumerate(yieldOp.getOperands())) {
if (operand != value)
continue;
pendingValues.push_back(forOp.getResult(index));
appendAliasDescription(interval.aliasesFollowed, forOp.getResult(index));
if (parentLoop && forOp == parentLoop)
interval.escapesLoop = true;
}
}
}
if (isRuntimeMemoryTouchOp(user)) {
uint64_t touchPosition = ordering.position.lookup(user);
if (!interval.hasRuntimeUse || touchPosition < interval.firstTouchPosition) {
interval.firstTouchPosition = touchPosition;
interval.firstTouchOp = user;
}
if (!interval.hasRuntimeUse || touchPosition > interval.lastTouchPosition) {
interval.lastTouchPosition = touchPosition;
interval.lastTouchOp = user;
}
OrderedTouchRange range = getEffectiveTouchRange(allocOp.getResult(), user, ordering);
interval.escapesLoop |= range.escapedLoop;
if (!interval.hasRuntimeUse) {
interval.start = range.start;
interval.end = range.end;
interval.startOp = range.startOp;
interval.endOp = range.endOp;
interval.hasRuntimeUse = true;
}
else {
if (range.start < interval.start) {
interval.start = range.start;
interval.startOp = range.startOp;
}
if (range.end > interval.end) {
interval.end = range.end;
interval.endOp = range.endOp;
}
}
continue;
}
if (isIgnoredLivenessUser(user))
continue;
addFallbackReason(interval.fallbackReason, "unhandled user op");
interval.endUsedFallback = true;
}
}
if (!interval.hasRuntimeUse) {
interval.startUsedAllocFallback = true;
interval.endUsedFallback = true;
interval.start = ordering.position.lookup(allocOp);
interval.end = fallbackEnd;
interval.startOp = allocOp;
interval.endOp = allocOp->getParentOp();
interval.firstTouchPosition = interval.start;
interval.lastTouchPosition = interval.end;
addFallbackReason(interval.fallbackReason, "no runtime memory touch");
return interval;
}
if (interval.endUsedFallback) {
interval.end = std::max(interval.end, fallbackEnd);
interval.endOp = allocOp->getParentOp();
}
return interval;
}
static FailureOr<size_t> getAllocSizeBytes(memref::AllocOp allocOp) {
auto type = dyn_cast<ShapedType>(allocOp.getType());
if (!type)
return failure();
auto checkedBytes = pim::getCheckedShapedTypeSizeInBytes(type, allocOp, "memory allocation byte size");
if (failed(checkedBytes))
return failure();
return pim::checkedSize(*checkedBytes, allocOp, "memory allocation byte size");
}
static bool intervalsOverlap(const LocalAllocInterval& lhs, const LocalAllocInterval& rhs) {
return !(lhs.end < rhs.start || rhs.end < lhs.start);
}
static uint64_t getSlotLogicalBytes(const PlannedPhysicalSlot& slot, ArrayRef<LocalAllocInterval> intervals) {
uint64_t slotLogicalBytes = 0;
for (size_t intervalIndex : slot.intervalIndices)
slotLogicalBytes += intervals[intervalIndex].size;
return slotLogicalBytes;
}
} // namespace
SmallVector<LocalAllocInterval, 0> onnx_mlir::buildLocalAllocIntervals(Operation* coreLikeOp,
std::optional<unsigned> lane) {
SmallVector<LocalAllocInterval, 0> intervals;
OperationOrdering ordering = buildOperationOrdering(coreLikeOp);
if (ordering.position.empty())
return intervals;
uint64_t fallbackEnd = ordering.nextPosition == 0 ? 0 : ordering.nextPosition - 1;
size_t nextIntervalId = 0;
coreLikeOp->walk([&](memref::AllocOp allocOp) {
auto checkedSize = getAllocSizeBytes(allocOp);
if (failed(checkedSize)) {
llvm::errs() << "Failed to compute local allocation size for value: ";
allocOp.getResult().print(llvm::errs());
llvm::errs() << "\n";
llvm_unreachable("Failed to compute local allocation size");
}
MemoryTouchInterval touchInterval = computeMemoryTouchInterval(allocOp, ordering, fallbackEnd);
LocalAllocInterval interval;
interval.id = nextIntervalId++;
interval.alloc = allocOp;
interval.key = getMemoryValueKey(allocOp.getResult(), lane);
interval.start = touchInterval.start;
interval.end = touchInterval.end;
interval.size = *checkedSize;
interval.startOp = touchInterval.startOp;
interval.endOp = touchInterval.endOp;
interval.firstTouchOp = touchInterval.firstTouchOp;
interval.lastTouchOp = touchInterval.lastTouchOp;
interval.firstTouchPosition = touchInterval.firstTouchPosition;
interval.lastTouchPosition = touchInterval.lastTouchPosition;
interval.startUsedAllocFallback = touchInterval.startUsedAllocFallback;
interval.endUsedFallback = touchInterval.endUsedFallback;
interval.hasRuntimeUse = touchInterval.hasRuntimeUse;
interval.insideNestedRegion = isNestedAllocation(coreLikeOp, allocOp);
interval.escapesLoop = touchInterval.escapesLoop;
interval.fallbackReason = std::move(touchInterval.fallbackReason);
interval.aliasesFollowed = std::move(touchInterval.aliasesFollowed);
intervals.push_back(std::move(interval));
});
return intervals;
}
SmallVector<PlannedPhysicalSlot, 0> onnx_mlir::planPhysicalSlots(MutableArrayRef<LocalAllocInterval> intervals) {
SmallVector<PlannedPhysicalSlot, 0> slots;
SmallVector<size_t> intervalOrder(intervals.size());
std::iota(intervalOrder.begin(), intervalOrder.end(), 0);
llvm::stable_sort(intervalOrder, [&](size_t lhsIndex, size_t rhsIndex) {
const LocalAllocInterval& lhs = intervals[lhsIndex];
const LocalAllocInterval& rhs = intervals[rhsIndex];
if (lhs.size != rhs.size)
return lhs.size > rhs.size;
if (lhs.start != rhs.start)
return lhs.start < rhs.start;
if (lhs.end != rhs.end)
return lhs.end < rhs.end;
return lhs.id < rhs.id;
});
for (size_t intervalIndex : intervalOrder) {
LocalAllocInterval& interval = intervals[intervalIndex];
PlannedPhysicalSlot* bestSlot = nullptr;
auto bestKey = std::tuple<size_t, size_t, size_t, size_t>(std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::max(),
std::numeric_limits<size_t>::max());
for (size_t slotIndex = 0; slotIndex < slots.size(); ++slotIndex) {
PlannedPhysicalSlot& slot = slots[slotIndex];
bool compatible = true;
for (size_t otherIndex : slot.intervalIndices) {
if (intervalsOverlap(interval, intervals[otherIndex])) {
compatible = false;
break;
}
}
if (!compatible)
continue;
size_t resultingSize = std::max(slot.requiredSize, interval.size);
size_t growth = resultingSize - slot.requiredSize;
auto candidateKey =
std::tuple<size_t, size_t, size_t, size_t>(growth, resultingSize, slot.intervalIndices.size(), slot.id);
if (candidateKey < bestKey) {
bestKey = candidateKey;
bestSlot = &slot;
}
}
if (!bestSlot) {
slots.push_back({slots.size(), interval.size, interval.size, 0, {intervalIndex}});
interval.slotPlanIndex = slots.size() - 1;
interval.physicalSlotId = slots.back().id;
interval.physicalSlotSize = slots.back().requiredSize;
continue;
}
bestSlot->requiredSize = std::max(bestSlot->requiredSize, interval.size);
bestSlot->size = bestSlot->requiredSize;
bestSlot->intervalIndices.push_back(intervalIndex);
interval.slotPlanIndex = static_cast<size_t>(bestSlot - slots.data());
interval.physicalSlotId = bestSlot->id;
interval.physicalSlotSize = bestSlot->requiredSize;
}
return slots;
}
MemoryPlanArtifacts onnx_mlir::buildMemoryPlanArtifacts(Operation* coreLikeOp,
std::optional<unsigned> lane,
ArrayRef<LocalAllocInterval> intervals,
ArrayRef<PlannedPhysicalSlot> slots,
size_t addressLimit,
PimMemoryReportLevel reportLevel) {
MemoryPlanArtifacts artifacts;
uint64_t totalLogicalBytes = 0;
uint64_t totalPhysicalBytes = 0;
uint64_t fallbackIntervals = 0;
uint64_t noRuntimeTouchIntervals = 0;
uint64_t reusedAllocations = 0;
uint64_t nestedIntervals = 0;
uint64_t loopEscapingIntervals = 0;
size_t largestLogicalAllocation = 0;
size_t largestPhysicalSlot = 0;
size_t maximumAssignedAddress = 0;
for (const LocalAllocInterval& interval : intervals) {
totalLogicalBytes += interval.size;
largestLogicalAllocation = std::max(largestLogicalAllocation, interval.size);
maximumAssignedAddress = std::max(maximumAssignedAddress, interval.assignedAddress + interval.physicalSlotSize);
if (interval.startUsedAllocFallback || interval.endUsedFallback)
++fallbackIntervals;
if (!interval.hasRuntimeUse)
++noRuntimeTouchIntervals;
if (interval.insideNestedRegion)
++nestedIntervals;
if (interval.escapesLoop)
++loopEscapingIntervals;
}
for (const PlannedPhysicalSlot& slot : slots) {
totalPhysicalBytes += slot.size;
largestPhysicalSlot = std::max(largestPhysicalSlot, slot.size);
if (slot.intervalIndices.size() > 1)
reusedAllocations += slot.intervalIndices.size() - 1;
}
uint64_t savedBytes = totalLogicalBytes >= totalPhysicalBytes ? totalLogicalBytes - totalPhysicalBytes : 0;
double savedPercent =
totalLogicalBytes == 0 ? 0.0 : 100.0 * static_cast<double>(savedBytes) / static_cast<double>(totalLogicalBytes);
raw_string_ostream os(artifacts.textReport);
os << "=== PIM Memory Liveness Report ===\n";
os << "Op: " << coreLikeOp->getName() << "\n";
if (lane)
os << "Lane: " << *lane << "\n";
os << "Summary:\n";
os << " logical allocation bytes: " << formatReportMemory(totalLogicalBytes) << " (" << totalLogicalBytes << ")\n";
os << " physical allocation bytes: " << formatReportMemory(totalPhysicalBytes) << " (" << totalPhysicalBytes
<< ")\n";
os << " saved bytes: " << formatReportMemory(savedBytes) << " (" << savedBytes << ")\n";
os << " saved percent: " << format("%.2f%%", savedPercent) << "\n";
os << " intervals: " << intervals.size() << "\n";
os << " physical slots: " << slots.size() << "\n";
os << " reused allocations: " << reusedAllocations << "\n";
os << " fallback intervals: " << fallbackIntervals << "\n";
os << " intervals with no runtime memory touch: " << noRuntimeTouchIntervals << "\n";
os << " nested allocations: " << nestedIntervals << "\n";
os << " loop-escaping allocations: " << loopEscapingIntervals << "\n";
os << " largest logical allocation: " << largestLogicalAllocation << "\n";
os << " largest physical slot: " << largestPhysicalSlot << "\n";
os << " address limit: " << addressLimit << "\n";
os << " peak physical memory: " << formatReportMemory(maximumAssignedAddress) << " (" << maximumAssignedAddress
<< ")\n";
os << " maximum assigned address: " << maximumAssignedAddress << "\n";
os << "\nHow To Read:\n";
os << " `summary` only shows the strongest reuse cases and the worst offenders.\n";
os << " Use `--pim-memory-report=full` when you need the complete slot-by-slot and interval-by-interval dump.\n";
os << " Large single-use slots, fallback intervals, and nested single-use allocations are the best places\n";
os << " to inspect if allocations should be moved, sunk, or made easier to coalesce earlier in the pipeline.\n";
SmallVector<const PlannedPhysicalSlot*> reusedSlots;
SmallVector<const PlannedPhysicalSlot*> singleUseSlots;
for (const PlannedPhysicalSlot& slot : slots)
if (slot.intervalIndices.size() > 1)
reusedSlots.push_back(&slot);
else
singleUseSlots.push_back(&slot);
llvm::stable_sort(reusedSlots, [&](const PlannedPhysicalSlot* lhs, const PlannedPhysicalSlot* rhs) {
uint64_t lhsLogicalBytes = getSlotLogicalBytes(*lhs, intervals);
uint64_t rhsLogicalBytes = getSlotLogicalBytes(*rhs, intervals);
if (lhs->intervalIndices.size() != rhs->intervalIndices.size())
return lhs->intervalIndices.size() > rhs->intervalIndices.size();
if (lhsLogicalBytes != rhsLogicalBytes)
return lhsLogicalBytes > rhsLogicalBytes;
if (lhs->size != rhs->size)
return lhs->size > rhs->size;
return lhs->id < rhs->id;
});
llvm::stable_sort(singleUseSlots, [&](const PlannedPhysicalSlot* lhs, const PlannedPhysicalSlot* rhs) {
if (lhs->size != rhs->size)
return lhs->size > rhs->size;
return lhs->id < rhs->id;
});
constexpr size_t kSummaryReuseLimit = 6;
constexpr size_t kSummaryOffenderLimit = 10;
os << "\nBest Reuse:\n";
if (reusedSlots.empty()) {
os << " no slots were shared by multiple intervals\n";
}
else {
for (const PlannedPhysicalSlot* slot : ArrayRef(reusedSlots).take_front(kSummaryReuseLimit)) {
uint64_t slotLogicalBytes = getSlotLogicalBytes(*slot, intervals);
os << " slot #" << slot->id << " addr=" << slot->address << " size=" << formatReportMemory(slot->size)
<< " intervals=" << slot->intervalIndices.size() << " logical_sum=" << formatReportMemory(slotLogicalBytes)
<< "\n";
for (size_t intervalIndex : slot->intervalIndices) {
const LocalAllocInterval& interval = intervals[intervalIndex];
os << " #" << interval.id << " [" << interval.start << "," << interval.end << "]"
<< " logical=" << formatReportMemory(interval.size)
<< " first=" << summarizeOperation(interval.firstTouchOp, 40)
<< " last=" << summarizeOperation(interval.lastTouchOp, 40) << "\n";
}
}
}
os << "\nTop Offenders:\n";
bool printedAttention = false;
for (const PlannedPhysicalSlot* slot : ArrayRef(singleUseSlots).take_front(kSummaryOffenderLimit)) {
const LocalAllocInterval& interval = intervals[slot->intervalIndices.front()];
printedAttention = true;
os << " slot #" << slot->id << " is single-use"
<< " size=" << formatReportMemory(slot->size) << " interval=#" << interval.id
<< " value=" << summarizeValue(interval.key.value, 56) << "\n";
os << " first=" << summarizeOperation(interval.firstTouchOp, 40)
<< " last=" << summarizeOperation(interval.lastTouchOp, 40)
<< " nested=" << (interval.insideNestedRegion ? "yes" : "no")
<< " escapes_loop=" << (interval.escapesLoop ? "yes" : "no") << "\n";
}
size_t fallbackPrinted = 0;
for (const LocalAllocInterval& interval : intervals) {
if (!(interval.startUsedAllocFallback || interval.endUsedFallback) || fallbackPrinted >= kSummaryOffenderLimit)
continue;
printedAttention = true;
++fallbackPrinted;
os << " fallback interval #" << interval.id << " size=" << formatReportMemory(interval.size)
<< " value=" << summarizeValue(interval.key.value, 56) << "\n";
os << " reason: " << (interval.fallbackReason.empty() ? "<none>" : interval.fallbackReason) << "\n";
}
size_t nestedPrinted = 0;
for (const LocalAllocInterval& interval : intervals) {
if (nestedPrinted >= kSummaryOffenderLimit)
break;
if (!(interval.insideNestedRegion && slots[interval.slotPlanIndex].intervalIndices.size() == 1))
continue;
printedAttention = true;
++nestedPrinted;
os << " nested single-use interval #" << interval.id << " slot #" << interval.physicalSlotId
<< " size=" << formatReportMemory(interval.size) << " value=" << summarizeValue(interval.key.value, 56)
<< "\n";
os << " hint: move or sink this alloc inside the nested region if the IR allows it.\n";
}
if (!printedAttention)
os << " no obvious blockers detected in this core\n";
if (reportLevel == PimMemoryReportFull) {
os << "\nSlot Reuse:\n";
for (const PlannedPhysicalSlot& slot : slots) {
uint64_t slotLogicalBytes = getSlotLogicalBytes(slot, intervals);
os << " slot #" << slot.id << " addr=" << slot.address << " size=" << formatReportMemory(slot.size) << " ("
<< slot.size << ")"
<< " intervals=" << slot.intervalIndices.size() << " logical_sum=" << formatReportMemory(slotLogicalBytes)
<< "\n";
for (size_t intervalIndex : slot.intervalIndices) {
const LocalAllocInterval& interval = intervals[intervalIndex];
mlir::Value allocValue = interval.key.value;
os << " [" << interval.start << "," << interval.end << "]"
<< " #" << interval.id << " logical=" << formatReportMemory(interval.size)
<< " nested=" << (interval.insideNestedRegion ? "yes" : "no")
<< " escapes_loop=" << (interval.escapesLoop ? "yes" : "no")
<< " first=" << summarizeOperation(interval.firstTouchOp, 48)
<< " last=" << summarizeOperation(interval.lastTouchOp, 48) << "\n";
os << " value=" << summarizeValue(allocValue) << "\n";
}
}
}
if (reportLevel == PimMemoryReportFull) {
os << "\nInterval Details:\n";
for (const LocalAllocInterval& interval : intervals) {
const PlannedPhysicalSlot& slot = slots[interval.slotPlanIndex];
mlir::Value allocValue = interval.key.value;
Operation* definingOp = allocValue.getDefiningOp();
os << " #" << interval.id << " slot=" << slot.id << " live=[" << interval.start << "," << interval.end << "]"
<< " logical=" << formatReportMemory(interval.size)
<< " slot_size=" << formatReportMemory(interval.physicalSlotSize) << " addr=" << interval.assignedAddress
<< "\n";
os << " value=" << summarizeValue(allocValue, 88) << "\n";
os << " type=" << allocValue.getType() << "\n";
os << " loc="
<< summarizeLocation(definingOp ? definingOp->getLoc() : UnknownLoc::get(coreLikeOp->getContext())) << "\n";
os << " nested=" << (interval.insideNestedRegion ? "yes" : "no")
<< " escapes_loop=" << (interval.escapesLoop ? "yes" : "no")
<< " start_fallback=" << (interval.startUsedAllocFallback ? "yes" : "no")
<< " end_fallback=" << (interval.endUsedFallback ? "yes" : "no") << "\n";
os << " first_use=" << summarizeOperation(interval.firstTouchOp) << " @" << interval.firstTouchPosition
<< "\n";
os << " last_use=" << summarizeOperation(interval.lastTouchOp) << " @" << interval.lastTouchPosition << "\n";
os << " slot_peers=";
bool first = true;
for (size_t otherIndex : slot.intervalIndices) {
if (intervals[otherIndex].id == interval.id)
continue;
if (!first)
os << ", ";
os << "#" << intervals[otherIndex].id;
first = false;
}
if (first)
os << "<none>";
os << "\n";
if (!interval.fallbackReason.empty())
os << " fallback_reason=" << interval.fallbackReason << "\n";
if (!interval.aliasesFollowed.empty()) {
os << " aliases_followed=" << interval.aliasesFollowed.size() << "\n";
for (const std::string& alias : interval.aliasesFollowed)
os << " - " << abbreviate(collapseWhitespace(alias), 108) << "\n";
}
}
}
os.flush();
return artifacts;
}