Merge branch 'main' of chef.heaplab.deib.polimi.it:nnicolosi/Raptor into main
Some checks failed
Validate Operations / validate-operations (push) Has been cancelled

This commit is contained in:
ilgeco
2026-04-21 15:44:40 +02:00
20 changed files with 2525 additions and 858 deletions

View File

@@ -31,8 +31,7 @@ Moreover, if compiling with build type debug, it is also suggested to use
mold as linker (you will need to install it if you don't have it already) mold as linker (you will need to install it if you don't have it already)
to reduce memory usage during linking. You can use it by setting the options: to reduce memory usage during linking. You can use it by setting the options:
``` ```
-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" \ -DLLVM_USE_LINKER=mold
-DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold"
``` ```
### Raptor ### Raptor
@@ -45,7 +44,8 @@ Also in this case, it is suggested to use mold as linker to reduce link time and
setting the options: setting the options:
``` ```
-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" \ -DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=mold" \
-DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=mold" \
-DCMAKE_MODULE_LINKER_FLAGS="-fuse-ld=mold"
``` ```
``` ```

View File

@@ -661,9 +661,8 @@ void SpatialToPimPass::annotateChannelCoreIds(func::FuncOp funcOp) {
broadcastSendOp = op; broadcastSendOp = op;
continue; continue;
} }
if (auto op = dyn_cast<spatial::SpatChannelBroadcastReceiveOp>(user)) { if (auto op = dyn_cast<spatial::SpatChannelBroadcastReceiveOp>(user))
continue; continue;
}
llvm_unreachable("Unexpected user of spat.channel_new during Spatial-to-PIM lowering"); llvm_unreachable("Unexpected user of spat.channel_new during Spatial-to-PIM lowering");
} }
@@ -719,7 +718,8 @@ void SpatialToPimPass::lowerBroadcastChannelOps(func::FuncOp funcOp, IRRewriter&
auto sizeAttr = getTensorSizeInBytesAttr(rewriter, receiveOp.getResult()); auto sizeAttr = getTensorSizeInBytesAttr(rewriter, receiveOp.getResult());
auto sourceCoreIdAttr = getSpatialChannelSourceCoreIdAttr(rewriter, receiveOp.getChannel()); auto sourceCoreIdAttr = getSpatialChannelSourceCoreIdAttr(rewriter, receiveOp.getChannel());
Value receivedValue = Value receivedValue =
PimReceiveOp::create(rewriter, receiveOp.getLoc(), outputBuffer.getType(), outputBuffer, sizeAttr, sourceCoreIdAttr) PimReceiveOp::create(
rewriter, receiveOp.getLoc(), outputBuffer.getType(), outputBuffer, sizeAttr, sourceCoreIdAttr)
.getOutput(); .getOutput();
rewriter.replaceOp(receiveOp, receivedValue); rewriter.replaceOp(receiveOp, receivedValue);
} }

View File

@@ -5,6 +5,8 @@ add_pim_library(SpatialOps
SpatialOps.cpp SpatialOps.cpp
Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp Transforms/MergeComputeNodes/MergeComputeNodesPass.cpp
Transforms/MergeComputeNodes/DCPGraph/Graph.cpp Transforms/MergeComputeNodes/DCPGraph/Graph.cpp
Transforms/MergeComputeNodes/DCPGraph/GraphDebug.cpp
Transforms/MergeComputeNodes/DCPGraph/GraphSupport.cpp
Transforms/MergeComputeNodes/DCPGraph/Task.cpp Transforms/MergeComputeNodes/DCPGraph/Task.cpp
Transforms/MergeComputeNodes/DCPGraph/DCPAnalysis.cpp Transforms/MergeComputeNodes/DCPGraph/DCPAnalysis.cpp

View File

@@ -17,7 +17,7 @@ namespace spatial {
using namespace mlir; using namespace mlir;
SpatWeightedCompute getOriginalSpatWeightCompute(Operation* op) { SpatWeightedCompute getOriginalSpatWeightedCompute(Operation* op) {
if (!op) if (!op)
return {}; return {};
while (auto extract = llvm::dyn_cast<tensor::ExtractSliceOp>(op)) { while (auto extract = llvm::dyn_cast<tensor::ExtractSliceOp>(op)) {
@@ -30,32 +30,32 @@ SpatWeightedCompute getOriginalSpatWeightCompute(Operation* op) {
return {}; return {};
} }
DCPAnalysisResult DCPAnalysis::runAnalysis() { DCPAnalysisResult DCPAnalysis::run() {
using EdgesIndex = std::tuple<int64_t, int64_t, int64_t>;
llvm::SmallVector<SpatWeightedCompute, 10> spatWeightedComputes; llvm::SmallVector<SpatWeightedCompute, 10> spatWeightedComputes;
llvm::SmallVector<EdgesIndex, 10> edges; llvm::SmallVector<IndexedEdge, 10> edges;
for (auto& regions : entryOp->getRegions()) for (auto& region : entryOp->getRegions())
for (SpatWeightedCompute spatWeightedCompute : regions.getOps<SpatWeightedCompute>()) for (SpatWeightedCompute spatWeightedCompute : region.getOps<SpatWeightedCompute>())
spatWeightedComputes.push_back(spatWeightedCompute); spatWeightedComputes.push_back(spatWeightedCompute);
for (auto [indexEndEdge, spatWeightedCompute] : llvm::enumerate(spatWeightedComputes)) { for (auto [indexEndEdge, spatWeightedCompute] : llvm::enumerate(spatWeightedComputes)) {
for (Value input : spatWeightedCompute.getInputs()) { for (Value input : spatWeightedCompute.getInputs()) {
if (auto spatWeightedComputeArgOp = getOriginalSpatWeightCompute(input.getDefiningOp())) { if (auto producerCompute = getOriginalSpatWeightedCompute(input.getDefiningOp())) {
auto elemIter = llvm::find(spatWeightedComputes, spatWeightedComputeArgOp); auto producerIt = llvm::find(spatWeightedComputes, producerCompute);
assert(elemIter != spatWeightedComputes.end()); assert(producerIt != spatWeightedComputes.end());
auto indexStartEdge = std::distance(spatWeightedComputes.begin(), elemIter); auto indexStartEdge = std::distance(spatWeightedComputes.begin(), producerIt);
ResultRange outputs = spatWeightedComputeArgOp.getResults(); ResultRange outputs = producerCompute.getResults();
int64_t totalSize = 0; int64_t totalSize = 0;
for (auto output : outputs) { for (auto output : outputs) {
ShapedType result = cast<ShapedType>(output.getType()); ShapedType resultType = cast<ShapedType>(output.getType());
totalSize += getSizeInBytes(result); totalSize += getSizeInBytes(resultType);
} }
edges.push_back({indexStartEdge, indexEndEdge, totalSize}); edges.push_back({indexStartEdge, indexEndEdge, totalSize});
} }
} }
} }
GraphDCP graphDCP(spatWeightedComputes, edges); GraphDCP graphDCP(spatWeightedComputes, edges);
graphDCP.DCP(); graphDCP.setContext(entryOp->getContext());
graphDCP.runDcp();
return graphDCP.getResult(); return graphDCP.getResult();
} }

View File

@@ -3,6 +3,7 @@
#include "mlir/IR/Operation.h" #include "mlir/IR/Operation.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include <vector> #include <vector>
@@ -10,8 +11,8 @@
struct DCPAnalysisResult { struct DCPAnalysisResult {
std::vector<onnx_mlir::spatial::SpatWeightedCompute> dominanceOrderCompute; std::vector<onnx_mlir::spatial::SpatWeightedCompute> dominanceOrderCompute;
llvm::DenseMap<onnx_mlir::spatial::SpatWeightedCompute, size_t> computeToCPUMap; llvm::DenseMap<onnx_mlir::spatial::SpatWeightedCompute, size_t> computeToCpuMap;
llvm::DenseSet<onnx_mlir::spatial::SpatWeightedCompute> isLastComputeOfACpu; llvm::DenseSet<onnx_mlir::spatial::SpatWeightedCompute> isLastComputeOfCpu;
llvm::DenseMap<size_t, onnx_mlir::spatial::SpatWeightedCompute> cpuToLastComputeMap; llvm::DenseMap<size_t, onnx_mlir::spatial::SpatWeightedCompute> cpuToLastComputeMap;
}; };
@@ -21,12 +22,12 @@ struct DCPAnalysis {
private: private:
DCPAnalysisResult result; DCPAnalysisResult result;
mlir::Operation* entryOp; mlir::Operation* entryOp;
DCPAnalysisResult runAnalysis(); DCPAnalysisResult run();
public: public:
DCPAnalysis(mlir::Operation* op) DCPAnalysis(mlir::Operation* op)
: entryOp(op) { : entryOp(op) {
result = runAnalysis(); result = run();
} }
DCPAnalysisResult& getResult() { return result; } DCPAnalysisResult& getResult() { return result; }
}; };

View File

@@ -2,6 +2,7 @@
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include <list> #include <list>
#include <optional> #include <optional>
@@ -12,90 +13,144 @@
#include "Task.hpp" #include "Task.hpp"
#include "Utils.hpp" #include "Utils.hpp"
std::optional<DoubleEdge> addEdge(TaskDCP* parent, TaskDCP* child, Weight_t weight); namespace mlir {
void removeEdge(TaskDCP* parent, TaskDCP* child); class MLIRContext;
int getTranferCost(TaskDCP* parent, TaskDCP* child); } // namespace mlir
std::optional<EdgePair> addEdge(TaskDCP* parent, TaskDCP* child, Weight weight, bool isScheduling = false);
void removeEdge(TaskDCP* parent, TaskDCP* child, bool isScheduling = false);
Weight getTransferCost(TaskDCP* parent, TaskDCP* child);
class GraphDCP { class GraphDCP {
public: public:
struct CandidateRelations {
llvm::DenseSet<TaskDCP*> ancestors;
llvm::DenseSet<TaskDCP*> descendants;
// descendants ordered by position in the graph's topological order;
// iterating this avoids walking non-descendant tail tasks on hot paths.
llvm::SmallVector<TaskDCP*, 32> descendantsTopoOrder;
};
struct ScheduledTaskInfo { struct ScheduledTaskInfo {
size_t nodeIndex; size_t nodeIndex;
int aest; Time aest;
int alst; Time alst;
int weight; Weight weight;
}; };
private: private:
using CpuTaskList = std::list<TaskDCP*>;
struct FindSlot { struct FindSlot {
int aest; Time aest;
int index; int index;
}; };
std::vector<TaskDCP> nodes; std::vector<TaskDCP> nodes;
onnx_mlir::LabeledList<TaskDCP> topologicalOrder; onnx_mlir::LabeledList<TaskDCP> topologicalOrder;
std::unordered_map<CPU, std::list<TaskDCP*>> mapCPUTasks; std::vector<CpuTaskList> cpuTasks;
CPU last_cpu = 0; std::unordered_map<CPU, CrossbarUsage> cpuCrossbarUsage;
CPU lastCpu = 0;
long long flag = 1; long long flag = 1;
int DCPL; Time dcpl = 0;
Time maxCompletion = 0;
Time secondMaxCompletion = 0;
TaskDCP* maxCompletionTask = nullptr;
int maxCpuCount = 1000;
mlir::MLIRContext* context = nullptr;
TaskInsertion insertTaskInCPU(CPU cpu, TaskDCP* task, size_t position); TaskInsertion insertTaskInCPU(CPU cpu, TaskDCP* task, size_t position);
void removeTaskFromCPU(CPU cpu, TaskDCP* task); void removeTaskFromCPU(CPU cpu, TaskDCP* task);
CpuTaskList& getOrCreateCpuTasks(CPU cpu);
const CpuTaskList* findCpuTasks(CPU cpu) const;
std::vector<TaskDCP*> getRoots(); std::vector<TaskDCP*> getRoots();
long long getUniqueFlag() { return flag++; } long long getUniqueFlag() { return flag++; }
void initAEST(); void initAest();
int initDCPL(); void initAlst();
void initALST();
int computeAEST(TaskDCP* task, CPU cpu); Time computeAestOnCpu(TaskDCP* task, CPU cpu);
int computeDCPL(TaskDCP* task, CPU cpu); Time computeDcplOnCpu(TaskDCP* task, CPU cpu);
int getDCPL() { return DCPL; } Time getDcpl() const { return dcpl; }
Time computeTaskAlstOnCpu(TaskDCP* task, CPU cpu, Time scheduleDcpl);
void updateAestFromTask(TaskDCP* task);
void updateAestFromTaskWithDescendants(TaskDCP* task, const llvm::DenseSet<TaskDCP*>& descendants);
void updateAestFromTaskWithDescendants(TaskDCP* task, llvm::ArrayRef<TaskDCP*> descendantsTopoOrder);
// Propagates AEST like the overload above but returns early (before touching
// the remaining descendants) as soon as a task's completion exceeds
// `dcplBudget`, signalling that the new DCPL would exceed the budget.
// Returns true iff the full propagation completed without exceeding the
// budget. Uses the caller's snapshot to restore AEST on the aborted tail.
bool tryUpdateAestWithinBudget(TaskDCP* task,
llvm::ArrayRef<TaskDCP*> descendantsTopoOrder,
Time dcplBudget);
void initTopological(); void initTopological();
void topologicalMoveAfter(TaskDCP* task, TaskDCP* pivotPoint); void topologicalMoveAfter(TaskDCP* task, TaskDCP* pivotPoint, TaskInsertion* insertion = nullptr);
void topologicalMoveBefore(TaskDCP* task, TaskDCP* pivotPoint); void topologicalMoveBefore(TaskDCP* task, TaskDCP* pivotPoint, TaskInsertion* insertion = nullptr);
llvm::DenseMap<TaskDCP*, int> computeALST(TaskDCP* task, CPU cpu); llvm::DenseMap<TaskDCP*, Time> computeAlst(TaskDCP* task, CPU cpu, const CandidateRelations& relations);
size_t getNodeIndex(const TaskDCP* task) const; size_t getNodeIndex(const TaskDCP* task) const;
TaskDCP* findCandidate(std::vector<TaskDCP*> nodes); TaskDCP* findCandidate(const std::vector<TaskDCP*>& readyNodes);
void selectProcessor(TaskDCP* candidate, bool push); void selectProcessor(TaskDCP* candidate, bool push);
CPU lastCPU() const { return last_cpu; } CPU getLastCpu() const { return lastCpu; }
void incLastCPU() { last_cpu++; } void incrementLastCpu() { lastCpu++; }
FindSlot findSlot(TaskDCP* candidate, CPU cpu, bool push); FindSlot findSlot(TaskDCP* candidate, CPU cpu, bool push, const CandidateRelations& relations);
void to_dot(); FindSlot findSlotWithFixedFinalTime(
TaskDCP* candidate, CPU cpu, const CandidateRelations& relations, Time finalTime, Time aestOnCpu);
void dumpDot();
friend TaskInsertion; friend TaskInsertion;
friend class TaskDCP;
CrossbarUsage getCpuCrossbarUsage(CPU cpu) const;
CrossbarUsage getCpuCrossbarCapacity() const;
CrossbarUsage getTaskCrossbarFootprint(const TaskDCP* task) const;
void reserveTaskCrossbars(CPU cpu, const TaskDCP* task);
void releaseTaskCrossbars(CPU cpu, const TaskDCP* task);
bool wouldExhaustCrossbarCapacity(CPU cpu, const TaskDCP* task) const;
public: public:
void DCP(); void runDcp();
GraphDCP(llvm::ArrayRef<onnx_mlir::spatial::SpatWeightedCompute> spatWeightedComputes, GraphDCP(llvm::ArrayRef<onnx_mlir::spatial::SpatWeightedCompute> spatWeightedComputes,
llvm::ArrayRef<EdgesIndex> edges) llvm::ArrayRef<IndexedEdge> edges)
: nodes(), mapCPUTasks() { : nodes(), cpuTasks(), cpuCrossbarUsage() {
for (auto spatWeightedCompute : spatWeightedComputes) for (auto spatWeightedCompute : spatWeightedComputes)
nodes.emplace_back(spatWeightedCompute); nodes.emplace_back(spatWeightedCompute);
for (auto [start, end, weight] : edges) for (auto [start, end, weight] : edges)
makeEdge(start, end, weight); makeEdge(start, end, weight);
} }
GraphDCP(llvm::ArrayRef<Weight_t> nodeWeights, llvm::ArrayRef<EdgesIndex> edges) GraphDCP(llvm::ArrayRef<Weight> nodeWeights,
: nodes(), mapCPUTasks() { llvm::ArrayRef<IndexedEdge> edges,
llvm::ArrayRef<CrossbarUsage> nodeCrossbarUsage = {})
: nodes(), cpuTasks(), cpuCrossbarUsage() {
assert((nodeCrossbarUsage.empty() || nodeCrossbarUsage.size() == nodeWeights.size())
&& "synthetic crossbar usage must match synthetic node weights");
nodes.reserve(nodeWeights.size()); nodes.reserve(nodeWeights.size());
for (auto [index, weight] : llvm::enumerate(nodeWeights)) for (auto [index, weight] : llvm::enumerate(nodeWeights))
nodes.emplace_back(index, weight); nodes.emplace_back(index, weight, nodeCrossbarUsage.empty() ? 0 : nodeCrossbarUsage[index]);
for (auto [start, end, weight] : edges) for (auto [start, end, weight] : edges)
makeEdge(start, end, weight); makeEdge(start, end, weight);
} }
DCPAnalysisResult getResult(); DCPAnalysisResult getResult();
std::vector<ScheduledTaskInfo> getScheduledTasks(CPU cpu) const; std::vector<ScheduledTaskInfo> getScheduledTasks(CPU cpu) const;
CPU cpuCount() const { return last_cpu; } CPU cpuCount() const { return lastCpu; }
void makeEdge(size_t parent_index, size_t child_index, Weight_t weight) { void makeEdge(size_t parentIndex, size_t childIndex, Weight weight) {
addEdge(&nodes[parent_index], &nodes[child_index], weight); addEdge(&nodes[parentIndex], &nodes[childIndex], weight);
} }
size_t taskInCPU(CPU cpu) { return mapCPUTasks[cpu].size(); } size_t taskInCpu(CPU cpu) { return getOrCreateCpuTasks(cpu).size(); }
void setMaxCpuCount(int value) { maxCpuCount = value; }
int getMaxCpuCount() const { return maxCpuCount; }
// Optional MLIR context used to drive mlir::parallelFor inside runDcp. If
// null the scheduler runs single-threaded (tests use this path).
void setContext(mlir::MLIRContext* ctx) { context = ctx; }
}; };

View File

@@ -0,0 +1,152 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <fstream>
#include <string>
#include "GraphDebug.hpp"
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
namespace dcp_graph {
#ifdef DCP_DEBUG_ENABLED
// Progress output is only enabled for sufficiently large schedules; small
// graphs finish too quickly for periodic progress lines to be useful.
DcpProgressLogger::DcpProgressLogger(size_t totalTasks)
    : logProgress(totalTasks >= 200), // threshold keeps small runs quiet
      totalTasks(totalTasks),
      startTime(std::chrono::steady_clock::now()),
      lastProgressPrint(startTime) {}
// Renders a duration as "m:ss", switching to "h:mm:ss" once it reaches an
// hour. Negative inputs are clamped to zero and the value is rounded to the
// nearest whole second.
std::string DcpProgressLogger::formatDuration(double seconds) {
  const double clamped = seconds < 0 ? 0 : seconds;
  const long rounded = static_cast<long>(clamped + 0.5);
  const long hrs = rounded / 3600;
  const long mins = (rounded / 60) % 60;
  const long secs = rounded % 60;
  if (hrs == 0)
    return llvm::formatv("{0}:{1:02}", mins, secs).str();
  return llvm::formatv("{0}:{1:02}:{2:02}", hrs, mins, secs).str();
}
// Accumulate per-phase wall-clock time so printProgress can report where the
// scheduler is spending its effort.
void DcpProgressLogger::recordFindDuration(double seconds) { findCandidateSeconds += seconds; }
void DcpProgressLogger::recordSelectDuration(double seconds) { selectProcessorSeconds += seconds; }
void DcpProgressLogger::recordUpdateDuration(double seconds) { updateTimingSeconds += seconds; }
// Advances the completed-task counter used for percentage/ETA computation.
void DcpProgressLogger::advanceCompleted(size_t taskCount) { completedTasks += taskCount; }
// Emits the one-time start banner (total tasks and initial ready count) to
// stderr. No-op when progress logging is disabled for this run.
void DcpProgressLogger::printStart(size_t readyCount) const {
  if (!logProgress)
    return;
  llvm::errs() << llvm::formatv("[DCP] start: tasks={0} ready={1}\n", totalTasks, readyCount);
}
// Logs a warning for a single candidate-selection step that took one second
// or more, so pathological scheduling steps are visible in long runs.
void DcpProgressLogger::maybePrintSlowCandidate(size_t nodeIndex,
    double elapsedSeconds,
    size_t readyCount,
    CPU cpuCount) const {
  if (!logProgress || elapsedSeconds < 1.0)
    return;
  llvm::errs() << llvm::formatv("[DCP] slow candidate node={0} elapsed={1} ready={2} cpus={3}\n",
      nodeIndex,
      formatDuration(elapsedSeconds),
      readyCount,
      cpuCount);
}
// Prints a progress line (percent complete, ready count, CPU count, current
// stage, elapsed time, ETA) followed by the per-phase time breakdown.
// Output is rate-limited to once per second unless `force` is set or all
// tasks have completed.
void DcpProgressLogger::printProgress(size_t readyCount, CPU cpuCount, llvm::StringRef stage, bool force) {
  if (!logProgress)
    return;
  auto now = std::chrono::steady_clock::now();
  // Rate limit: skip unless forced, a second has passed, or we are finished.
  if (!force && now - lastProgressPrint < std::chrono::seconds(1) && completedTasks != totalTasks)
    return;
  double elapsedSeconds = std::chrono::duration<double>(now - startTime).count();
  // Throughput so far (tasks/second); guards avoid division by zero early on.
  double rate = elapsedSeconds > 0.0 ? static_cast<double>(completedTasks) / elapsedSeconds : 0.0;
  double etaSeconds = rate > 0.0 ? static_cast<double>(totalTasks - completedTasks) / rate : 0.0;
  double percent = totalTasks == 0 ? 100.0 : (100.0 * static_cast<double>(completedTasks) / totalTasks);
  llvm::errs() << llvm::formatv("[DCP] {0}/{1} ({2:F1}%) ready={3} cpus={4} stage={5} elapsed={6} eta={7}\n",
      completedTasks,
      totalTasks,
      percent,
      readyCount,
      cpuCount,
      stage,
      formatDuration(elapsedSeconds),
      completedTasks == totalTasks ? "0:00" : formatDuration(etaSeconds));
  llvm::errs() << llvm::formatv("    time(find={0}, select={1}, update={2})\n",
      formatDuration(findCandidateSeconds),
      formatDuration(selectProcessorSeconds),
      formatDuration(updateTimingSeconds));
  lastProgressPrint = now;
}
#else
// DCP_DEBUG_ENABLED is not defined: every logger entry point compiles to a
// no-op so callers need no conditional compilation of their own.
DcpProgressLogger::DcpProgressLogger(size_t) {}
void DcpProgressLogger::recordFindDuration(double) {}
void DcpProgressLogger::recordSelectDuration(double) {}
void DcpProgressLogger::recordUpdateDuration(double) {}
void DcpProgressLogger::advanceCompleted(size_t) {}
void DcpProgressLogger::printStart(size_t) const {}
void DcpProgressLogger::maybePrintSlowCandidate(size_t, double, size_t, CPU) const {}
void DcpProgressLogger::printProgress(size_t, CPU, llvm::StringRef, bool) {}
#endif
// Writes the DCP task graph to <outputDir>/dcp_graph/graph_<N>.dot in
// Graphviz format. When `cpuTasks` is non-empty, nodes are grouped into one
// cluster per CPU in [0, lastCpu); otherwise all nodes are emitted flat.
// Edges are always drawn from the full `nodes` list, labelled with their
// transfer weight. No-op when no output directory is configured or the dump
// file cannot be created. Not thread-safe (static dump counter).
void dumpGraphDot(const std::vector<TaskDCP>& nodes,
    const std::vector<std::list<TaskDCP*>>& cpuTasks,
    CPU lastCpu) {
  static int dumpIndex = 0;
  std::string outputDir = onnx_mlir::getOutputDir();
  if (outputDir.empty())
    return;
  std::string graphDir = outputDir + "/dcp_graph";
  onnx_mlir::createDirectory(graphDir);
  // Output-only stream; the original std::fstream with ios::out was never
  // checked for success, silently dropping dumps when the directory was
  // missing or unwritable.
  std::ofstream file(graphDir + "/graph_" + std::to_string(dumpIndex++) + ".dot");
  if (!file.is_open())
    return;
  file << "digraph G {\n";
  if (!cpuTasks.empty()) {
    for (CPU cpu = 0; cpu < lastCpu; cpu++) {
      file << "subgraph cluster_" << cpu << "{\nstyle=filled;\ncolor=lightgrey;\n";
      size_t cpuIndex = static_cast<size_t>(cpu);
      // A CPU may have been allocated but never received a task list.
      if (cpuIndex >= cpuTasks.size()) {
        file << " }\n";
        continue;
      }
      for (auto node : cpuTasks[cpuIndex]) {
        file << node->Id() << " [label=\"";
        file << "n:" << node->Id() << "\n";
        file << "aest:" << node->getAest() << "\n";
        file << "alst:" << node->getAlst() << "\n";
        file << "weight:" << node->getWeight() << "\"]\n";
      }
      file << " }\n";
    }
  }
  else {
    for (const auto& node : nodes) {
      file << node.Id() << " [label=\"";
      file << "n:" << node.Id() << "\n";
      file << "aest:" << node.getAest() << "\n";
      file << "alst:" << node.getAlst() << "\n";
      file << "weight:" << node.getWeight() << "\"]\n";
    }
  }
  for (const auto& node : nodes)
    for (const auto& child : node.children) {
      file << node.Id() << " -> " << child.first->Id();
      file << " [label=\"" << child.second << "\"]\n";
    }
  file << "}\n";
  // No explicit flush/close needed: the ofstream destructor handles both.
}
} // namespace dcp_graph

View File

@@ -0,0 +1,57 @@
#pragma once
#include "llvm/ADT/StringRef.h"
#include <chrono>
#include <list>
#include <vector>
#include "Task.hpp"
#include "Utils.hpp"
// DCP progress logging and per-phase profiling are enabled by default via the
// define below; comment it out to disable them. When disabled the logger
// methods are no-ops and the helpers compile away.
#define DCP_DEBUG_ENABLED
#ifdef DCP_DEBUG_ENABLED
#define DCP_DEBUG_IF(...) __VA_ARGS__
#else
#define DCP_DEBUG_IF(...)
#endif
namespace dcp_graph {
// Lightweight progress/ETA logger for the DCP scheduler. With
// DCP_DEBUG_ENABLED defined it accumulates per-phase timings and prints
// throttled progress lines to stderr; otherwise every method is a no-op.
class DcpProgressLogger {
public:
  explicit DcpProgressLogger(size_t totalTasks);
  // Per-phase wall-clock accumulators (seconds).
  void recordFindDuration(double seconds);
  void recordSelectDuration(double seconds);
  void recordUpdateDuration(double seconds);
  // Advances the completed-task counter used for percentage/ETA output.
  void advanceCompleted(size_t taskCount = 1);
  void printStart(size_t readyCount) const;
  // Logs a candidate-selection step that took noticeably long.
  void maybePrintSlowCandidate(size_t nodeIndex, double elapsedSeconds, size_t readyCount, CPU cpuCount) const;
  // Prints a rate-limited progress line; `force` bypasses the throttle.
  void printProgress(size_t readyCount, CPU cpuCount, llvm::StringRef stage, bool force);
#ifdef DCP_DEBUG_ENABLED
private:
  static std::string formatDuration(double seconds);
  bool logProgress = false; // only true for large schedules
  size_t totalTasks = 0;
  size_t completedTasks = 0;
  std::chrono::steady_clock::time_point startTime;
  std::chrono::steady_clock::time_point lastProgressPrint;
  double findCandidateSeconds = 0.0;
  double selectProcessorSeconds = 0.0;
  double updateTimingSeconds = 0.0;
#endif
};
// Dumps the task graph (optionally clustered per CPU) as a Graphviz .dot file
// into the configured output directory; no-op when none is configured.
void dumpGraphDot(const std::vector<TaskDCP>& nodes,
    const std::vector<std::list<TaskDCP*>>& cpuTasks,
    CPU lastCpu);
} // namespace dcp_graph

View File

@@ -0,0 +1,105 @@
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <vector>
#include "GraphSupport.hpp"
#include "Task.hpp"
#include "UniqueWorklist.hpp"
namespace dcp_graph {
// Returns every task transitively reachable from `root` (root itself is only
// included if an edge leads back to it). Walks parent edges when
// `followParents` is set, child edges otherwise.
llvm::DenseSet<TaskDCP*> collectReachableTasks(TaskDCP* root, bool followParents) {
  llvm::DenseSet<TaskDCP*> visited;
  std::vector<TaskDCP*> pending;
  pending.reserve(32);
  pending.push_back(root);
  while (!pending.empty()) {
    TaskDCP* current = pending.back();
    pending.pop_back();
    const auto& edges = followParents ? current->parents : current->children;
    for (const auto& edge : edges)
      if (visited.insert(edge.first).second)
        pending.push_back(edge.first);
  }
  return visited;
}
// Gathers the candidate's full ancestor and descendant sets. Note that
// `descendantsTopoOrder` is left default-empty here.
GraphDCP::CandidateRelations computeCandidateRelations(TaskDCP* candidate) {
  auto ancestors = collectReachableTasks(candidate, /*followParents=*/true);
  auto descendants = collectReachableTasks(candidate, /*followParents=*/false);
  return {std::move(ancestors), std::move(descendants)};
}
// Snapshots the AEST of `task` and all of its descendants together with the
// graph-global critical-path bookkeeping, so a speculative scheduling step
// can later be undone via restoreLocalScheduleState.
LocalScheduleSnapshot captureLocalScheduleState(TaskDCP* task,
    const llvm::DenseSet<TaskDCP*>& descendants,
    Time dcpl,
    Time maxCompletion,
    Time secondMaxCompletion,
    TaskDCP* maxCompletionTask) {
  LocalScheduleSnapshot snapshot;
  snapshot.dcpl = dcpl;
  snapshot.maxCompletion = maxCompletion;
  snapshot.secondMaxCompletion = secondMaxCompletion;
  snapshot.maxCompletionTask = maxCompletionTask;
  snapshot.aestBackup.reserve(descendants.size() + 1);
  snapshot.aestBackup.emplace_back(task, task->getAest());
  for (TaskDCP* descendant : descendants)
    snapshot.aestBackup.emplace_back(descendant, descendant->getAest());
  return snapshot;
}
// Undoes a speculative scheduling step: restores the global critical-path
// bookkeeping and writes every saved AEST back to its task.
void restoreLocalScheduleState(const LocalScheduleSnapshot& snapshot,
    Time& dcpl,
    Time& maxCompletion,
    Time& secondMaxCompletion,
    TaskDCP*& maxCompletionTask) {
  dcpl = snapshot.dcpl;
  maxCompletion = snapshot.maxCompletion;
  secondMaxCompletion = snapshot.secondMaxCompletion;
  maxCompletionTask = snapshot.maxCompletionTask;
  for (const auto& entry : snapshot.aestBackup)
    entry.first->setAest(entry.second);
}
int countDependencyParents(const TaskDCP* task) {
return static_cast<int>(llvm::count_if(task->parents, [](const Edge& edge) { return !edge.isScheduling; }));
}
// Remembers `task`'s pre-move topological successor on `insertion` so the
// move can be rolled back later. Only the first move of a given task is
// recorded; later moves of the same task are ignored. No-op without an
// insertion record.
void recordTopologicalMove(TaskDCP* task, TaskInsertion* insertion) {
  if (!insertion)
    return;
  for (const TaskInsertion::TopologicalMoveRecord& move : insertion->topologicalMoves)
    if (move.task == task)
      return;
  insertion->topologicalMoves.push_back({task, onnx_mlir::LabeledList<TaskDCP>::next(task)});
}
// Builds a dominance-respecting order over the graph: starting from `roots`,
// a task is appended only once every one of its parents is already in the
// list, so each task appears after all of its ancestors.
std::vector<TaskDCP*> collectDominanceOrder(llvm::ArrayRef<TaskDCP*> roots, size_t nodeCount) {
  UniqueWorkList<std::vector<TaskDCP*>> worklist(roots);
  worklist.reserve(nodeCount);
  size_t index = 0;
  while (index != worklist.size()) {
    // Re-scan the current task's children until a fixed point: appending one
    // child can make a sibling's parent set fully contained.
    bool modified = true;
    while (modified) {
      modified = false;
      for (const auto& child : worklist.at(index)->children)
        // A child is eligible once all of its parent tasks are already in the
        // worklist; pushBack presumably returns true only on first insertion
        // (see UniqueWorklist.hpp) so duplicates do not re-trigger the loop.
        if (worklist.allElementsContained(
                child.first->parents.begin(), child.first->parents.end(), [](Edge edge) { return edge.first; }))
          modified |= worklist.pushBack(child.first);
    }
    index++;
  }
  return {worklist.begin(), worklist.end()};
}
} // namespace dcp_graph

View File

@@ -0,0 +1,41 @@
#pragma once
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include <utility>
#include <vector>
#include "Graph.hpp"
namespace dcp_graph {
// Backup of the mutable scheduling state touched while trying out a task
// placement: per-task AEST values plus the graph-global DCPL/completion
// tracking. Restored wholesale by restoreLocalScheduleState on rollback.
struct LocalScheduleSnapshot {
  // (task, saved AEST) pairs for the candidate task and its descendants.
  llvm::SmallVector<std::pair<TaskDCP*, Time>, 64> aestBackup;
  Time dcpl = 0;
  Time maxCompletion = 0;
  Time secondMaxCompletion = 0;
  TaskDCP* maxCompletionTask = nullptr;
};
// Tasks transitively reachable from `root` via parent (followParents) or
// child edges.
llvm::DenseSet<TaskDCP*> collectReachableTasks(TaskDCP* root, bool followParents);
// Full ancestor/descendant sets for a scheduling candidate.
GraphDCP::CandidateRelations computeCandidateRelations(TaskDCP* candidate);
// Snapshot of per-task AEST values and global critical-path bookkeeping.
LocalScheduleSnapshot captureLocalScheduleState(TaskDCP* task,
    const llvm::DenseSet<TaskDCP*>& descendants,
    Time dcpl,
    Time maxCompletion,
    Time secondMaxCompletion,
    TaskDCP* maxCompletionTask);
// Restores the state captured by captureLocalScheduleState.
void restoreLocalScheduleState(const LocalScheduleSnapshot& snapshot,
    Time& dcpl,
    Time& maxCompletion,
    Time& secondMaxCompletion,
    TaskDCP*& maxCompletionTask);
// Number of non-scheduling (true dependency) parent edges of `task`.
int countDependencyParents(const TaskDCP* task);
// Records a task's pre-move topological successor for rollback.
void recordTopologicalMove(TaskDCP* task, TaskInsertion* insertion);
// Order in which each task follows all of its ancestors.
std::vector<TaskDCP*> collectDominanceOrder(llvm::ArrayRef<TaskDCP*> roots, size_t nodeCount);
} // namespace dcp_graph

View File

@@ -4,57 +4,63 @@
#include "Task.hpp" #include "Task.hpp"
#include "UniqueWorklist.hpp" #include "UniqueWorklist.hpp"
std::optional<Edge_t> TaskDCP::addChild(TaskDCP* child, Weight_t weight) { std::optional<Edge> TaskDCP::addChild(TaskDCP* child, Weight weight, bool isScheduling) {
std::optional<Edge_t> oldEdge = std::nullopt; std::optional<Edge> oldEdge = std::nullopt;
auto founded_element = auto foundElement = std::find_if(children.begin(), children.end(), [child, isScheduling](Edge element) {
std::find_if(childs.begin(), childs.end(), [child](Edge_t element) { return child == element.first; }); return child == element.first && isScheduling == element.isScheduling;
if (founded_element != childs.end()) { });
oldEdge = *founded_element; if (foundElement != children.end()) {
fastRemove(childs, founded_element); oldEdge = *foundElement;
fastRemove(children, foundElement);
} }
childs.emplace_back(child, weight); children.emplace_back(Edge {child, weight, isScheduling});
return oldEdge; return oldEdge;
} }
std::optional<Edge_t> TaskDCP::addParent(TaskDCP* parent, Weight_t weight) { std::optional<Edge> TaskDCP::addParent(TaskDCP* parent, Weight weight, bool isScheduling) {
std::optional<Edge_t> oldEdge = std::nullopt; std::optional<Edge> oldEdge = std::nullopt;
auto founded_element = auto foundElement = std::find_if(parents.begin(), parents.end(), [parent, isScheduling](Edge element) {
std::find_if(parents.begin(), parents.end(), [parent](Edge_t element) { return parent == element.first; }); return parent == element.first && isScheduling == element.isScheduling;
if (founded_element != parents.end()) { });
oldEdge = *founded_element; if (foundElement != parents.end()) {
fastRemove(parents, founded_element); oldEdge = *foundElement;
fastRemove(parents, foundElement);
} }
parents.emplace_back(parent, weight); parents.emplace_back(Edge {parent, weight, isScheduling});
return oldEdge; return oldEdge;
} }
bool TaskDCP::hasDescendent(TaskDCP* child) { bool TaskDCP::hasDescendant(TaskDCP* child) {
UniqueWorkList<std::vector<TaskDCP*>> worklist; UniqueWorkList<std::vector<TaskDCP*>> worklist;
worklist.reserve(32); worklist.reserve(32);
worklist.push_back(this); worklist.pushBack(this);
while (!worklist.empty()) { while (!worklist.empty()) {
TaskDCP* task = worklist.back(); TaskDCP* task = worklist.back();
worklist.pop_back(); worklist.popBack();
if (task == child) if (task == child)
return true; return true;
for (auto c : task->childs) for (auto edge : task->children)
worklist.push_back(c.first); worklist.pushBack(edge.first);
} }
return false; return false;
} }
// TODO fare qualcosa di sensato Weight TaskDCP::computeWeightOnCpu(GraphDCP* graph, CPU cpu) {
int TaskDCP::computeWeight(GraphDCP* graph, CPU cpu) { return origWeight; } if (crossbarUsage != 0 && graph->wouldExhaustCrossbarCapacity(cpu, this))
return std::numeric_limits<Weight>::max();
return baseWeight;
}
void TaskInsertion::rollBack() { void TaskInsertion::rollBack() {
graph->removeTaskFromCPU(cpuModified, taskInserted); graph->removeTaskFromCPU(cpuModified, taskInserted);
if (beforeNode.has_value()) { if (beforeNode.has_value()) {
auto double_edge = *beforeNode; auto edgePair = *beforeNode;
addEdge(double_edge.first.first, double_edge.second.first, double_edge.first.second); addEdge(edgePair.first.first, edgePair.second.first, edgePair.first.second, edgePair.first.isScheduling);
} }
if (afterNode.has_value()) { if (afterNode.has_value()) {
auto double_edge = *afterNode; auto edgePair = *afterNode;
addEdge(double_edge.first.first, double_edge.second.first, double_edge.first.second); addEdge(edgePair.first.first, edgePair.second.first, edgePair.first.second, edgePair.first.isScheduling);
} }
graph->topologicalOrder.moveBefore( taskInserted,&*oldTopologicalPosition ); // for (auto it = topologicalMoves.rbegin(); it != topologicalMoves.rend(); ++it)
// graph->topologicalOrder.moveBefore(it->task, it->nextTask);
} }

View File

@@ -7,110 +7,117 @@
#include "Utils.hpp" #include "Utils.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
std::optional<DoubleEdge> addEdge(TaskDCP* parent, TaskDCP* child, Weight_t weight);
void removeEdge(TaskDCP* parent, TaskDCP* child);
class TaskDCP : public onnx_mlir::LabeledListNode<TaskDCP> { class TaskDCP : public onnx_mlir::LabeledListNode<TaskDCP> {
onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute; onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute;
int aest; Time aest;
int alst; Time alst;
std::optional<CPU> scheduledCPU; std::optional<CPU> scheduledCpu;
int weight; Weight weight;
int origWeight; Weight baseWeight;
CrossbarUsage crossbarUsage;
long long flag = 0; long long flag = 0;
int64_t syntheticId = -1; int64_t syntheticId = -1;
std::optional<Edge_t> addChild(TaskDCP* child, Weight_t weight); std::optional<Edge> addChild(TaskDCP* child, Weight weight, bool isScheduling);
std::optional<Edge_t> addChild(TaskDCP& child, Weight_t weight) { return addChild(&child, weight); } std::optional<Edge> addChild(TaskDCP& child, Weight weight, bool isScheduling) {
return addChild(&child, weight, isScheduling);
}
void removeChild(TaskDCP* to_remove) { fastRemove(childs, to_remove); } void removeChild(TaskDCP* toRemove, bool isScheduling) { fastRemove(children, toRemove, isScheduling); }
void removeChild(TaskDCP& to_remove) { fastRemove(childs, &to_remove); } void removeChild(TaskDCP& toRemove, bool isScheduling) { fastRemove(children, &toRemove, isScheduling); }
std::optional<Edge_t> addParent(TaskDCP* parent, Weight_t weight); std::optional<Edge> addParent(TaskDCP* parent, Weight weight, bool isScheduling);
std::optional<Edge_t> addParent(TaskDCP& parent, Weight_t weight) { return addParent(&parent, weight); } std::optional<Edge> addParent(TaskDCP& parent, Weight weight, bool isScheduling) {
return addParent(&parent, weight, isScheduling);
}
void removeParent(TaskDCP* to_remove) { fastRemove(parents, to_remove); } void removeParent(TaskDCP* toRemove, bool isScheduling) { fastRemove(parents, toRemove, isScheduling); }
void removeParent(TaskDCP& to_remove) { fastRemove(parents, &to_remove); } void removeParent(TaskDCP& toRemove, bool isScheduling) { fastRemove(parents, &toRemove, isScheduling); }
public: public:
std::vector<Edge_t> parents; std::vector<Edge> parents;
std::vector<Edge_t> childs; std::vector<Edge> children;
TaskDCP() = default; TaskDCP() = default;
TaskDCP(onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute) TaskDCP(onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute)
: onnx_mlir::LabeledListNode<TaskDCP>(), : onnx_mlir::LabeledListNode<TaskDCP>(),
spatWeightedCompute(spatWeightedCompute), spatWeightedCompute(spatWeightedCompute),
aest(0), aest(0),
alst(0), alst(0),
scheduledCPU(), scheduledCpu(),
weight(getSpatWeightCompute(spatWeightedCompute)), weight(getSpatComputeWeight(spatWeightedCompute)),
origWeight(weight), baseWeight(weight),
crossbarUsage(getSpatComputeCrossbarUsage(spatWeightedCompute)),
syntheticId(-1), syntheticId(-1),
parents(), parents(),
childs() {} children() {}
TaskDCP(int64_t id, int weight) TaskDCP(int64_t id, Weight weight, CrossbarUsage crossbarUsage = 0)
: onnx_mlir::LabeledListNode<TaskDCP>(), : onnx_mlir::LabeledListNode<TaskDCP>(),
spatWeightedCompute(), spatWeightedCompute(),
aest(0), aest(0),
alst(0), alst(0),
scheduledCPU(), scheduledCpu(),
weight(weight), weight(weight),
origWeight(weight), baseWeight(weight),
crossbarUsage(crossbarUsage),
flag(0), flag(0),
syntheticId(id), syntheticId(id),
parents(), parents(),
childs() {} children() {}
TaskDCP(const TaskDCP& node) = delete; TaskDCP(const TaskDCP& node) = delete;
TaskDCP(TaskDCP&& node) = default; TaskDCP(TaskDCP&& node) = default;
void setCPU(CPU cpu) { scheduledCPU = cpu; } void setCpu(CPU cpu) { scheduledCpu = cpu; }
std::optional<CPU> getCPU() const { return scheduledCPU; } std::optional<CPU> getCpu() const { return scheduledCpu; }
void resetCPU() { scheduledCPU = std::nullopt; } void resetCpu() { scheduledCpu = std::nullopt; }
int getWeight() const { Weight getWeight() const {
if (isScheduled()) if (isScheduled())
return weight; return weight;
return origWeight; return baseWeight;
} }
void setWeight(int val) { weight = val; } void setWeight(Weight value) { weight = value; }
void resetWeight() { weight = origWeight; } void resetWeight() { weight = baseWeight; }
int computeWeight(GraphDCP* graph, CPU cpu); Weight computeWeightOnCpu(GraphDCP* graph, CPU cpu);
CrossbarUsage getCrossbarUsage() const { return crossbarUsage; }
bool hasParents() const { return parents.size() != 0; } bool hasParents() const { return parents.size() != 0; }
bool hasChilds() const { return childs.size() != 0; } bool hasChildren() const { return children.size() != 0; }
int getAEST() const { return aest; } Time getAest() const { return aest; }
int getALST() const { return alst; } Time getAlst() const { return alst; }
void setAEST(int val) { void setAest(Time value) { aest = value; }
assert(val >= 0); void setAlst(Time value) { alst = value; }
aest = val; bool hasDescendant(TaskDCP* child);
}
void setALST(int val) { alst = val; }
bool hasDescendent(TaskDCP* child);
int64_t Id() const { int64_t Id() const {
if (spatWeightedCompute) if (spatWeightedCompute)
return reinterpret_cast<int64_t>(spatWeightedCompute.getAsOpaquePointer()); return reinterpret_cast<int64_t>(spatWeightedCompute.getAsOpaquePointer());
return syntheticId; return syntheticId;
} }
bool isCP() const { return alst == aest; } bool isCriticalPath() const { return alst == aest; }
bool isScheduled() const { return scheduledCPU.has_value(); } bool isScheduled() const { return scheduledCpu.has_value(); }
onnx_mlir::spatial::SpatWeightedCompute getSpatWeightedCompute() const { return spatWeightedCompute; } onnx_mlir::spatial::SpatWeightedCompute getSpatWeightedCompute() const { return spatWeightedCompute; }
void setFlag(long long val) { flag = val; } void setFlag(long long val) { flag = val; }
long long getFlag() const { return flag; } long long getFlag() const { return flag; }
onnx_mlir::LabeledList<TaskDCP>::Iterator getTopologicalPosition() { return getIterator(); } onnx_mlir::LabeledList<TaskDCP>::Iterator getTopologicalIterator() { return getIterator(); }
friend std::optional<DoubleEdge> addEdge(TaskDCP* parent, TaskDCP* child, Weight_t weight); friend std::optional<EdgePair> addEdge(TaskDCP* parent, TaskDCP* child, Weight weight, bool isScheduling);
friend void removeEdge(TaskDCP* parent, TaskDCP* child); friend void removeEdge(TaskDCP* parent, TaskDCP* child, bool isScheduling);
friend int getTranferCost(TaskDCP* parent, TaskDCP* child); friend Weight getTransferCost(TaskDCP* parent, TaskDCP* child);
}; };
struct TaskInsertion { struct TaskInsertion {
std::optional<DoubleEdge> beforeNode; struct TopologicalMoveRecord {
std::optional<DoubleEdge> afterNode; TaskDCP* task;
onnx_mlir::LabeledList<TaskDCP>::Iterator oldTopologicalPosition; TaskDCP* nextTask;
};
std::optional<EdgePair> beforeNode;
std::optional<EdgePair> afterNode;
std::vector<TopologicalMoveRecord> topologicalMoves;
CPU cpuModified; CPU cpuModified;
TaskDCP* taskInserted; TaskDCP* taskInserted;
GraphDCP* graph; GraphDCP* graph;

View File

@@ -1,58 +1,57 @@
#pragma once #pragma once
#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DenseSet.h"
#include <cassert> #include <cassert>
#include <type_traits> #include <type_traits>
#include <iostream>
#include <unordered_set>
template <typename T, typename = void> template <typename T, typename = void>
struct has_pop_front : std::false_type {}; struct HasPopFront : std::false_type {};
template <typename T> template <typename T>
struct has_pop_front<T, std::void_t<decltype(std::declval<T>().pop_front())>> : std::true_type {}; struct HasPopFront<T, std::void_t<decltype(std::declval<T>().pop_front())>> : std::true_type {};
template <typename T> template <typename T>
class UniqueWorkList { class UniqueWorkList {
using V = typename T::value_type; using ValueType = typename T::value_type;
T storage; T storage;
llvm::DenseSet<V> set; llvm::DenseSet<ValueType> uniqueElements;
public: public:
UniqueWorkList() = default; UniqueWorkList() = default;
template <typename arg_ty> template <typename RangeT>
UniqueWorkList(const arg_ty& from) UniqueWorkList(const RangeT& from)
: storage() { : storage() {
for (auto& element : from) { for (auto& element : from) {
if (!set.contains(element)) { if (!uniqueElements.contains(element)) {
storage.push_back(element); storage.push_back(element);
set.insert(element); uniqueElements.insert(element);
} }
} }
} }
bool empty() const { return storage.empty(); } bool empty() const { return storage.empty(); }
void reserve(size_t val) { return storage.reserve(val); } void reserve(size_t value) { return storage.reserve(value); }
size_t size() const { return storage.size(); } size_t size() const { return storage.size(); }
V& at(size_t i) { return storage.at(i); } ValueType& at(size_t index) { return storage.at(index); }
const V& at(size_t i) const { return storage.at(i); } const ValueType& at(size_t index) const { return storage.at(index); }
V& front() { return storage.front(); } ValueType& front() { return storage.front(); }
V& back() { return storage.back(); } ValueType& back() { return storage.back(); }
bool push_back(const V& val) { bool pushBack(const ValueType& value) {
if (!set.contains(val)) { if (!uniqueElements.contains(value)) {
storage.push_back(val); storage.push_back(value);
set.insert(val); uniqueElements.insert(value);
return true; return true;
} }
return false; return false;
} }
void pop_front() { void popFront() {
if constexpr (has_pop_front<T>::value) if constexpr (HasPopFront<T>::value)
storage.pop_front(); storage.pop_front();
else else
assert(false && "Underlying storage type does not support pop_front()"); assert(false && "Underlying storage type does not support pop_front()");
@@ -61,15 +60,15 @@ public:
auto cbegin() const { return storage.cbegin(); } auto cbegin() const { return storage.cbegin(); }
auto cend() const { return storage.cend(); } auto cend() const { return storage.cend(); }
void pop_back() { storage.pop_back(); } void popBack() { storage.pop_back(); }
template <typename Iterator, typename Mapper> template <typename Iterator, typename Mapper>
bool allElementContained(Iterator start, Iterator end, Mapper map) { bool allElementsContained(Iterator begin, Iterator end, Mapper map) const {
while (start != end) { auto it = begin;
if (!set.contains(map(*start))) while (it != end) {
if (!uniqueElements.contains(map(*it)))
return false; return false;
std::advance(start, 1); std::advance(it, 1);
} }
return true; return true;
} }
@@ -77,4 +76,8 @@ public:
auto begin() { return storage.begin(); } auto begin() { return storage.begin(); }
auto end() { return storage.end(); } auto end() { return storage.end(); }
auto begin() const { return storage.begin(); }
auto end() const { return storage.end(); }
}; };

View File

@@ -6,60 +6,106 @@
#include <algorithm> #include <algorithm>
#include <cstdint> #include <cstdint>
#include <limits>
#include <list> #include <list>
#include <type_traits>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "src/Accelerators/PIM/Common/LabeledList.hpp" #include "src/Accelerators/PIM/Common/LabeledList.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp" #include "src/Accelerators/PIM/Dialect/Spatial/SpatialOps.hpp"
#include "src/Support/TypeUtilities.hpp"
using CPU = int; using CPU = int;
using Weight_t = int; using Weight = unsigned long long;
using Time = unsigned long long;
using CrossbarUsage = unsigned long long;
class TaskDCP; class TaskDCP;
class GraphDCP; class GraphDCP;
using Edge_t = std::pair<TaskDCP*, Weight_t>; struct Edge {
using DoubleEdge = std::pair<Edge_t, Edge_t>; TaskDCP* first;
using EdgesIndex = std::tuple<int64_t, int64_t, int64_t>; Weight second;
bool isScheduling = false;
};
using EdgePair = std::pair<Edge, Edge>;
using IndexedEdge = std::tuple<int64_t, int64_t, int64_t>;
inline void fastRemove(std::vector<Edge>& vector, TaskDCP* toRemove, bool isScheduling) {
auto position = std::find_if(vector.begin(), vector.end(), [toRemove, isScheduling](Edge edge) {
return edge.first == toRemove && edge.isScheduling == isScheduling;
});
if (position != vector.end()) {
std::swap(*(vector.end() - 1), *position);
vector.pop_back();
}
}
// Unordered ("swap-and-pop") erase of a raw task pointer from a vector.
// O(n) search, O(1) removal; element order is NOT preserved. No-op when the
// pointer is absent.
inline void fastRemove(std::vector<TaskDCP*>& vector, TaskDCP* toRemove) {
  const auto position = std::find(vector.begin(), vector.end(), toRemove);
  if (position == vector.end())
    return;
  std::iter_swap(position, std::prev(vector.end()));
  vector.pop_back();
}
// Unordered ("swap-and-pop") erase at an already-located iterator.
// Accepts end() as a no-op so callers can pass a failed find() result.
template <typename P>
void fastRemove(std::vector<Edge>& vector, P position) {
  if (position == vector.end())
    return;
  std::iter_swap(position, std::prev(vector.end()));
  vector.pop_back();
}
template <typename T> template <typename T>
void fastRemove(std::vector<std::pair<T*, Weight_t>>& vector, T* to_remove) { inline T checkedAdd(T lhs, T rhs) {
auto position = static_assert(std::is_unsigned_v<T>, "checkedAdd only supports unsigned types");
std::find_if(vector.begin(), vector.end(), [to_remove](Edge_t edge) { return edge.first == to_remove; }); assert(lhs <= std::numeric_limits<T>::max() - rhs && "unsigned addition overflow");
if (position != vector.end()) { return lhs + rhs;
std::swap(*(vector.end() - 1), *position);
vector.pop_back();
}
} }
inline void fastRemove(std::vector<TaskDCP*>& vector, TaskDCP* to_remove) { template <typename T>
auto position = inline T checkedMultiply(T lhs, T rhs) {
std::find_if(vector.begin(), vector.end(), [to_remove](TaskDCP* element) { return element == to_remove; }); static_assert(std::is_unsigned_v<T>, "checkedMultiply only supports unsigned types");
if (position != vector.end()) { if (lhs == 0 || rhs == 0)
std::swap(*(vector.end() - 1), *position); return 0;
vector.pop_back(); assert(lhs <= std::numeric_limits<T>::max() / rhs && "unsigned multiplication overflow");
} return lhs * rhs;
} }
template <typename T, typename P> template <typename T>
void fastRemove(std::vector<std::pair<T*, Weight_t>>& vector, P position) { inline T addOrMax(T lhs, T rhs) {
if (position != vector.end()) { static_assert(std::is_unsigned_v<T>, "addOrMax only supports unsigned types");
std::swap(*(vector.end() - 1), *position); if (lhs == std::numeric_limits<T>::max() || rhs == std::numeric_limits<T>::max())
vector.pop_back(); return std::numeric_limits<T>::max();
} return checkedAdd(lhs, rhs);
} }
// TODO Fare qualcosa di sensato template <typename T>
inline int64_t getSpatWeightCompute(onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute) { inline T subtractOrZero(T lhs, T rhs) {
int64_t tot = 0; static_assert(std::is_unsigned_v<T>, "subtractOrZero only supports unsigned types");
for (auto& region : spatWeightedCompute.getBody()) { if (lhs == std::numeric_limits<T>::max())
for (auto& inst : region) { return lhs;
for (auto result : inst.getResults()) if (rhs == std::numeric_limits<T>::max() || lhs <= rhs)
if (auto element = llvm::dyn_cast<mlir::ShapedType>(result.getType())) return 0;
tot += onnx_mlir::getSizeInBytes(element); return lhs - rhs;
} }
}
return tot; inline Time slackOrZero(Time earliestStart, Time latestStart) { return subtractOrZero(latestStart, earliestStart); }
// Weight model for a compute node: a flat per-operation cost. Counts every
// op in every block of the compute body and multiplies by a fixed constant,
// with overflow-checked arithmetic throughout.
inline Weight getSpatComputeWeight(onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute) {
  constexpr Weight kOperationWeight = 100;
  Weight opCount = 0;
  for (auto& bodyBlock : spatWeightedCompute.getBody()) {
    for ([[maybe_unused]] auto& bodyOp : bodyBlock)
      opCount = checkedAdd(opCount, static_cast<Weight>(1));
  }
  return checkedMultiply(opCount, kOperationWeight);
}
// Crossbar demand of a compute node: the number of SpatWeightedVMMOp
// instructions in its body (each VMM is counted as one crossbar,
// overflow-checked).
inline CrossbarUsage getSpatComputeCrossbarUsage(onnx_mlir::spatial::SpatWeightedCompute spatWeightedCompute) {
  CrossbarUsage usedCrossbars = 0;
  for (auto& bodyBlock : spatWeightedCompute.getBody()) {
    for (auto& bodyOp : bodyBlock) {
      if (llvm::isa<onnx_mlir::spatial::SpatWeightedVMMOp>(bodyOp))
        usedCrossbars = checkedAdd(usedCrossbars, static_cast<CrossbarUsage>(1));
    }
  }
  return usedCrossbars;
}

View File

@@ -5,7 +5,6 @@
#include "mlir/IR/Region.h" #include "mlir/IR/Region.h"
#include "mlir/IR/Value.h" #include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h" #include "mlir/IR/ValueRange.h"
#include "mlir/IR/Verifier.h"
#include "mlir/Pass/Pass.h" #include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h" #include "mlir/Support/LLVM.h"
@@ -14,13 +13,12 @@
#include "llvm/Support/Debug.h" #include "llvm/Support/Debug.h"
#include <cstddef> #include <cstddef>
#include <cstdint>
#include <functional> #include <functional>
#include <iterator> #include <iterator>
#include <memory> #include <memory>
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
#include "DCPGraph/DCPAnalysis.hpp" #include "DCPGraph/DCPAnalysis.hpp"
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
using namespace mlir; using namespace mlir;
@@ -36,10 +34,10 @@ struct ComputeValueResults {
class LazyInsertComputeResult { class LazyInsertComputeResult {
using InsertPoint = mlir::IRRewriter::InsertPoint; using InsertPoint = mlir::IRRewriter::InsertPoint;
ComputeValueResults computeResults; ComputeValueResults computeResults;
Value channelNewOpVal; Value channelValue;
bool onlyChannel; bool onlyChannel;
std::function<void(InsertPoint insertPoint)> channelSendInserter; std::function<void(InsertPoint insertPoint)> channelSendInserter;
InsertPoint insertPointSend; InsertPoint sendInsertPoint;
std::function<std::pair<Value, std::function<void(InsertPoint)>>()> channelNewInserter; std::function<std::pair<Value, std::function<void(InsertPoint)>>()> channelNewInserter;
public: public:
@@ -49,7 +47,7 @@ public:
: computeResults(computeValueResults), : computeResults(computeValueResults),
onlyChannel(isOnlyChannel), onlyChannel(isOnlyChannel),
channelSendInserter(nullptr), channelSendInserter(nullptr),
insertPointSend({}), sendInsertPoint({}),
channelNewInserter(channelNewInserter) {} channelNewInserter(channelNewInserter) {}
struct ChannelOrLocalOp { struct ChannelOrLocalOp {
@@ -59,23 +57,23 @@ public:
bool onlyChanneled() const { return onlyChannel; } bool onlyChanneled() const { return onlyChannel; }
ChannelOrLocalOp getAsChannelValueAndInsertSender(SpatWeightedCompute spatWeightedCompute) { ChannelOrLocalOp getAsChannelValueAndInsertSender(SpatWeightedCompute currentCompute) {
auto [first, second] = channelNewInserter(); auto [newChannelValue, senderInserter] = channelNewInserter();
channelNewOpVal = first; channelValue = newChannelValue;
channelSendInserter = second; channelSendInserter = senderInserter;
auto BB = computeResults.innerValue.getParentBlock(); auto* block = computeResults.innerValue.getParentBlock();
if (!BB->empty() && isa<spatial::SpatYieldOp>(BB->back())) if (!block->empty() && isa<spatial::SpatYieldOp>(block->back()))
insertPointSend = InsertPoint(BB, --BB->end()); sendInsertPoint = InsertPoint(block, --block->end());
else else
insertPointSend = InsertPoint(BB, BB->end()); sendInsertPoint = InsertPoint(block, block->end());
if (spatWeightedCompute) { if (currentCompute) {
for (auto& BB : spatWeightedCompute.getBody()) for (auto& block : currentCompute.getBody())
if (&BB == insertPointSend.getBlock()) if (&block == sendInsertPoint.getBlock())
return {computeResults.innerValue, false}; return {computeResults.innerValue, false};
} }
channelSendInserter(insertPointSend); channelSendInserter(sendInsertPoint);
return {channelNewOpVal, true}; return {channelValue, true};
} }
ChannelOrLocalOp getAsChannelValueAndInsertSender() { return getAsChannelValueAndInsertSender({}); } ChannelOrLocalOp getAsChannelValueAndInsertSender() { return getAsChannelValueAndInsertSender({}); }
@@ -86,7 +84,7 @@ struct MergeComputeNodesPass : PassWrapper<MergeComputeNodesPass, OperationPass<
private: private:
DenseMap<SpatWeightedCompute, LazyInsertComputeResult> newComputeNodeResults; DenseMap<SpatWeightedCompute, LazyInsertComputeResult> newComputeNodeResults;
DenseMap<SpatWeightedCompute, SpatWeightedCompute> oldToNewComputeMap; DenseMap<SpatWeightedCompute, SpatWeightedCompute> oldToNewComputeMap;
DenseMap<int64_t, SpatWeightedCompute> cputToNewComputeMap; DenseMap<int64_t, SpatWeightedCompute> cpuToNewComputeMap;
public: public:
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(MergeComputeNodesPass) MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(MergeComputeNodesPass)
@@ -101,17 +99,16 @@ public:
void runOnOperation() override { void runOnOperation() override {
DCPAnalysisResult& analysisResult = getAnalysis<spatial::DCPAnalysis>().getResult(); DCPAnalysisResult& analysisResult = getAnalysis<spatial::DCPAnalysis>().getResult();
auto& lastComputeOfCpu = analysisResult.isLastComputeOfACpu; auto& lastComputeOfCpu = analysisResult.isLastComputeOfCpu;
auto& cpuToLastComputeMap = analysisResult.cpuToLastComputeMap; auto& cpuToLastComputeMap = analysisResult.cpuToLastComputeMap;
IRRewriter rewriter(&getContext());
for (auto currentComputeNode : analysisResult.dominanceOrderCompute) { for (auto currentComputeNode : analysisResult.dominanceOrderCompute) {
size_t cpu = analysisResult.computeToCPUMap.at(currentComputeNode); size_t cpu = analysisResult.computeToCpuMap.at(currentComputeNode);
if (!cputToNewComputeMap.contains(cpu)) { if (!cpuToNewComputeMap.contains(cpu)) {
ValueTypeRange<ResultRange> newWeightedComputeType = cpuToLastComputeMap.at(cpu).getResultTypes(); ValueTypeRange<ResultRange> newWeightedComputeType = cpuToLastComputeMap.at(cpu).getResultTypes();
auto [newWeightedCompute, computeValueResult] = createNewComputeNode( auto [newWeightedCompute, computeValueResult] = createNewComputeNode(
currentComputeNode, newWeightedComputeType, lastComputeOfCpu.contains(currentComputeNode)); currentComputeNode, newWeightedComputeType, lastComputeOfCpu.contains(currentComputeNode));
cputToNewComputeMap[cpu] = newWeightedCompute; cpuToNewComputeMap[cpu] = newWeightedCompute;
newComputeNodeResults.insert( newComputeNodeResults.insert(
std::make_pair(currentComputeNode, std::make_pair(currentComputeNode,
createLazyComputeResult( createLazyComputeResult(
@@ -119,7 +116,7 @@ public:
} }
else { else {
auto [newWeightedCompute, computeValueResult] = mergeIntoComputeNode( auto [newWeightedCompute, computeValueResult] = mergeIntoComputeNode(
cputToNewComputeMap[cpu], currentComputeNode, lastComputeOfCpu.contains(currentComputeNode)); cpuToNewComputeMap[cpu], currentComputeNode, lastComputeOfCpu.contains(currentComputeNode));
newComputeNodeResults.insert( newComputeNodeResults.insert(
std::make_pair(currentComputeNode, std::make_pair(currentComputeNode,
createLazyComputeResult( createLazyComputeResult(
@@ -127,10 +124,10 @@ public:
} }
} }
for (auto computeNodetoRemove : llvm::make_early_inc_range(llvm::reverse(analysisResult.dominanceOrderCompute))) { for (auto computeNodeToRemove : llvm::make_early_inc_range(llvm::reverse(analysisResult.dominanceOrderCompute))) {
for (auto users : computeNodetoRemove->getUsers()) for (auto users : computeNodeToRemove->getUsers())
users->dump(); users->dump();
computeNodetoRemove.erase(); computeNodeToRemove.erase();
} }
func::FuncOp func = getOperation(); func::FuncOp func = getOperation();
dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_dcp_merged"); dumpModule(cast<ModuleOp>(func->getParentOp()), "spatial1_dcp_merged");
@@ -186,9 +183,9 @@ private:
LazyInsertComputeResult& lazyArgWeight = newComputeNodeResults.at(argWeightCompute); LazyInsertComputeResult& lazyArgWeight = newComputeNodeResults.at(argWeightCompute);
auto [channelVal, isChannel] = lazyArgWeight.getAsChannelValueAndInsertSender(); auto [channelVal, isChannel] = lazyArgWeight.getAsChannelValueAndInsertSender();
assert(isChannel == true); assert(isChannel == true);
spatial::SpatChannelReceiveOp reciveOp = spatial::SpatChannelReceiveOp receiveOp =
spatial::SpatChannelReceiveOp::create(rewriter, loc, argWeightCompute.getType(0), channelVal); spatial::SpatChannelReceiveOp::create(rewriter, loc, argWeightCompute.getType(0), channelVal);
mapper.map(oldBB.getArgument(indexOld - indexOldStart), reciveOp); mapper.map(oldBB.getArgument(indexOld - indexOldStart), receiveOp);
} }
} }
@@ -238,8 +235,8 @@ private:
auto& toBB = toCompute.getBody().front(); auto& toBB = toCompute.getBody().front();
auto& fromBB = fromCompute.getBody().front(); auto& fromBB = fromCompute.getBody().front();
auto inputeArgMutable = toCompute.getInputsMutable(); auto inputArgMutable = toCompute.getInputsMutable();
// Insert reciveOp // Insert receiveOp
rewriter.setInsertionPointToEnd(&toBB); rewriter.setInsertionPointToEnd(&toBB);
for (auto [bbIndex, arg] : llvm::enumerate(fromCompute.getInputs())) { for (auto [bbIndex, arg] : llvm::enumerate(fromCompute.getInputs())) {
if (auto argWeightCompute = llvm::dyn_cast_if_present<SpatWeightedCompute>(arg.getDefiningOp())) { if (auto argWeightCompute = llvm::dyn_cast_if_present<SpatWeightedCompute>(arg.getDefiningOp())) {
@@ -248,9 +245,9 @@ private:
LazyInsertComputeResult::ChannelOrLocalOp channelOrLocal = LazyInsertComputeResult::ChannelOrLocalOp channelOrLocal =
lazyArgWeight.getAsChannelValueAndInsertSender(toCompute); lazyArgWeight.getAsChannelValueAndInsertSender(toCompute);
if (channelOrLocal.isChannel) { if (channelOrLocal.isChannel) {
spatial::SpatChannelReceiveOp reciveOp = spatial::SpatChannelReceiveOp receiveOp =
spatial::SpatChannelReceiveOp::create(rewriter, loc, argWeightCompute.getType(0), channelOrLocal.data); spatial::SpatChannelReceiveOp::create(rewriter, loc, argWeightCompute.getType(0), channelOrLocal.data);
mapper.map(fromBB.getArgument(bbIndex), reciveOp.getResult()); mapper.map(fromBB.getArgument(bbIndex), receiveOp.getResult());
} }
else { else {
mapper.map(fromBB.getArgument(bbIndex), channelOrLocal.data); mapper.map(fromBB.getArgument(bbIndex), channelOrLocal.data);
@@ -262,7 +259,7 @@ private:
if (founded == toCompute.getInputs().end()) { if (founded == toCompute.getInputs().end()) {
size_t sizeW = toCompute.getWeights().size(); size_t sizeW = toCompute.getWeights().size();
size_t sizeI = toCompute.getInputs().size(); size_t sizeI = toCompute.getInputs().size();
inputeArgMutable.append(arg); inputArgMutable.append(arg);
assert(sizeW == toCompute.getWeights().size()); assert(sizeW == toCompute.getWeights().size());
assert(sizeI + 1 == toCompute.getInputs().size()); assert(sizeI + 1 == toCompute.getInputs().size());
assert(sizeW + sizeI + 1 == toCompute.getOperands().size()); assert(sizeW + sizeI + 1 == toCompute.getOperands().size());
@@ -281,6 +278,12 @@ private:
assert(mapper.contains(oldBBarg)); assert(mapper.contains(oldBBarg));
ComputeValueResults computeValueResults; ComputeValueResults computeValueResults;
auto remapWeightIndex = [&](auto weightedOp) {
auto oldIndex = weightedOp.getWeightIndex();
auto newWeight = mapper.lookup(*std::next(fromCompute.getWeights().begin(), oldIndex));
auto newIndex = std::distance(toCompute.getWeights().begin(), llvm::find(toCompute.getWeights(), newWeight));
weightedOp.setWeightIndex(newIndex);
};
for (auto& op : fromCompute.getOps()) { for (auto& op : fromCompute.getOps()) {
if (auto yield = dyn_cast<spatial::SpatYieldOp>(&op)) { if (auto yield = dyn_cast<spatial::SpatYieldOp>(&op)) {
computeValueResults.innerValue = mapper.lookup(yield.getOperand(0)); computeValueResults.innerValue = mapper.lookup(yield.getOperand(0));
@@ -289,20 +292,10 @@ private:
} }
else { else {
auto newInst = rewriter.clone(op, mapper); auto newInst = rewriter.clone(op, mapper);
// TODO Refactor in a lambda? same code just different cast, but templated lambda are C++20 and a free function if (auto weightedMvmOp = llvm::dyn_cast<spatial::SpatWeightedMVMOp>(newInst))
// is a bit too much remapWeightIndex(weightedMvmOp);
if (auto vmOp = llvm::dyn_cast<spatial::SpatWeightedMVMOp>(newInst)) { if (auto weightedVmmOp = llvm::dyn_cast<spatial::SpatWeightedVMMOp>(newInst))
auto oldIndex = vmOp.getWeightIndex(); remapWeightIndex(weightedVmmOp);
auto newWeight = mapper.lookup(*std::next(fromCompute.getWeights().begin(), oldIndex));
auto newIndex = std::distance(toCompute.getWeights().begin(), llvm::find(toCompute.getWeights(), newWeight));
vmOp.setWeightIndex(newIndex);
}
if (auto vmOp = llvm::dyn_cast<spatial::SpatWeightedVMMOp>(newInst)) {
auto oldIndex = vmOp.getWeightIndex();
auto newWeight = mapper.lookup(*std::next(fromCompute.getWeights().begin(), oldIndex));
auto newIndex = std::distance(toCompute.getWeights().begin(), llvm::find(toCompute.getWeights(), newWeight));
vmOp.setWeightIndex(newIndex);
}
} }
} }
@@ -323,19 +316,18 @@ private:
IRRewriter rewriter(context); IRRewriter rewriter(context);
rewriter.setInsertionPointToStart(&funcOp.front()); rewriter.setInsertionPointToStart(&funcOp.front());
auto saveInsertionPointChnNew = rewriter.saveInsertionPoint(); auto savedChannelInsertPoint = rewriter.saveInsertionPoint();
auto insertNew = [saveInsertionPointChnNew, context, loc, computeValueResults]() { auto insertNew = [savedChannelInsertPoint, context, loc, computeValueResults]() {
IRRewriter rewriter(context); IRRewriter rewriter(context);
rewriter.restoreInsertionPoint(saveInsertionPointChnNew); rewriter.restoreInsertionPoint(savedChannelInsertPoint);
auto channelOp = spatial::SpatChannelNewOp::create(rewriter, loc, spatial::SpatChannelType::get(context)); auto channelOp = spatial::SpatChannelNewOp::create(rewriter, loc, spatial::SpatChannelType::get(context));
auto channelVal = channelOp.getResult(); auto channelVal = channelOp.getResult();
auto insertVal = auto insertVal = [&context, loc, computeValueResults, channelVal](mlir::IRRewriter::InsertPoint sendInsertPoint) {
[&context, loc, computeValueResults, channelVal](mlir::IRRewriter::InsertPoint insertPointChnSend) { IRRewriter rewriter(context);
IRRewriter rewriter(context); rewriter.restoreInsertionPoint(sendInsertPoint);
rewriter.restoreInsertionPoint(insertPointChnSend); auto spatSend = spatial::SpatChannelSendOp::create(rewriter, loc, channelVal, computeValueResults.innerValue);
auto spatSend = spatial::SpatChannelSendOp::create(rewriter, loc, channelVal, computeValueResults.innerValue); return spatSend;
return spatSend; };
};
std::pair<Value, std::function<void(mlir::IRRewriter::InsertPoint)>> ret {channelVal, insertVal}; std::pair<Value, std::function<void(mlir::IRRewriter::InsertPoint)>> ret {channelVal, insertVal};
return ret; return ret;
}; };

View File

@@ -25,8 +25,15 @@ function(add_pim_unittest test_name)
set_tests_properties(${test_name} PROPERTIES LABELS pim-unittest) set_tests_properties(${test_name} PROPERTIES LABELS pim-unittest)
endfunction() endfunction()
add_pim_unittest(TestPIM add_pim_unittest(LabeledListTest
TestPIM.cpp LabeledListTest.cpp
LINK_LIBS PRIVATE
OMPimCommon
)
add_pim_unittest(DCPTest
DCPTest.cpp
LINK_LIBS PRIVATE LINK_LIBS PRIVATE
OMPimCommon OMPimCommon

528
test/PIM/DCPTest.cpp Normal file
View File

@@ -0,0 +1,528 @@
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <initializer_list>
#include <iostream>
#include <limits>
#include <optional>
#include <unordered_map>
#include <vector>
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/Graph.hpp"
#include "src/Compiler/CompilerOptions.hpp"
namespace {
// Expected placement of a single task in a CPU schedule; compared
// field-by-field against GraphDCP's actual scheduled tasks by
// checkScheduledTasks.
struct ExpectedScheduledTask {
  size_t nodeIndex; // identifier of the task node
  Time aest;        // expected earliest start time (aest)
  Time alst;        // expected latest start time (alst)
  Weight weight;    // expected weight; acts as the task's duration in the overlap checks
};
// Where a task ended up after scheduling: the CPU it was assigned to plus
// the scheduler-reported timing/weight info for that task.
struct ScheduledPlacement {
  CPU cpu;                          // CPU the task was scheduled on
  GraphDCP::ScheduledTaskInfo task; // scheduler-reported info (nodeIndex, aest, alst, weight)
};
// Scratch directory for all DCP test artifacts, rooted in the system
// temporary directory.
std::filesystem::path getDcpTestOutputDir() {
  return std::filesystem::temp_directory_path() / "raptor-test-pim";
}
// Point the compiler's dump machinery at a freshly recreated scratch
// directory, so stale dot files from earlier runs cannot be mistaken for
// this run's output.
void configureDcpDotOutput() {
  const std::filesystem::path dir = getDcpTestOutputDir();
  std::error_code ec;
  std::filesystem::remove_all(dir, ec);
  std::filesystem::create_directories(dir, ec);
  assert(!ec);
  onnx_mlir::outputBaseName = (dir / "DCPTest.mlir").string();
}
// Return the dot file in <output>/dcp_graph with the lexicographically
// greatest filename, or nullopt when the directory or files are missing.
// NOTE(review): this assumes dot filenames sort chronologically — confirm
// against the dump naming scheme.
std::optional<std::filesystem::path> getLatestDcpDotFile() {
  const auto dotDir = getDcpTestOutputDir() / "dcp_graph";
  if (!std::filesystem::exists(dotDir))
    return std::nullopt;
  std::optional<std::filesystem::path> newest;
  for (const auto& candidate : std::filesystem::directory_iterator(dotDir)) {
    const bool isDotFile = candidate.is_regular_file() && candidate.path().extension() == ".dot";
    if (!isDotFile)
      continue;
    if (!newest.has_value() || newest->filename() < candidate.path().filename())
      newest = candidate.path();
  }
  return newest;
}
void dumpDcpFailureArtifacts() {
auto latestDot = getLatestDcpDotFile();
if (!latestDot) {
std::cerr << "No DCP dot file was produced.\n";
return;
}
std::cerr << "DCP dot file: " << latestDot->string() << '\n';
std::ifstream dotFile(*latestDot);
if (!dotFile.is_open()) {
std::cerr << "Failed to open DCP dot file.\n";
return;
}
std::cerr << dotFile.rdbuf();
}
// Dump one CPU's actual schedule to stderr, one task per line, for
// debugging failed expectations.
void printCpuSchedule(GraphDCP& graph, CPU cpu) {
  const auto schedule = graph.getScheduledTasks(cpu);
  std::cerr << "CPU " << cpu << " actual schedule:\n";
  for (const auto& entry : schedule) {
    std::cerr << " " << entry.nodeIndex << ") aest: " << entry.aest << " alst: " << entry.alst
              << " weight: " << entry.weight << '\n';
  }
}
// Dump the schedule of every CPU in the graph to stderr.
void printGraphSchedule(GraphDCP& graph) {
  CPU cpu = 0;
  while (cpu < graph.cpuCount()) {
    printCpuSchedule(graph, cpu);
    ++cpu;
  }
}
// Compare the actual schedule of `cpu` against an expected task list,
// element by element. On any mismatch (length or field values) the actual
// schedule is dumped to stderr and false is returned.
bool checkScheduledTasks(GraphDCP& graph, CPU cpu, std::initializer_list<ExpectedScheduledTask> expectedTasks) {
  const auto actualTasks = graph.getScheduledTasks(cpu);
  if (actualTasks.size() != expectedTasks.size()) {
    printCpuSchedule(graph, cpu);
    return false;
  }
  const ExpectedScheduledTask* expected = expectedTasks.begin();
  for (const auto& actual : actualTasks) {
    const bool sameTask = actual.nodeIndex == expected->nodeIndex && actual.aest == expected->aest
        && actual.alst == expected->alst && actual.weight == expected->weight;
    if (!sameTask) {
      printCpuSchedule(graph, cpu);
      return false;
    }
    ++expected;
  }
  return true;
}
// Collect every scheduled task across all CPUs into a map keyed by node
// index, recording which CPU each task landed on. Asserts that no task
// appears in more than one CPU's schedule.
std::unordered_map<size_t, ScheduledPlacement> collectScheduledPlacements(GraphDCP& graph) {
  std::unordered_map<size_t, ScheduledPlacement> scheduledPlacements;
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    for (const auto& task : graph.getScheduledTasks(cpu)) {
      // emplace's bool result is true only on first insertion.
      const bool inserted = scheduledPlacements.emplace(task.nodeIndex, ScheduledPlacement {cpu, task}).second;
      assert(inserted && "task scheduled multiple times");
      // Silence the unused-variable warning in NDEBUG builds, where the
      // assert (the only reader of `inserted`) compiles away.
      (void) inserted;
    }
  }
  return scheduledPlacements;
}
// Verify that exactly `expectedTaskCount` distinct tasks were scheduled
// across all CPUs; on mismatch, report and print every schedule.
bool checkAllTasksScheduled(GraphDCP& graph, size_t expectedTaskCount) {
    const size_t scheduledCount = collectScheduledPlacements(graph).size();
    if (scheduledCount == expectedTaskCount)
        return true;
    std::cerr << "Expected " << expectedTaskCount << " scheduled tasks, got " << scheduledCount << "\n";
    printGraphSchedule(graph);
    return false;
}
// Per-CPU schedule sanity: every task must satisfy aest <= alst, and a task
// may not start before the previous task on the same CPU has completed.
bool checkCpuSchedulesDoNotOverlap(GraphDCP& graph) {
    const auto numCpus = graph.cpuCount();
    for (CPU cpu = 0; cpu < numCpus; ++cpu) {
        bool havePrevious = false;
        Time previousCompletion = 0;
        for (const auto& task : graph.getScheduledTasks(cpu)) {
            if (task.aest > task.alst) {
                std::cerr << "Task " << task.nodeIndex << " on CPU " << cpu << " has aest > alst\n";
                printCpuSchedule(graph, cpu);
                return false;
            }
            if (havePrevious && task.aest < previousCompletion) {
                std::cerr << "CPU " << cpu << " has overlapping tasks\n";
                printCpuSchedule(graph, cpu);
                return false;
            }
            // addOrMax: saturating addition, so a "max" weight cannot wrap.
            previousCompletion = addOrMax(task.aest, task.weight);
            havePrevious = true;
        }
    }
    return true;
}
// Verify every edge: the child may not start before the parent completes,
// plus the transfer cost when parent and child run on different CPUs.
bool checkDependencyConstraints(GraphDCP& graph, llvm::ArrayRef<IndexedEdge> edges) {
    auto placements = collectScheduledPlacements(graph);
    for (const auto& [parentIndex, childIndex, transferCost] : edges) {
        const auto& parentPlacement = placements.at(parentIndex);
        const auto& childPlacement = placements.at(childIndex);
        Time earliestChildStart = addOrMax(parentPlacement.task.aest, parentPlacement.task.weight);
        if (parentPlacement.cpu != childPlacement.cpu)
            earliestChildStart = addOrMax(earliestChildStart, static_cast<Weight>(transferCost));
        if (childPlacement.task.aest < earliestChildStart) {
            std::cerr << "Dependency violation for edge " << parentIndex << " -> " << childIndex << '\n';
            printGraphSchedule(graph);
            return false;
        }
    }
    return true;
}
// Makespan of the computed schedule: the latest completion time of any task.
Time getMaxCompletion(GraphDCP& graph) {
    Time makespan = 0;
    const auto numCpus = graph.cpuCount();
    for (CPU cpu = 0; cpu < numCpus; ++cpu) {
        for (const auto& task : graph.getScheduledTasks(cpu)) {
            const Time completion = addOrMax(task.aest, task.weight);
            if (completion > makespan)
                makespan = completion;
        }
    }
    return makespan;
}
// Degenerate case: a single-node graph must land on exactly one CPU with the
// node scheduled at time zero and no slack (aest == alst == 0).
int testDCPGraphSingleNode() {
    std::cout << "testDCPGraphSingleNode:" << std::endl;
    configureDcpDotOutput();
    const std::vector<Weight> nodeWeights = {15};
    GraphDCP graph(nodeWeights, {});
    graph.runDcp();
    bool ok = true;
    if (graph.cpuCount() != 1) {
        std::cerr << "Expected exactly 1 CPU, got " << graph.cpuCount() << "\n";
        ok = false;
    } else if (!checkScheduledTasks(graph,
                   0,
                   {
                       {0, 0, 0, 15},
                   })) {
        ok = false;
    }
    if (!ok) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    return 0;
}
// A strict chain has no parallelism to exploit, so DCP must keep all three
// nodes on one CPU, scheduled back to back with zero slack (aest == alst).
int testDCPGraphLinearChain() {
    std::cout << "testDCPGraphLinearChain:" << std::endl;
    configureDcpDotOutput();
    const std::vector<Weight> nodeWeights = {10, 20, 5};
    const std::vector<IndexedEdge> edges = {
        {0, 1, 7},
        {1, 2, 9},
    };
    GraphDCP graph(nodeWeights, edges);
    graph.runDcp();
    bool ok = true;
    if (graph.cpuCount() != 1) {
        std::cerr << "Expected a linear chain to stay on one CPU, got " << graph.cpuCount() << "\n";
        ok = false;
    } else if (!checkScheduledTasks(graph,
                   0,
                   {
                       {0, 0, 0, 10},
                       {1, 10, 10, 20},
                       {2, 30, 30, 5 },
                   })) {
        ok = false;
    } else if (!checkCpuSchedulesDoNotOverlap(graph) || !checkDependencyConstraints(graph, edges)) {
        ok = false;
    }
    if (!ok) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    return 0;
}
// Regression fixture: an 18-node, 30-edge task graph with a known-good DCP
// result. The per-CPU expectations below pin the exact placement, aest/alst
// and weight of every node, so any change in the scheduler's tie-breaking or
// cost model shows up as a failure here.
int testDCPGraphFixture() {
    std::cout << "testDCPGraphFixture:" << std::endl;
    configureDcpDotOutput();
    const std::vector<Weight> nodeWeights = {
        80,
        40,
        40,
        40,
        40,
        40,
        60,
        30,
        30,
        30,
        30,
        40,
        20,
        20,
        20,
        20,
        10,
        10,
    };
    // Each entry is {parent, child, transfer cost}. Note that edge 0 -> 1
    // appears twice with different costs (3 and 120); presumably this
    // exercises duplicate-edge handling — TODO confirm against GraphDCP.
    const std::vector<IndexedEdge> edges = {
        {0, 1, 3 },
        {0, 1, 120},
        {0, 2, 120},
        {0, 3, 120},
        {0, 4, 120},
        {0, 5, 120},
        {0, 6, 120},
        {2, 6, 80 },
        {2, 7, 80 },
        {3, 8, 80 },
        {4, 9, 80 },
        {5, 10, 80 },
        {6, 7, 120},
        {6, 8, 120},
        {6, 9, 120},
        {6, 10, 120},
        {6, 11, 120},
        {8, 11, 80 },
        {8, 12, 80 },
        {9, 13, 80 },
        {10, 14, 80 },
        {11, 12, 120},
        {11, 13, 120},
        {11, 14, 120},
        {11, 15, 120},
        {13, 15, 80 },
        {13, 16, 80 },
        {14, 17, 80 },
        {15, 16, 120},
        {15, 17, 120},
    };
    // Unlike the other tests, the edges here are added one at a time through
    // makeEdge rather than passed to the constructor.
    GraphDCP graph(nodeWeights, {});
    for (auto [parent, child, weight] : edges)
        graph.makeEdge(parent, child, weight);
    graph.runDcp();
    if (graph.cpuCount() != 4) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    // Expected schedules, one block per CPU (checked from CPU 3 down to 0).
    if (!checkScheduledTasks(graph,
            3,
            {
                {1, 200, 400, 40},
            })) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    if (!checkScheduledTasks(graph,
            2,
            {
                {5, 200, 260, 40},
                {10, 300, 300, 30},
            })) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    if (!checkScheduledTasks(graph,
            1,
            {
                {4, 200, 210, 40},
                {7, 300, 410, 30},
            })) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    if (!checkScheduledTasks(graph,
            0,
            {
                {0, 0, 0, 80},
                {2, 80, 80, 40},
                {6, 120, 120, 60},
                {3, 180, 200, 40},
                {8, 220, 240, 30},
                {11, 250, 270, 40},
                {12, 290, 310, 20},
                {9, 320, 330, 30},
                {13, 350, 360, 20},
                {15, 370, 380, 20},
                {16, 390, 400, 10},
                {14, 410, 410, 20},
                {17, 430, 430, 10},
            })) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    // Structural invariants on top of the exact expectations above.
    if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
        || !checkDependencyConstraints(graph, edges)) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    return 0;
}
// Six independent children of one root with zero transfer cost: with the CPU
// count capped at 2, DCP must use exactly 2 CPUs and still finish by time 50
// (root weight 20 + three 10-weight tasks per CPU).
int testDCPGraphMaxCPUs() {
    std::cout << "testDCPGraphMaxCPUs:" << std::endl;
    configureDcpDotOutput();
    const std::vector<Weight> nodeWeights = {20, 10, 10, 10, 10, 10, 10};
    const std::vector<IndexedEdge> edges = {
        {0, 1, 0},
        {0, 2, 0},
        {0, 3, 0},
        {0, 4, 0},
        {0, 5, 0},
        {0, 6, 0},
    };
    GraphDCP graph(nodeWeights, edges);
    graph.setMaxCpuCount(2);
    graph.runDcp();
    bool ok = true;
    if (graph.cpuCount() != 2) {
        std::cerr << "Expected exactly 2 CPUs with maxCpuCount=2, got " << graph.cpuCount() << "\n";
        ok = false;
    } else if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
               || !checkDependencyConstraints(graph, edges)) {
        ok = false;
    } else if (getMaxCompletion(graph) > 50) {
        std::cerr << "Expected makespan <= 50 under maxCpuCount=2, got " << getMaxCompletion(graph) << "\n";
        ok = false;
    }
    if (!ok) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    return 0;
}
// With the CPU count capped at 1, everything serializes: one CPU and a
// makespan of exactly 50 (= 20 + 10 + 10 + 10).
int testDCPGraphSingleCpuCap() {
    std::cout << "testDCPGraphSingleCpuCap:" << std::endl;
    configureDcpDotOutput();
    const std::vector<Weight> nodeWeights = {20, 10, 10, 10};
    const std::vector<IndexedEdge> edges = {
        {0, 1, 0},
        {0, 2, 0},
        {0, 3, 0},
    };
    GraphDCP graph(nodeWeights, edges);
    graph.setMaxCpuCount(1);
    graph.runDcp();
    bool ok = true;
    if (graph.cpuCount() != 1) {
        std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
        ok = false;
    } else if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
               || !checkDependencyConstraints(graph, edges)) {
        ok = false;
    } else if (getMaxCompletion(graph) != 50) {
        std::cerr << "Expected makespan 50 under maxCpuCount=1, got " << getMaxCompletion(graph) << "\n";
        ok = false;
    }
    if (!ok) {
        dumpDcpFailureArtifacts();
        return 1;
    }
    return 0;
}
int testDCPGraphDiamondDependencies() {
std::cout << "testDCPGraphDiamondDependencies:" << std::endl;
configureDcpDotOutput();
const std::vector<Weight> nodeWeights = {15, 10, 12, 20};
const std::vector<IndexedEdge> edges = {
{0, 1, 5},
{0, 2, 7},
{1, 3, 3},
{2, 3, 2},
};
GraphDCP graph(nodeWeights, edges);
graph.runDcp();
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|| !checkDependencyConstraints(graph, edges)) {
dumpDcpFailureArtifacts();
return 1;
}
auto scheduledPlacements = collectScheduledPlacements(graph);
const auto& sink = scheduledPlacements.at(3).task;
if (sink.aest < 27) {
std::cerr << "Expected sink node to start no earlier than the longest parent path, got " << sink.aest << "\n";
dumpDcpFailureArtifacts();
return 1;
}
return 0;
}
int testDCPGraphCrossbarExhaustion() {
std::cout << "testDCPGraphCrossbarExhaustion:" << std::endl;
configureDcpDotOutput();
const size_t savedCrossbarSize = onnx_mlir::crossbarSize.getValue();
const size_t savedCrossbarCount = onnx_mlir::crossbarCountInCore.getValue();
onnx_mlir::crossbarSize = 4;
onnx_mlir::crossbarCountInCore = 2;
auto restoreCrossbarOptions = [&]() {
onnx_mlir::crossbarSize = savedCrossbarSize;
onnx_mlir::crossbarCountInCore = savedCrossbarCount;
};
const std::vector<Weight> nodeWeights = {10, 10, 10};
const std::vector<CrossbarUsage> nodeCrossbarUsage = {1, 1, 1};
GraphDCP graph(nodeWeights, {}, nodeCrossbarUsage);
graph.setMaxCpuCount(1);
graph.runDcp();
if (graph.cpuCount() != 1) {
restoreCrossbarOptions();
std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
dumpDcpFailureArtifacts();
return 1;
}
auto scheduledTasks = graph.getScheduledTasks(0);
if (scheduledTasks.size() != 3) {
restoreCrossbarOptions();
std::cerr << "Expected all three tasks to be scheduled on CPU 0\n";
printCpuSchedule(graph, 0);
dumpDcpFailureArtifacts();
return 1;
}
if (scheduledTasks[0].weight != 10 || scheduledTasks[1].weight != std::numeric_limits<Weight>::max()
|| scheduledTasks[2].weight != std::numeric_limits<Weight>::max()) {
restoreCrossbarOptions();
std::cerr << "Unexpected effective weights under crossbar exhaustion\n";
printCpuSchedule(graph, 0);
dumpDcpFailureArtifacts();
return 1;
}
restoreCrossbarOptions();
return 0;
}
} // namespace
// Entry point: run every DCP graph test in order and report the total number
// of failures; exit status reflects overall success.
int main(int argc, char* argv[]) {
    (void) argc;
    (void) argv;
    int (*const tests[])() = {
        testDCPGraphSingleNode,
        testDCPGraphLinearChain,
        testDCPGraphFixture,
        testDCPGraphMaxCPUs,
        testDCPGraphSingleCpuCap,
        testDCPGraphDiamondDependencies,
        testDCPGraphCrossbarExhaustion,
    };
    int failures = 0;
    for (auto* test : tests)
        failures += test();
    if (failures == 0)
        return EXIT_SUCCESS;
    std::cerr << failures << " test failures\n";
    return EXIT_FAILURE;
}

View File

@@ -0,0 +1,162 @@
#include <cassert>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <vector>
#include "src/Accelerators/PIM/Common/LabeledList.hpp"
using onnx_mlir::LabeledList;
using onnx_mlir::LabeledListNode;
namespace {
// Minimal list element carrying an id so the tests can verify ordering.
struct TestNode : public LabeledListNode<TestNode> {
    int id;
    explicit TestNode(int nodeId)
        : id(nodeId) {}
};
// Walk `list` front to back and assert node ids match `expectedOrder`
// exactly: same elements, same order, same count.
void assertOrder(LabeledList<TestNode>& list, std::initializer_list<int> expectedOrder) {
    auto nextExpected = expectedOrder.begin();
    const auto endExpected = expectedOrder.end();
    for (auto& node : list) {
        assert(nextExpected != endExpected);
        assert(node.id == *nextExpected);
        ++nextExpected;
    }
    assert(nextExpected == endExpected);
}
// Assert that the order labels grow strictly from front to back.
void assertStrictlyIncreasingLabels(LabeledList<TestNode>& list) {
    for (auto it = list.begin(); it != list.end(); ++it) {
        auto following = it;
        ++following;
        if (following == list.end())
            break;
        assert(it->getOrderLabel() < following->getOrderLabel());
    }
}
int testLabeledListBasicMutation() {
std::cout << "testLabeledListBasicMutation:" << std::endl;
LabeledList<TestNode> list;
TestNode n1(1);
TestNode n2(2);
TestNode n3(3);
TestNode n4(4);
TestNode n5(5);
assert(list.empty());
assert(list.front() == nullptr);
assert(list.back() == nullptr);
assert(!list.contains(&n1));
assert(LabeledList<TestNode>::previous(&n1) == nullptr);
assert(LabeledList<TestNode>::next(&n1) == nullptr);
list.pushBack(&n1);
list.pushBack(&n3);
list.insertAfter(&n1, &n2);
list.pushFront(&n4);
list.insertBefore(nullptr, &n5);
assert(list.size() == 5);
assert(list.front() == &n4);
assert(list.back() == &n5);
assert(list.contains(&n2));
assertOrder(list, {4, 1, 2, 3, 5});
assert(LabeledList<TestNode>::next(&n4) == &n1);
assert(LabeledList<TestNode>::previous(&n1) == &n4);
assert(LabeledList<TestNode>::next(&n5) == nullptr);
assert(list.comesBefore(&n1, &n3));
assert(list.getOrderLabel(&n1) < list.getOrderLabel(&n3));
list.moveBefore(&n5, &n2);
assertOrder(list, {4, 1, 5, 2, 3});
list.moveAfter(&n4, &n3);
assertOrder(list, {1, 5, 2, 3, 4});
list.remove(&n2);
assert(!n2.isLinked());
assert(!list.contains(&n2));
assertOrder(list, {1, 5, 3, 4});
list.clear();
assert(list.empty());
assert(list.size() == 0);
assert(list.front() == nullptr);
assert(list.back() == nullptr);
assert(!n1.isLinked());
assert(!n3.isLinked());
assert(!n4.isLinked());
assert(!n5.isLinked());
return 0;
}
// Stress repeated insertion at a single point (intended, per the test name,
// to trigger order-label relabeling), then verify that "no-op" moves leave
// the order, the endpoints, and the strictly-increasing-label invariant
// intact.
int testLabeledListRelabelingAndNoopMoves() {
    std::cout << "testLabeledListRelabelingAndNoopMoves:" << std::endl;
    constexpr int insertedNodeCount = 80;
    LabeledList<TestNode> list;
    TestNode head(0);
    TestNode tail(999);
    std::vector<TestNode> insertedNodes;
    insertedNodes.reserve(insertedNodeCount);
    for (int i = 0; i < insertedNodeCount; ++i)
        insertedNodes.emplace_back(i + 1);
    list.pushBack(&head);
    list.pushBack(&tail);
    // Always inserting right after `head` reverses the nodes' order in the
    // list relative to their creation order.
    for (auto& node : insertedNodes)
        list.insertAfter(&head, &node);
    assert(list.size() == insertedNodeCount + 2);
    assert(list.front() == &head);
    assert(list.back() == &tail);
    assert(LabeledList<TestNode>::previous(&head) == nullptr);
    assert(LabeledList<TestNode>::next(&tail) == nullptr);
    assertStrictlyIncreasingLabels(list);
    // No-op moves: each call requests a position the node already occupies
    // (semantics of the null anchor in moveAfter inferred from the test
    // name and the unchanged-endpoint asserts below — TODO confirm).
    auto* firstInserted = LabeledList<TestNode>::next(&head);
    auto* secondInserted = LabeledList<TestNode>::next(firstInserted);
    list.moveBefore(firstInserted, secondInserted);
    list.moveAfter(&head, nullptr);
    list.moveAfter(&tail, LabeledList<TestNode>::previous(&tail));
    assert(list.front() == &head);
    assert(list.back() == &tail);
    // Because insertion reversed the order, the first node after `head` is
    // the last one emplaced.
    assert(firstInserted == &insertedNodes.back());
    assert(secondInserted == &insertedNodes[insertedNodeCount - 2]);
    assertStrictlyIncreasingLabels(list);
    // Ids between head and tail must count down from insertedNodeCount to 1.
    int expectedId = insertedNodeCount;
    auto it = std::next(list.begin());
    for (; it != list.end() && &*it != &tail; ++it, --expectedId)
        assert(it->id == expectedId);
    assert(expectedId == 0);
    list.clear();
    return 0;
}
} // namespace
// Entry point: run both LabeledList tests in order and report failures.
int main(int argc, char* argv[]) {
    (void) argc;
    (void) argv;
    int failures = 0;
    failures += testLabeledListBasicMutation();
    failures += testLabeledListRelabelingAndNoopMoves();
    if (failures == 0)
        return EXIT_SUCCESS;
    std::cerr << failures << " test failures\n";
    return EXIT_FAILURE;
}

View File

@@ -1,202 +0,0 @@
/*
* SPDX-License-Identifier: Apache-2.0
*/
#include "src/Accelerators/PIM/Common/LabeledList.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/Graph.hpp"
#include <cassert>
#include <cstdlib>
#include <initializer_list>
#include <iostream>
#include <vector>
using onnx_mlir::LabeledList;
using onnx_mlir::LabeledListNode;
namespace {
// Minimal list element carrying an id so the tests can verify ordering.
struct TestNode : public LabeledListNode<TestNode> {
    explicit TestNode(int id)
        : id(id) {}
    int id;
};
// Walk `list` front to back and assert that node ids match `expectedOrder`
// exactly: same elements, same order, same count.
void assertOrder(LabeledList<TestNode>& list, std::initializer_list<int> expectedOrder) {
    auto expectedIt = expectedOrder.begin();
    for (auto& node : list) {
        assert(expectedIt != expectedOrder.end());
        assert(node.id == *expectedIt);
        ++expectedIt;
    }
    assert(expectedIt == expectedOrder.end());
}
// Exercise the core LabeledList operations: insertion at various positions,
// navigation, ordering queries, moves, removal, and clearing.
int testLabeledList() {
    std::cout << "testLabeledList:" << std::endl;
    LabeledList<TestNode> list;
    TestNode n1(1);
    TestNode n2(2);
    TestNode n3(3);
    TestNode n4(4);
    TestNode n5(5);
    // Build the order 4, 1, 2, 3, 5 through a mix of insertion primitives.
    list.pushBack(&n1);
    list.pushBack(&n3);
    list.insertAfter(&n1, &n2);
    list.pushFront(&n4);
    list.insertBefore(nullptr, &n5);
    assertOrder(list, {4, 1, 2, 3, 5});
    // Static navigation helpers and ordering queries.
    assert(LabeledList<TestNode>::next(&n4) == &n1);
    assert(LabeledList<TestNode>::previous(&n1) == &n4);
    assert(LabeledList<TestNode>::next(&n5) == nullptr);
    assert(list.comesBefore(&n1, &n3));
    assert(list.getOrderLabel(&n1) < list.getOrderLabel(&n3));
    // Relocations within the list.
    list.moveBefore(&n5, &n2);
    assertOrder(list, {4, 1, 5, 2, 3});
    list.moveAfter(&n4, &n3);
    assertOrder(list, {1, 5, 2, 3, 4});
    // Removal unlinks the node without touching the others.
    list.remove(&n2);
    assert(!n2.isLinked());
    assertOrder(list, {1, 5, 3, 4});
    // clear() unlinks every remaining node and resets the list.
    list.clear();
    assert(list.empty());
    assert(!n1.isLinked());
    assert(!n3.isLinked());
    assert(!n4.isLinked());
    assert(!n5.isLinked());
    return 0;
}
// Expected placement of one node in a CPU schedule, mirroring the fields the
// scheduler reports per task.
struct ExpectedScheduledTask {
    size_t nodeIndex;   // index of the node in the graph
    int aest;           // expected aest reported by the scheduler
    int alst;           // expected alst reported by the scheduler
    int weight;         // expected task weight as scheduled
};
// Assert that CPU `cpu`'s computed schedule matches `expectedTasks`
// element-wise (node index, aest, alst, weight).
void assertScheduledTasks(GraphDCP& graph, CPU cpu, std::initializer_list<ExpectedScheduledTask> expectedTasks) {
    auto actualTasks = graph.getScheduledTasks(cpu);
    assert(actualTasks.size() == expectedTasks.size());
    auto expectedIt = expectedTasks.begin();
    for (const auto& actualTask : actualTasks) {
        assert(expectedIt != expectedTasks.end());
        // On any field mismatch, print the whole actual schedule first so the
        // failing assert below has context in the test output.
        if (actualTask.nodeIndex != expectedIt->nodeIndex || actualTask.aest != expectedIt->aest
            || actualTask.alst != expectedIt->alst || actualTask.weight != expectedIt->weight) {
            std::cerr << "CPU " << cpu << " actual schedule:\n";
            for (const auto& task : actualTasks) {
                std::cerr << "  " << task.nodeIndex << ") aest: " << task.aest << " alst: " << task.alst
                          << " weight: " << task.weight << '\n';
            }
        }
        assert(actualTask.nodeIndex == expectedIt->nodeIndex);
        assert(actualTask.aest == expectedIt->aest);
        assert(actualTask.alst == expectedIt->alst);
        assert(actualTask.weight == expectedIt->weight);
        ++expectedIt;
    }
}
// Regression fixture: an 18-node, 30-edge task graph with a known-good DCP
// result, pinning the exact placement, aest/alst and weight of every node.
int testDCPGraphFixture() {
    std::cout << "testDCPGraphFixture:" << std::endl;
    const std::vector<Weight_t> nodeWeights = {
        80, 40, 40, 40, 40, 40, 60, 30, 30, 30,
        30, 40, 20, 20, 20, 20, 10, 10,
    };
    GraphDCP graph(nodeWeights, {});
    // Edges are (parent, child, transfer cost). Note that edge 0 -> 1 is
    // added twice with different costs (3 and 120); presumably this
    // exercises duplicate-edge handling — TODO confirm against GraphDCP.
    graph.makeEdge(0, 1, 3);
    graph.makeEdge(0, 1, 120);
    graph.makeEdge(0, 2, 120);
    graph.makeEdge(0, 3, 120);
    graph.makeEdge(0, 4, 120);
    graph.makeEdge(0, 5, 120);
    graph.makeEdge(0, 6, 120);
    graph.makeEdge(2, 6, 80);
    graph.makeEdge(2, 7, 80);
    graph.makeEdge(3, 8, 80);
    graph.makeEdge(4, 9, 80);
    graph.makeEdge(5, 10, 80);
    graph.makeEdge(6, 7, 120);
    graph.makeEdge(6, 8, 120);
    graph.makeEdge(6, 9, 120);
    graph.makeEdge(6, 10, 120);
    graph.makeEdge(6, 11, 120);
    graph.makeEdge(8, 11, 80);
    graph.makeEdge(8, 12, 80);
    graph.makeEdge(9, 13, 80);
    graph.makeEdge(10, 14, 80);
    graph.makeEdge(11, 12, 120);
    graph.makeEdge(11, 13, 120);
    graph.makeEdge(11, 14, 120);
    graph.makeEdge(11, 15, 120);
    graph.makeEdge(13, 15, 80);
    graph.makeEdge(13, 16, 80);
    graph.makeEdge(14, 17, 80);
    graph.makeEdge(15, 16, 120);
    graph.makeEdge(15, 17, 120);
    graph.DCP();
    // Unconditionally dump every computed schedule to stderr before checking,
    // so a failing assert below can be diagnosed from the log.
    for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
        auto scheduledTasks = graph.getScheduledTasks(cpu);
        std::cerr << "CPU " << cpu << " computed schedule:\n";
        for (const auto& task : scheduledTasks) {
            std::cerr << "  " << task.nodeIndex << ") aest: " << task.aest << " alst: " << task.alst
                      << " weight: " << task.weight << '\n';
        }
    }
    // Expected schedules, one block per CPU (checked from CPU 3 down to 0).
    assert(graph.cpuCount() == 4);
    assertScheduledTasks(graph, 3, {
        {1, 200, 370, 40},
    });
    assertScheduledTasks(graph, 2, {
        {5, 200, 260, 40},
        {10, 300, 300, 30},
    });
    assertScheduledTasks(graph, 1, {
        {4, 200, 210, 40},
        {7, 300, 380, 30},
    });
    assertScheduledTasks(graph, 0, {
        {0, 0, 0, 80},
        {2, 80, 80, 40},
        {6, 120, 120, 60},
        {3, 180, 200, 40},
        {8, 220, 240, 30},
        {11, 250, 270, 40},
        {12, 290, 310, 20},
        {9, 320, 330, 30},
        {13, 350, 360, 20},
        {15, 370, 380, 20},
        {16, 390, 400, 10},
        {14, 410, 410, 20},
        {17, 430, 430, 10},
    });
    return 0;
}
} // namespace
// Entry point: run both test groups and report the total failure count;
// the exit status reflects overall success.
int main(int argc, char* argv[]) {
    (void) argc;
    (void) argv;
    int failures = 0;
    failures += testLabeledList();
    failures += testDCPGraphFixture();
    if (failures != 0) {
        std::cerr << failures << " test failures\n";
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}