faster (and refactored) DCP analysis
All checks were successful
Validate Operations / validate-operations (push) Successful in 2h16m17s

This commit is contained in:
NiccoloN
2026-04-21 12:33:44 +02:00
parent f4c6da8f10
commit 85e2750d6c
20 changed files with 2525 additions and 858 deletions

528
test/PIM/DCPTest.cpp Normal file
View File

@@ -0,0 +1,528 @@
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <initializer_list>
#include <iostream>
#include <limits>
#include <optional>
#include <unordered_map>
#include <vector>
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/Graph.hpp"
#include "src/Compiler/CompilerOptions.hpp"
namespace {
// Expected values for one entry of a CPU's schedule. checkScheduledTasks()
// compares each field against the matching field of the tasks returned by
// GraphDCP::getScheduledTasks(). Tests brace-initialize this positionally as
// {nodeIndex, aest, alst, weight}, so the member order must not change.
struct ExpectedScheduledTask {
// Index of the node expected at this position of the schedule.
size_t nodeIndex;
// Expected `aest` of the task (presumably DCP's "absolute earliest start
// time" -- confirm against Graph.hpp).
Time aest;
// Expected `alst` of the task (presumably DCP's "absolute latest start
// time" -- confirm against Graph.hpp).
Time alst;
// Expected weight reported for the scheduled task.
Weight weight;
};
// Pairs a scheduled task with the CPU whose schedule it was read from.
// Built by collectScheduledPlacements() and keyed there by node index.
struct ScheduledPlacement {
// CPU on which the task was found.
CPU cpu;
// Copy of the task entry as reported by GraphDCP::getScheduledTasks(cpu).
GraphDCP::ScheduledTaskInfo task;
};
// Directory, located under the system temporary directory, that these tests
// use for their output files.
std::filesystem::path getDcpTestOutputDir() {
  std::filesystem::path outputDir = std::filesystem::temp_directory_path();
  outputDir /= "raptor-test-pim";
  return outputDir;
}
// Resets the test output directory to an empty state and points
// onnx_mlir::outputBaseName at "<output dir>/DCPTest.mlir".
//
// Each filesystem call gets its own error code: create_directories() resets the
// code it is given, so sharing one variable would silently discard a failure
// from remove_all(). A failed cleanup is only a warning (stale files may remain
// in the directory); a failed creation is reported and then asserted on, so the
// cause is visible even in builds where assert() is compiled out.
void configureDcpDotOutput() {
  const auto outputDir = getDcpTestOutputDir();

  std::error_code removeError;
  std::filesystem::remove_all(outputDir, removeError);
  if (removeError)
    std::cerr << "Warning: could not clear " << outputDir.string() << ": " << removeError.message() << '\n';

  std::error_code createError;
  std::filesystem::create_directories(outputDir, createError);
  if (createError)
    std::cerr << "Failed to create " << outputDir.string() << ": " << createError.message() << '\n';
  assert(!createError);

  onnx_mlir::outputBaseName = (outputDir / "DCPTest.mlir").string();
}
// Returns the path of the "latest" .dot file inside "<output dir>/dcp_graph",
// or std::nullopt when that directory cannot be iterated or contains no regular
// file with a ".dot" extension.
//
// "Latest" means the entry whose filename compares greatest under
// std::filesystem::path ordering.
// NOTE(review): this assumes the dump filenames sort chronologically (e.g.
// zero-padded counters) -- confirm against the code that writes them.
//
// Only non-throwing filesystem overloads are used: this helper runs while a
// test failure is being reported, and an exception here (for example when
// "dcp_graph" exists but is not a directory) would terminate the binary and
// hide the original failure.
std::optional<std::filesystem::path> getLatestDcpDotFile() {
  const auto graphDir = getDcpTestOutputDir() / "dcp_graph";

  std::error_code iterationError;
  std::filesystem::directory_iterator entryIt(graphDir, iterationError);
  if (iterationError)
    return std::nullopt;

  std::optional<std::filesystem::path> latestDot;
  const std::filesystem::directory_iterator endIt;
  while (entryIt != endIt) {
    // is_regular_file(ec) yields false on error, so unreadable entries are skipped.
    std::error_code statusError;
    const bool isDotFile = entryIt->is_regular_file(statusError) && entryIt->path().extension() == ".dot";
    if (isDotFile && (!latestDot || entryIt->path().filename() > latestDot->filename()))
      latestDot = entryIt->path();

    entryIt.increment(iterationError);
    if (iterationError)
      break; // keep whatever was found before the directory became unreadable
  }
  return latestDot;
}
// Writes the path and the full contents of the latest DCP dot file (as chosen by
// getLatestDcpDotFile()) to std::cerr so a failing test leaves the graph dump in
// the log. Prints a short notice instead when no file exists, it cannot be
// opened, or it is empty.
void dumpDcpFailureArtifacts() {
  const auto latestDot = getLatestDcpDotFile();
  if (!latestDot) {
    std::cerr << "No DCP dot file was produced.\n";
    return;
  }

  std::cerr << "DCP dot file: " << latestDot->string() << '\n';
  std::ifstream dotFile(*latestDot);
  if (!dotFile.is_open()) {
    std::cerr << "Failed to open DCP dot file.\n";
    return;
  }

  // Inserting a stream buffer that yields no characters sets failbit on the
  // destination stream. Without this guard an empty dot file would put
  // std::cerr into a failed state and silently drop every later diagnostic.
  if (dotFile.peek() == std::ifstream::traits_type::eof()) {
    std::cerr << "DCP dot file is empty.\n";
    return;
  }
  std::cerr << dotFile.rdbuf();
}
// Prints to std::cerr a header naming the CPU followed by one line per task in
// that CPU's schedule, showing the node index, aest, alst and weight.
void printCpuSchedule(GraphDCP& graph, CPU cpu) {
  auto tasksOnCpu = graph.getScheduledTasks(cpu);

  std::cerr << "CPU " << cpu << " actual schedule:\n";
  for (const auto& entry : tasksOnCpu) {
    std::cerr << " " << entry.nodeIndex << ") aest: " << entry.aest;
    std::cerr << " alst: " << entry.alst;
    std::cerr << " weight: " << entry.weight << '\n';
  }
}
// Prints the schedule of every CPU of the graph, in increasing CPU order, by
// delegating to printCpuSchedule().
void printGraphSchedule(GraphDCP& graph) {
  CPU currentCpu = 0;
  while (currentCpu < graph.cpuCount()) {
    printCpuSchedule(graph, currentCpu);
    ++currentCpu;
  }
}
// Returns true when the schedule of `cpu` has exactly as many tasks as
// `expectedTasks` and every task matches its expected counterpart, in order, on
// node index, aest, alst and weight. On any mismatch the actual schedule of that
// CPU is printed and false is returned.
bool checkScheduledTasks(GraphDCP& graph, CPU cpu, std::initializer_list<ExpectedScheduledTask> expectedTasks) {
  auto actualTasks = graph.getScheduledTasks(cpu);

  const auto sameTask = [](const auto& actual, const ExpectedScheduledTask& expected) {
    return actual.nodeIndex == expected.nodeIndex && actual.aest == expected.aest && actual.alst == expected.alst
        && actual.weight == expected.weight;
  };

  // The size comparison comes first so std::equal never reads past the end of
  // the expected list.
  const bool scheduleMatches =
      actualTasks.size() == expectedTasks.size()
      && std::equal(std::begin(actualTasks), std::end(actualTasks), expectedTasks.begin(), sameTask);

  if (!scheduleMatches)
    printCpuSchedule(graph, cpu);
  return scheduleMatches;
}
// Walks the schedule of every CPU and builds a map from node index to the CPU
// and task entry where that node was found. Asserts that no node index shows up
// more than once across all CPUs.
std::unordered_map<size_t, ScheduledPlacement> collectScheduledPlacements(GraphDCP& graph) {
  std::unordered_map<size_t, ScheduledPlacement> placementByNode;

  CPU cpu = 0;
  while (cpu < graph.cpuCount()) {
    for (const auto& task : graph.getScheduledTasks(cpu)) {
      const bool isFirstOccurrence = placementByNode.emplace(task.nodeIndex, ScheduledPlacement {cpu, task}).second;
      assert(isFirstOccurrence && "task scheduled multiple times");
      (void) isFirstOccurrence; // only read by the assert above
    }
    ++cpu;
  }

  return placementByNode;
}
// Returns true when the number of distinct scheduled node indices across all
// CPUs equals `expectedTaskCount`. Otherwise prints both counts and the whole
// graph schedule, then returns false.
bool checkAllTasksScheduled(GraphDCP& graph, size_t expectedTaskCount) {
  const auto placements = collectScheduledPlacements(graph);
  if (placements.size() == expectedTaskCount)
    return true;

  std::cerr << "Expected " << expectedTaskCount << " scheduled tasks, got " << placements.size() << "\n";
  printGraphSchedule(graph);
  return false;
}
// Validates each CPU's schedule in order. Every task must satisfy aest <= alst
// and must not start before the previous task on the same CPU completes, where
// completion is addOrMax(aest, weight). On the first violation the reason and
// that CPU's schedule are printed and false is returned.
bool checkCpuSchedulesDoNotOverlap(GraphDCP& graph) {
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    auto tasksOnCpu = graph.getScheduledTasks(cpu);

    // Empty until the first task of this CPU has been visited.
    std::optional<Time> lastCompletion;
    for (const auto& current : tasksOnCpu) {
      const Time currentCompletion = addOrMax(current.aest, current.weight);

      const bool windowInverted = current.aest > current.alst;
      const bool overlapsPrevious = lastCompletion.has_value() && current.aest < *lastCompletion;
      if (windowInverted || overlapsPrevious) {
        // The inverted-window check takes precedence when both conditions hold.
        if (windowInverted)
          std::cerr << "Task " << current.nodeIndex << " on CPU " << cpu << " has aest > alst\n";
        else
          std::cerr << "CPU " << cpu << " has overlapping tasks\n";
        printCpuSchedule(graph, cpu);
        return false;
      }

      lastCompletion = currentCompletion;
    }
  }
  return true;
}
// Verifies that every edge (parent, child, transferCost) is respected by the
// schedule. The child must not start before the parent completes
// (addOrMax(parent aest, parent weight)), plus the transfer cost when the two
// tasks are placed on different CPUs.
//
// Returns false, after printing the offending edge and the whole schedule, on
// the first violation. An edge whose endpoint was never scheduled is reported
// the same way instead of escaping as an uncaught std::out_of_range from
// unordered_map::at(), which would abort the test binary without context.
bool checkDependencyConstraints(GraphDCP& graph, llvm::ArrayRef<IndexedEdge> edges) {
  const auto scheduledPlacements = collectScheduledPlacements(graph);

  for (auto [parentIndex, childIndex, transferCost] : edges) {
    const auto parentIt = scheduledPlacements.find(parentIndex);
    const auto childIt = scheduledPlacements.find(childIndex);
    if (parentIt == scheduledPlacements.end() || childIt == scheduledPlacements.end()) {
      std::cerr << "Edge " << parentIndex << " -> " << childIndex << " references an unscheduled node\n";
      printGraphSchedule(graph);
      return false;
    }

    const auto& parent = parentIt->second;
    const auto& child = childIt->second;

    Time requiredStart = addOrMax(parent.task.aest, parent.task.weight);
    // Data only has to be transferred when the tasks run on different CPUs.
    if (parent.cpu != child.cpu)
      requiredStart = addOrMax(requiredStart, static_cast<Weight>(transferCost));

    if (child.task.aest < requiredStart) {
      std::cerr << "Dependency violation for edge " << parentIndex << " -> " << childIndex << '\n';
      printGraphSchedule(graph);
      return false;
    }
  }
  return true;
}
// Returns the largest completion value, addOrMax(aest, weight), over all tasks
// scheduled on any CPU, or 0 when nothing is scheduled.
Time getMaxCompletion(GraphDCP& graph) {
  Time latestCompletion = 0;
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    for (const auto& task : graph.getScheduledTasks(cpu)) {
      const Time taskCompletion = addOrMax(task.aest, task.weight);
      if (latestCompletion < taskCompletion)
        latestCompletion = taskCompletion;
    }
  }
  return latestCompletion;
}
// A graph with one node of weight 15 and no edges must end up on exactly one
// CPU, with that node scheduled at aest 0, alst 0 and weight 15.
// Returns 0 on success and 1 on failure (after dumping the DCP dot file).
int testDCPGraphSingleNode() {
  std::cout << "testDCPGraphSingleNode:" << std::endl;
  configureDcpDotOutput();

  const std::vector<Weight> nodeWeights = {15};
  GraphDCP graph(nodeWeights, {});
  graph.runDcp();

  // Shared failure exit: dump the artifacts, then signal one failure.
  const auto fail = [] {
    dumpDcpFailureArtifacts();
    return 1;
  };

  if (graph.cpuCount() != 1) {
    std::cerr << "Expected exactly 1 CPU, got " << graph.cpuCount() << "\n";
    return fail();
  }

  if (!checkScheduledTasks(graph, 0, {{0, 0, 0, 15}}))
    return fail();

  return 0;
}
// A three-node chain 0 -> 1 -> 2 (weights 10, 20, 5) must stay on a single CPU
// and run back to back: starts at 0, 10 and 30, with alst equal to aest for
// every node. Also checks for overlaps and dependency violations.
// Returns 0 on success and 1 on failure (after dumping the DCP dot file).
int testDCPGraphLinearChain() {
  std::cout << "testDCPGraphLinearChain:" << std::endl;
  configureDcpDotOutput();

  const std::vector<Weight> nodeWeights = {10, 20, 5};
  const std::vector<IndexedEdge> edges = {
      {0, 1, 7},
      {1, 2, 9},
  };

  GraphDCP graph(nodeWeights, edges);
  graph.runDcp();

  // Shared failure exit: dump the artifacts, then signal one failure.
  const auto fail = [] {
    dumpDcpFailureArtifacts();
    return 1;
  };

  if (graph.cpuCount() != 1) {
    std::cerr << "Expected a linear chain to stay on one CPU, got " << graph.cpuCount() << "\n";
    return fail();
  }

  const bool scheduleAsExpected = checkScheduledTasks(graph,
                                                      0,
                                                      {
                                                          {0, 0,  0,  10},
                                                          {1, 10, 10, 20},
                                                          {2, 30, 30, 5 },
  });
  if (!scheduleAsExpected)
    return fail();

  if (!checkCpuSchedulesDoNotOverlap(graph))
    return fail();
  if (!checkDependencyConstraints(graph, edges))
    return fail();

  return 0;
}
// Regression fixture: an 18-node graph whose edges are added one by one through
// GraphDCP::makeEdge() (the graph is constructed with an empty edge list). The
// test pins the exact schedule expected on each of the 4 CPUs, then runs the
// generic checks (all tasks scheduled, no overlap, dependencies respected).
// Returns 0 on success and 1 on failure (after dumping the DCP dot file).
int testDCPGraphFixture() {
  std::cout << "testDCPGraphFixture:" << std::endl;
  configureDcpDotOutput();

  const std::vector<Weight> nodeWeights = {
      80,
      40,
      40,
      40,
      40,
      40,
      60,
      30,
      30,
      30,
      30,
      40,
      20,
      20,
      20,
      20,
      10,
      10,
  };

  // NOTE(review): edge 0 -> 1 is listed twice with different transfer costs
  // (3 and 120). The expected schedule below (node 1 starts at 200 = 80 + 120
  // on its own CPU) is consistent with the cost of 120 being in effect --
  // confirm the duplicate is intentional.
  const std::vector<IndexedEdge> edges = {
      {0,  1,  3  },
      {0,  1,  120},
      {0,  2,  120},
      {0,  3,  120},
      {0,  4,  120},
      {0,  5,  120},
      {0,  6,  120},
      {2,  6,  80 },
      {2,  7,  80 },
      {3,  8,  80 },
      {4,  9,  80 },
      {5,  10, 80 },
      {6,  7,  120},
      {6,  8,  120},
      {6,  9,  120},
      {6,  10, 120},
      {6,  11, 120},
      {8,  11, 80 },
      {8,  12, 80 },
      {9,  13, 80 },
      {10, 14, 80 },
      {11, 12, 120},
      {11, 13, 120},
      {11, 14, 120},
      {11, 15, 120},
      {13, 15, 80 },
      {13, 16, 80 },
      {14, 17, 80 },
      {15, 16, 120},
      {15, 17, 120},
  };

  GraphDCP graph(nodeWeights, {});
  for (auto [parent, child, weight] : edges)
    graph.makeEdge(parent, child, weight);
  graph.runDcp();

  if (graph.cpuCount() != 4) {
    // Say what went wrong, as the sibling tests do, instead of failing silently.
    std::cerr << "Expected exactly 4 CPUs, got " << graph.cpuCount() << "\n";
    dumpDcpFailureArtifacts();
    return 1;
  }

  if (!checkScheduledTasks(graph,
                           3,
                           {
                               {1, 200, 400, 40},
  })) {
    dumpDcpFailureArtifacts();
    return 1;
  }

  if (!checkScheduledTasks(graph,
                           2,
                           {
                               {5,  200, 260, 40},
                               {10, 300, 300, 30},
  })) {
    dumpDcpFailureArtifacts();
    return 1;
  }

  if (!checkScheduledTasks(graph,
                           1,
                           {
                               {4, 200, 210, 40},
                               {7, 300, 410, 30},
  })) {
    dumpDcpFailureArtifacts();
    return 1;
  }

  if (!checkScheduledTasks(graph,
                           0,
                           {
                               {0,  0,   0,   80},
                               {2,  80,  80,  40},
                               {6,  120, 120, 60},
                               {3,  180, 200, 40},
                               {8,  220, 240, 30},
                               {11, 250, 270, 40},
                               {12, 290, 310, 20},
                               {9,  320, 330, 30},
                               {13, 350, 360, 20},
                               {15, 370, 380, 20},
                               {16, 390, 400, 10},
                               {14, 410, 410, 20},
                               {17, 430, 430, 10},
  })) {
    dumpDcpFailureArtifacts();
    return 1;
  }

  if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
      || !checkDependencyConstraints(graph, edges)) {
    dumpDcpFailureArtifacts();
    return 1;
  }

  return 0;
}
// Fan-out graph: node 0 (weight 20) feeds six children of weight 10 over
// zero-cost edges, with the CPU count capped at 2. Requires exactly 2 CPUs, a
// valid schedule (every task placed, no overlap, dependencies respected) and a
// makespan of at most 50.
// Returns 0 on success and 1 on failure (after dumping the DCP dot file).
int testDCPGraphMaxCPUs() {
  std::cout << "testDCPGraphMaxCPUs:" << std::endl;
  configureDcpDotOutput();

  const std::vector<Weight> nodeWeights = {20, 10, 10, 10, 10, 10, 10};
  const std::vector<IndexedEdge> edges = {
      {0, 1, 0},
      {0, 2, 0},
      {0, 3, 0},
      {0, 4, 0},
      {0, 5, 0},
      {0, 6, 0},
  };

  GraphDCP graph(nodeWeights, edges);
  graph.setMaxCpuCount(2);
  graph.runDcp();

  // Shared failure exit: dump the artifacts, then signal one failure.
  const auto fail = [] {
    dumpDcpFailureArtifacts();
    return 1;
  };

  if (graph.cpuCount() != 2) {
    std::cerr << "Expected exactly 2 CPUs with maxCpuCount=2, got " << graph.cpuCount() << "\n";
    return fail();
  }

  if (!checkAllTasksScheduled(graph, nodeWeights.size()))
    return fail();
  if (!checkCpuSchedulesDoNotOverlap(graph))
    return fail();
  if (!checkDependencyConstraints(graph, edges))
    return fail();

  if (getMaxCompletion(graph) > 50) {
    std::cerr << "Expected makespan <= 50 under maxCpuCount=2, got " << getMaxCompletion(graph) << "\n";
    return fail();
  }

  return 0;
}
// Fan-out graph (node 0 of weight 20 feeding three children of weight 10 over
// zero-cost edges) with the CPU count capped at 1. Everything must land on one
// CPU with a valid schedule, and the makespan must equal the sum of all weights
// (20 + 10 + 10 + 10 = 50).
// Returns 0 on success and 1 on failure (after dumping the DCP dot file).
int testDCPGraphSingleCpuCap() {
  std::cout << "testDCPGraphSingleCpuCap:" << std::endl;
  configureDcpDotOutput();

  const std::vector<Weight> nodeWeights = {20, 10, 10, 10};
  const std::vector<IndexedEdge> edges = {
      {0, 1, 0},
      {0, 2, 0},
      {0, 3, 0},
  };

  GraphDCP graph(nodeWeights, edges);
  graph.setMaxCpuCount(1);
  graph.runDcp();

  // Shared failure exit: dump the artifacts, then signal one failure.
  const auto fail = [] {
    dumpDcpFailureArtifacts();
    return 1;
  };

  if (graph.cpuCount() != 1) {
    std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
    return fail();
  }

  if (!checkAllTasksScheduled(graph, nodeWeights.size()))
    return fail();
  if (!checkCpuSchedulesDoNotOverlap(graph))
    return fail();
  if (!checkDependencyConstraints(graph, edges))
    return fail();

  if (getMaxCompletion(graph) != 50) {
    std::cerr << "Expected makespan 50 under maxCpuCount=1, got " << getMaxCompletion(graph) << "\n";
    return fail();
  }

  return 0;
}
int testDCPGraphDiamondDependencies() {
std::cout << "testDCPGraphDiamondDependencies:" << std::endl;
configureDcpDotOutput();
const std::vector<Weight> nodeWeights = {15, 10, 12, 20};
const std::vector<IndexedEdge> edges = {
{0, 1, 5},
{0, 2, 7},
{1, 3, 3},
{2, 3, 2},
};
GraphDCP graph(nodeWeights, edges);
graph.runDcp();
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|| !checkDependencyConstraints(graph, edges)) {
dumpDcpFailureArtifacts();
return 1;
}
auto scheduledPlacements = collectScheduledPlacements(graph);
const auto& sink = scheduledPlacements.at(3).task;
if (sink.aest < 27) {
std::cerr << "Expected sink node to start no earlier than the longest parent path, got " << sink.aest << "\n";
dumpDcpFailureArtifacts();
return 1;
}
return 0;
}
// Runs DCP on three independent nodes (weight 10, crossbar usage 1 each) with
// crossbarSize = 4, crossbarCountInCore = 2 and the CPU count capped at 1. All
// three tasks must be scheduled on CPU 0; the first must report weight 10 and
// the other two std::numeric_limits<Weight>::max().
// Returns 0 on success and 1 on failure (after dumping the DCP dot file).
//
// The two global crossbar options are restored by an RAII guard, so they are
// reset on every exit path (including an exception leaving runDcp()) and no
// return statement can forget the call. Consequently the failure diagnostics
// are printed while the test's option values are still in effect, i.e. under
// the same configuration the checks ran with.
int testDCPGraphCrossbarExhaustion() {
  std::cout << "testDCPGraphCrossbarExhaustion:" << std::endl;
  configureDcpDotOutput();

  const size_t savedCrossbarSize = onnx_mlir::crossbarSize.getValue();
  const size_t savedCrossbarCount = onnx_mlir::crossbarCountInCore.getValue();

  // Writes the saved option values back when it goes out of scope.
  struct CrossbarOptionsGuard {
    size_t crossbarSizeToRestore;
    size_t crossbarCountToRestore;
    ~CrossbarOptionsGuard() {
      onnx_mlir::crossbarSize = crossbarSizeToRestore;
      onnx_mlir::crossbarCountInCore = crossbarCountToRestore;
    }
  };
  const CrossbarOptionsGuard restoreCrossbarOptions {savedCrossbarSize, savedCrossbarCount};

  onnx_mlir::crossbarSize = 4;
  onnx_mlir::crossbarCountInCore = 2;

  const std::vector<Weight> nodeWeights = {10, 10, 10};
  const std::vector<CrossbarUsage> nodeCrossbarUsage = {1, 1, 1};
  GraphDCP graph(nodeWeights, {}, nodeCrossbarUsage);
  graph.setMaxCpuCount(1);
  graph.runDcp();

  if (graph.cpuCount() != 1) {
    std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
    dumpDcpFailureArtifacts();
    return 1;
  }

  auto scheduledTasks = graph.getScheduledTasks(0);
  if (scheduledTasks.size() != 3) {
    std::cerr << "Expected all three tasks to be scheduled on CPU 0\n";
    printCpuSchedule(graph, 0);
    dumpDcpFailureArtifacts();
    return 1;
  }

  // NOTE(review): the maximal weights presumably mark tasks that no longer fit
  // once the core's crossbars are used up -- confirm against GraphDCP.
  if (scheduledTasks[0].weight != 10 || scheduledTasks[1].weight != std::numeric_limits<Weight>::max()
      || scheduledTasks[2].weight != std::numeric_limits<Weight>::max()) {
    std::cerr << "Unexpected effective weights under crossbar exhaustion\n";
    printCpuSchedule(graph, 0);
    dumpDcpFailureArtifacts();
    return 1;
  }

  return 0;
}
} // namespace
// Runs every DCP test in a fixed order and adds up their return values (each
// test returns 0 on success and 1 on failure). Exits with EXIT_FAILURE, after
// printing the failure count, when at least one test failed.
int main(int argc, char* argv[]) {
  (void) argc;
  (void) argv;

  using TestFunction = int (*)();
  const TestFunction tests[] = {
      testDCPGraphSingleNode,
      testDCPGraphLinearChain,
      testDCPGraphFixture,
      testDCPGraphMaxCPUs,
      testDCPGraphSingleCpuCap,
      testDCPGraphDiamondDependencies,
      testDCPGraphCrossbarExhaustion,
  };

  int failures = 0;
  for (const TestFunction runTest : tests)
    failures += runTest();

  if (failures == 0)
    return EXIT_SUCCESS;

  std::cerr << failures << " test failures\n";
  return EXIT_FAILURE;
}