// Commit: faster (and refactored) DCP analysis
// CI: Validate Operations / validate-operations (push) - successful in 2h16m17s
// File: test/PIM/DCPTest.cpp (new file, 528 lines; hunk @@ -0,0 +1,528 @@)
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <initializer_list>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/Graph.hpp"
|
||||
#include "src/Compiler/CompilerOptions.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
struct ExpectedScheduledTask {
|
||||
size_t nodeIndex;
|
||||
Time aest;
|
||||
Time alst;
|
||||
Weight weight;
|
||||
};
|
||||
|
||||
struct ScheduledPlacement {
|
||||
CPU cpu;
|
||||
GraphDCP::ScheduledTaskInfo task;
|
||||
};
|
||||
|
||||
/// Returns the scratch directory used by the DCP tests: the "raptor-test-pim"
/// entry inside the system temporary directory.
std::filesystem::path getDcpTestOutputDir() {
  std::filesystem::path outputDir = std::filesystem::temp_directory_path();
  outputDir /= "raptor-test-pim";
  return outputDir;
}
|
||||
|
||||
void configureDcpDotOutput() {
|
||||
auto outputDir = getDcpTestOutputDir();
|
||||
std::error_code errorCode;
|
||||
std::filesystem::remove_all(outputDir, errorCode);
|
||||
std::filesystem::create_directories(outputDir, errorCode);
|
||||
assert(!errorCode);
|
||||
onnx_mlir::outputBaseName = (outputDir / "DCPTest.mlir").string();
|
||||
}
|
||||
|
||||
std::optional<std::filesystem::path> getLatestDcpDotFile() {
|
||||
auto graphDir = getDcpTestOutputDir() / "dcp_graph";
|
||||
if (!std::filesystem::exists(graphDir))
|
||||
return std::nullopt;
|
||||
|
||||
std::optional<std::filesystem::path> latestDot;
|
||||
for (const auto& entry : std::filesystem::directory_iterator(graphDir)) {
|
||||
if (!entry.is_regular_file() || entry.path().extension() != ".dot")
|
||||
continue;
|
||||
if (!latestDot || entry.path().filename() > latestDot->filename())
|
||||
latestDot = entry.path();
|
||||
}
|
||||
return latestDot;
|
||||
}
|
||||
|
||||
void dumpDcpFailureArtifacts() {
|
||||
auto latestDot = getLatestDcpDotFile();
|
||||
if (!latestDot) {
|
||||
std::cerr << "No DCP dot file was produced.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
std::cerr << "DCP dot file: " << latestDot->string() << '\n';
|
||||
std::ifstream dotFile(*latestDot);
|
||||
if (!dotFile.is_open()) {
|
||||
std::cerr << "Failed to open DCP dot file.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
std::cerr << dotFile.rdbuf();
|
||||
}
|
||||
|
||||
/// Writes the schedule of one CPU to stderr, one task per line, showing each
/// task's node index, aest, alst and weight.
void printCpuSchedule(GraphDCP& graph, CPU cpu) {
  auto schedule = graph.getScheduledTasks(cpu);

  std::cerr << "CPU " << cpu << " actual schedule:\n";
  for (const auto& entry : schedule) {
    std::cerr << " " << entry.nodeIndex << ") aest: " << entry.aest;
    std::cerr << " alst: " << entry.alst;
    std::cerr << " weight: " << entry.weight << '\n';
  }
}
|
||||
|
||||
/// Writes the schedule of every CPU in the graph to stderr, in CPU index order.
void printGraphSchedule(GraphDCP& graph) {
  CPU cpu = 0;
  while (cpu < graph.cpuCount()) {
    printCpuSchedule(graph, cpu);
    ++cpu;
  }
}
|
||||
|
||||
/// Checks that the schedule of `cpu` matches `expectedTasks` exactly.
///
/// The schedules must have the same length and agree, position by position, on
/// node index, aest, alst and weight. On any mismatch the actual schedule of
/// the CPU is printed to stderr.
///
/// @return true when the schedule matches, false otherwise.
bool checkScheduledTasks(GraphDCP& graph, CPU cpu, std::initializer_list<ExpectedScheduledTask> expectedTasks) {
  auto actualTasks = graph.getScheduledTasks(cpu);

  const auto sameTask = [](const auto& actual, const ExpectedScheduledTask& expected) {
    return actual.nodeIndex == expected.nodeIndex && actual.aest == expected.aest && actual.alst == expected.alst
        && actual.weight == expected.weight;
  };

  // The size test runs first, so std::equal never reads past expectedTasks.
  const bool matches = actualTasks.size() == expectedTasks.size()
                    && std::equal(std::begin(actualTasks), std::end(actualTasks), expectedTasks.begin(), sameTask);
  if (!matches)
    printCpuSchedule(graph, cpu);
  return matches;
}
|
||||
|
||||
/// Builds a map from node index to its placement (CPU and task record) by
/// walking the schedule of every CPU.
///
/// Asserts that no node index appears in more than one schedule entry.
std::unordered_map<size_t, ScheduledPlacement> collectScheduledPlacements(GraphDCP& graph) {
  std::unordered_map<size_t, ScheduledPlacement> placementsByNode;
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    for (const auto& scheduled : graph.getScheduledTasks(cpu)) {
      const bool isNew = placementsByNode.emplace(scheduled.nodeIndex, ScheduledPlacement {cpu, scheduled}).second;
      assert(isNew && "task scheduled multiple times");
      (void) isNew; // Only read by the assert; silences unused warnings in NDEBUG builds.
    }
  }
  return placementsByNode;
}
|
||||
|
||||
/// Checks that the number of distinct scheduled nodes equals
/// `expectedTaskCount`.
///
/// On mismatch, prints both counts and the full graph schedule to stderr.
///
/// @return true when the counts agree, false otherwise.
bool checkAllTasksScheduled(GraphDCP& graph, size_t expectedTaskCount) {
  const size_t scheduledCount = collectScheduledPlacements(graph).size();
  if (scheduledCount == expectedTaskCount)
    return true;

  std::cerr << "Expected " << expectedTaskCount << " scheduled tasks, got " << scheduledCount << "\n";
  printGraphSchedule(graph);
  return false;
}
|
||||
|
||||
/// Validates the per-CPU schedules of `graph`.
///
/// For every CPU, walking its tasks in schedule order, this checks that
///  - no task has aest greater than alst, and
///  - no task starts (aest) before the previous task on that CPU completes,
///    where completion is addOrMax(aest, weight).
/// The first violation is reported on stderr together with that CPU's schedule.
///
/// @return true when every CPU passes, false on the first violation.
bool checkCpuSchedulesDoNotOverlap(GraphDCP& graph) {
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    auto cpuTasks = graph.getScheduledTasks(cpu);

    // Empty until the first task on this CPU has been processed.
    std::optional<Time> lastCompletion;
    for (const auto& current : cpuTasks) {
      const Time completion = addOrMax(current.aest, current.weight);

      if (current.alst < current.aest) {
        std::cerr << "Task " << current.nodeIndex << " on CPU " << cpu << " has aest > alst\n";
        printCpuSchedule(graph, cpu);
        return false;
      }

      if (lastCompletion.has_value() && current.aest < *lastCompletion) {
        std::cerr << "CPU " << cpu << " has overlapping tasks\n";
        printCpuSchedule(graph, cpu);
        return false;
      }

      lastCompletion = completion;
    }
  }
  return true;
}
|
||||
|
||||
/// Checks that every edge's child starts only after its parent has finished.
///
/// For each (parent, child, transferCost) edge, the child's aest must be at
/// least addOrMax(parent.aest, parent.weight), plus `transferCost` when parent
/// and child sit on different CPUs. The first violation is reported on stderr
/// together with the full graph schedule.
///
/// An edge whose parent or child was never scheduled is reported as a failure
/// rather than raising std::out_of_range from the lookup.
///
/// @return true when every edge is satisfied, false otherwise.
bool checkDependencyConstraints(GraphDCP& graph, llvm::ArrayRef<IndexedEdge> edges) {
  const auto scheduledPlacements = collectScheduledPlacements(graph);

  for (auto [parentIndex, childIndex, transferCost] : edges) {
    const auto parentIt = scheduledPlacements.find(parentIndex);
    const auto childIt = scheduledPlacements.find(childIndex);
    if (parentIt == scheduledPlacements.end() || childIt == scheduledPlacements.end()) {
      std::cerr << "Edge " << parentIndex << " -> " << childIndex << " references a task that was not scheduled\n";
      printGraphSchedule(graph);
      return false;
    }

    const auto& parent = parentIt->second;
    const auto& child = childIt->second;

    Time requiredStart = addOrMax(parent.task.aest, parent.task.weight);
    // The transfer cost only applies when the data has to cross CPUs.
    if (parent.cpu != child.cpu)
      requiredStart = addOrMax(requiredStart, static_cast<Weight>(transferCost));

    if (child.task.aest < requiredStart) {
      std::cerr << "Dependency violation for edge " << parentIndex << " -> " << childIndex << '\n';
      printGraphSchedule(graph);
      return false;
    }
  }
  return true;
}
|
||||
|
||||
/// Returns the largest completion time, addOrMax(aest, weight), over all tasks
/// on all CPUs, or 0 when nothing is scheduled.
Time getMaxCompletion(GraphDCP& graph) {
  Time latest = 0;
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    for (const auto& task : graph.getScheduledTasks(cpu)) {
      const Time completion = addOrMax(task.aest, task.weight);
      if (latest < completion)
        latest = completion;
    }
  }
  return latest;
}
|
||||
|
||||
int testDCPGraphSingleNode() {
|
||||
std::cout << "testDCPGraphSingleNode:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const std::vector<Weight> nodeWeights = {15};
|
||||
GraphDCP graph(nodeWeights, {});
|
||||
graph.runDcp();
|
||||
|
||||
if (graph.cpuCount() != 1) {
|
||||
std::cerr << "Expected exactly 1 CPU, got " << graph.cpuCount() << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkScheduledTasks(graph,
|
||||
0,
|
||||
{
|
||||
{0, 0, 0, 15},
|
||||
})) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testDCPGraphLinearChain() {
|
||||
std::cout << "testDCPGraphLinearChain:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const std::vector<Weight> nodeWeights = {10, 20, 5};
|
||||
const std::vector<IndexedEdge> edges = {
|
||||
{0, 1, 7},
|
||||
{1, 2, 9},
|
||||
};
|
||||
|
||||
GraphDCP graph(nodeWeights, edges);
|
||||
graph.runDcp();
|
||||
|
||||
if (graph.cpuCount() != 1) {
|
||||
std::cerr << "Expected a linear chain to stay on one CPU, got " << graph.cpuCount() << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkScheduledTasks(graph,
|
||||
0,
|
||||
{
|
||||
{0, 0, 0, 10},
|
||||
{1, 10, 10, 20},
|
||||
{2, 30, 30, 5 },
|
||||
})) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkCpuSchedulesDoNotOverlap(graph) || !checkDependencyConstraints(graph, edges)) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testDCPGraphFixture() {
|
||||
std::cout << "testDCPGraphFixture:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const std::vector<Weight> nodeWeights = {
|
||||
80,
|
||||
40,
|
||||
40,
|
||||
40,
|
||||
40,
|
||||
40,
|
||||
60,
|
||||
30,
|
||||
30,
|
||||
30,
|
||||
30,
|
||||
40,
|
||||
20,
|
||||
20,
|
||||
20,
|
||||
20,
|
||||
10,
|
||||
10,
|
||||
};
|
||||
const std::vector<IndexedEdge> edges = {
|
||||
{0, 1, 3 },
|
||||
{0, 1, 120},
|
||||
{0, 2, 120},
|
||||
{0, 3, 120},
|
||||
{0, 4, 120},
|
||||
{0, 5, 120},
|
||||
{0, 6, 120},
|
||||
{2, 6, 80 },
|
||||
{2, 7, 80 },
|
||||
{3, 8, 80 },
|
||||
{4, 9, 80 },
|
||||
{5, 10, 80 },
|
||||
{6, 7, 120},
|
||||
{6, 8, 120},
|
||||
{6, 9, 120},
|
||||
{6, 10, 120},
|
||||
{6, 11, 120},
|
||||
{8, 11, 80 },
|
||||
{8, 12, 80 },
|
||||
{9, 13, 80 },
|
||||
{10, 14, 80 },
|
||||
{11, 12, 120},
|
||||
{11, 13, 120},
|
||||
{11, 14, 120},
|
||||
{11, 15, 120},
|
||||
{13, 15, 80 },
|
||||
{13, 16, 80 },
|
||||
{14, 17, 80 },
|
||||
{15, 16, 120},
|
||||
{15, 17, 120},
|
||||
};
|
||||
|
||||
GraphDCP graph(nodeWeights, {});
|
||||
for (auto [parent, child, weight] : edges)
|
||||
graph.makeEdge(parent, child, weight);
|
||||
|
||||
graph.runDcp();
|
||||
if (graph.cpuCount() != 4) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkScheduledTasks(graph,
|
||||
3,
|
||||
{
|
||||
{1, 200, 400, 40},
|
||||
})) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkScheduledTasks(graph,
|
||||
2,
|
||||
{
|
||||
{5, 200, 260, 40},
|
||||
{10, 300, 300, 30},
|
||||
})) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkScheduledTasks(graph,
|
||||
1,
|
||||
{
|
||||
{4, 200, 210, 40},
|
||||
{7, 300, 410, 30},
|
||||
})) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkScheduledTasks(graph,
|
||||
0,
|
||||
{
|
||||
{0, 0, 0, 80},
|
||||
{2, 80, 80, 40},
|
||||
{6, 120, 120, 60},
|
||||
{3, 180, 200, 40},
|
||||
{8, 220, 240, 30},
|
||||
{11, 250, 270, 40},
|
||||
{12, 290, 310, 20},
|
||||
{9, 320, 330, 30},
|
||||
{13, 350, 360, 20},
|
||||
{15, 370, 380, 20},
|
||||
{16, 390, 400, 10},
|
||||
{14, 410, 410, 20},
|
||||
{17, 430, 430, 10},
|
||||
})) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
||||
|| !checkDependencyConstraints(graph, edges)) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testDCPGraphMaxCPUs() {
|
||||
std::cout << "testDCPGraphMaxCPUs:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const std::vector<Weight> nodeWeights = {20, 10, 10, 10, 10, 10, 10};
|
||||
const std::vector<IndexedEdge> edges = {
|
||||
{0, 1, 0},
|
||||
{0, 2, 0},
|
||||
{0, 3, 0},
|
||||
{0, 4, 0},
|
||||
{0, 5, 0},
|
||||
{0, 6, 0},
|
||||
};
|
||||
|
||||
GraphDCP graph(nodeWeights, edges);
|
||||
graph.setMaxCpuCount(2);
|
||||
graph.runDcp();
|
||||
|
||||
if (graph.cpuCount() != 2) {
|
||||
std::cerr << "Expected exactly 2 CPUs with maxCpuCount=2, got " << graph.cpuCount() << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
||||
|| !checkDependencyConstraints(graph, edges)) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (getMaxCompletion(graph) > 50) {
|
||||
std::cerr << "Expected makespan <= 50 under maxCpuCount=2, got " << getMaxCompletion(graph) << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testDCPGraphSingleCpuCap() {
|
||||
std::cout << "testDCPGraphSingleCpuCap:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const std::vector<Weight> nodeWeights = {20, 10, 10, 10};
|
||||
const std::vector<IndexedEdge> edges = {
|
||||
{0, 1, 0},
|
||||
{0, 2, 0},
|
||||
{0, 3, 0},
|
||||
};
|
||||
|
||||
GraphDCP graph(nodeWeights, edges);
|
||||
graph.setMaxCpuCount(1);
|
||||
graph.runDcp();
|
||||
|
||||
if (graph.cpuCount() != 1) {
|
||||
std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
||||
|| !checkDependencyConstraints(graph, edges)) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
if (getMaxCompletion(graph) != 50) {
|
||||
std::cerr << "Expected makespan 50 under maxCpuCount=1, got " << getMaxCompletion(graph) << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testDCPGraphDiamondDependencies() {
|
||||
std::cout << "testDCPGraphDiamondDependencies:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const std::vector<Weight> nodeWeights = {15, 10, 12, 20};
|
||||
const std::vector<IndexedEdge> edges = {
|
||||
{0, 1, 5},
|
||||
{0, 2, 7},
|
||||
{1, 3, 3},
|
||||
{2, 3, 2},
|
||||
};
|
||||
|
||||
GraphDCP graph(nodeWeights, edges);
|
||||
graph.runDcp();
|
||||
|
||||
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
||||
|| !checkDependencyConstraints(graph, edges)) {
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto scheduledPlacements = collectScheduledPlacements(graph);
|
||||
const auto& sink = scheduledPlacements.at(3).task;
|
||||
if (sink.aest < 27) {
|
||||
std::cerr << "Expected sink node to start no earlier than the longest parent path, got " << sink.aest << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int testDCPGraphCrossbarExhaustion() {
|
||||
std::cout << "testDCPGraphCrossbarExhaustion:" << std::endl;
|
||||
configureDcpDotOutput();
|
||||
|
||||
const size_t savedCrossbarSize = onnx_mlir::crossbarSize.getValue();
|
||||
const size_t savedCrossbarCount = onnx_mlir::crossbarCountInCore.getValue();
|
||||
onnx_mlir::crossbarSize = 4;
|
||||
onnx_mlir::crossbarCountInCore = 2;
|
||||
|
||||
auto restoreCrossbarOptions = [&]() {
|
||||
onnx_mlir::crossbarSize = savedCrossbarSize;
|
||||
onnx_mlir::crossbarCountInCore = savedCrossbarCount;
|
||||
};
|
||||
|
||||
const std::vector<Weight> nodeWeights = {10, 10, 10};
|
||||
const std::vector<CrossbarUsage> nodeCrossbarUsage = {1, 1, 1};
|
||||
GraphDCP graph(nodeWeights, {}, nodeCrossbarUsage);
|
||||
graph.setMaxCpuCount(1);
|
||||
graph.runDcp();
|
||||
|
||||
if (graph.cpuCount() != 1) {
|
||||
restoreCrossbarOptions();
|
||||
std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto scheduledTasks = graph.getScheduledTasks(0);
|
||||
if (scheduledTasks.size() != 3) {
|
||||
restoreCrossbarOptions();
|
||||
std::cerr << "Expected all three tasks to be scheduled on CPU 0\n";
|
||||
printCpuSchedule(graph, 0);
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (scheduledTasks[0].weight != 10 || scheduledTasks[1].weight != std::numeric_limits<Weight>::max()
|
||||
|| scheduledTasks[2].weight != std::numeric_limits<Weight>::max()) {
|
||||
restoreCrossbarOptions();
|
||||
std::cerr << "Unexpected effective weights under crossbar exhaustion\n";
|
||||
printCpuSchedule(graph, 0);
|
||||
dumpDcpFailureArtifacts();
|
||||
return 1;
|
||||
}
|
||||
|
||||
restoreCrossbarOptions();
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// Runs every DCP graph test in a fixed order and reports the number of failed
/// tests on stderr.
///
/// @return EXIT_SUCCESS when all tests pass, EXIT_FAILURE otherwise.
int main(int argc, char* argv[]) {
  static_cast<void>(argc);
  static_cast<void>(argv);

  using TestFn = int (*)();
  const TestFn tests[] = {
    testDCPGraphSingleNode,
    testDCPGraphLinearChain,
    testDCPGraphFixture,
    testDCPGraphMaxCPUs,
    testDCPGraphSingleCpuCap,
    testDCPGraphDiamondDependencies,
    testDCPGraphCrossbarExhaustion,
  };

  // Each test returns 0 on success and 1 on failure, so the sum is the failure count.
  int failures = 0;
  for (const TestFn test : tests)
    failures += test();

  if (failures == 0)
    return EXIT_SUCCESS;

  std::cerr << failures << " test failures\n";
  return EXIT_FAILURE;
}
|
||||
Reference in New Issue
Block a user