All checks were successful
Validate Operations / validate-operations (push) Successful in 21m14s
532 lines
15 KiB
C++
532 lines
15 KiB
C++
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <filesystem>
|
|
#include <fstream>
|
|
#include <initializer_list>
|
|
#include <iostream>
|
|
#include <limits>
|
|
#include <optional>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
|
#include "src/Accelerators/PIM/Dialect/Spatial/Transforms/MergeComputeNodes/DCPGraph/Graph.hpp"
|
|
#include "src/Compiler/CompilerOptions.hpp"
|
|
|
|
namespace {
|
|
|
|
struct ExpectedScheduledTask {
|
|
size_t nodeIndex;
|
|
Time aest;
|
|
Time alst;
|
|
Weight weight;
|
|
};
|
|
|
|
struct ScheduledPlacement {
|
|
CPU cpu;
|
|
GraphDCP::ScheduledTaskInfo task;
|
|
};
|
|
|
|
// Returns the scratch directory used by these tests: the "raptor-test-pim"
// entry directly below the system temporary directory.
std::filesystem::path getDcpTestOutputDir() {
  std::filesystem::path scratchDir = std::filesystem::temp_directory_path();
  scratchDir /= "raptor-test-pim";
  return scratchDir;
}
|
|
|
|
void configureDcpDotOutput() {
|
|
auto outputDir = getDcpTestOutputDir();
|
|
std::error_code errorCode;
|
|
std::filesystem::remove_all(outputDir, errorCode);
|
|
std::filesystem::create_directories(outputDir, errorCode);
|
|
assert(!errorCode);
|
|
onnx_mlir::outputBaseName = (outputDir / "DCPTest.mlir").string();
|
|
}
|
|
|
|
std::optional<std::filesystem::path> getLatestDcpDotFile() {
|
|
auto graphDir = getDcpTestOutputDir() / "dcp_graph";
|
|
if (!std::filesystem::exists(graphDir))
|
|
return std::nullopt;
|
|
|
|
std::optional<std::filesystem::path> latestDot;
|
|
for (const auto& entry : std::filesystem::directory_iterator(graphDir)) {
|
|
if (!entry.is_regular_file() || entry.path().extension() != ".dot")
|
|
continue;
|
|
if (!latestDot || entry.path().filename() > latestDot->filename())
|
|
latestDot = entry.path();
|
|
}
|
|
return latestDot;
|
|
}
|
|
|
|
void dumpDcpFailureArtifacts() {
|
|
auto latestDot = getLatestDcpDotFile();
|
|
if (!latestDot) {
|
|
std::cerr << "No DCP dot file was produced.\n";
|
|
return;
|
|
}
|
|
|
|
std::cerr << "DCP dot file: " << latestDot->string() << '\n';
|
|
std::ifstream dotFile(*latestDot);
|
|
if (!dotFile.is_open()) {
|
|
std::cerr << "Failed to open DCP dot file.\n";
|
|
return;
|
|
}
|
|
|
|
std::cerr << dotFile.rdbuf();
|
|
}
|
|
|
|
// Writes the schedule the graph reports for `cpu` to stderr, one task per
// line with its node index, aest, alst and weight.
void printCpuSchedule(GraphDCP& graph, CPU cpu) {
  auto schedule = graph.getScheduledTasks(cpu);
  std::ostream& err = std::cerr;

  err << "CPU " << cpu << " actual schedule:\n";
  for (const auto& entry : schedule) {
    err << " " << entry.nodeIndex << ") aest: " << entry.aest;
    err << " alst: " << entry.alst;
    err << " weight: " << entry.weight << '\n';
  }
}
|
|
|
|
// Prints the schedule of every CPU of the graph to stderr, in CPU order.
void printGraphSchedule(GraphDCP& graph) {
  CPU current = 0;
  while (current < graph.cpuCount()) {
    printCpuSchedule(graph, current);
    ++current;
  }
}
|
|
|
|
// Verifies that the schedule of `cpu` matches `expectedTasks` exactly: same
// number of tasks, in the same order, with identical nodeIndex, aest, alst and
// weight. On any difference the actual schedule is printed to stderr.
//
// Returns true when the schedule matches, false otherwise.
bool checkScheduledTasks(GraphDCP& graph, CPU cpu, std::initializer_list<ExpectedScheduledTask> expectedTasks) {
  auto schedule = graph.getScheduledTasks(cpu);

  const auto sameTask = [](const auto& actual, const ExpectedScheduledTask& expected) {
    const bool differs = actual.nodeIndex != expected.nodeIndex || actual.aest != expected.aest
                      || actual.alst != expected.alst || actual.weight != expected.weight;
    return !differs;
  };

  // The size test must come first: std::equal walks the expected list in
  // lock-step and relies on both ranges having the same length.
  const bool matches = !(schedule.size() != expectedTasks.size())
                    && std::equal(std::begin(schedule), std::end(schedule), expectedTasks.begin(), sameTask);
  if (!matches)
    printCpuSchedule(graph, cpu);
  return matches;
}
|
|
|
|
// Builds a map from node index to the CPU and task record under which the
// node was scheduled, by walking the schedule of every CPU.
// Asserts that no node index appears more than once.
std::unordered_map<size_t, ScheduledPlacement> collectScheduledPlacements(GraphDCP& graph) {
  std::unordered_map<size_t, ScheduledPlacement> placementByNode;

  CPU cpu = 0;
  while (cpu < graph.cpuCount()) {
    for (const auto& scheduled : graph.getScheduledTasks(cpu)) {
      const bool isNew = placementByNode.emplace(scheduled.nodeIndex, ScheduledPlacement {cpu, scheduled}).second;
      assert(isNew && "task scheduled multiple times");
      (void) isNew; // only read by the assert above
    }
    ++cpu;
  }

  return placementByNode;
}
|
|
|
|
// Checks that the number of distinct scheduled nodes equals
// `expectedTaskCount`. On mismatch prints both counts and the full schedule
// to stderr.
//
// Returns true when the counts agree.
bool checkAllTasksScheduled(GraphDCP& graph, size_t expectedTaskCount) {
  const auto placements = collectScheduledPlacements(graph);
  const auto scheduledCount = placements.size();

  if (scheduledCount == expectedTaskCount)
    return true;

  std::cerr << "Expected " << expectedTaskCount << " scheduled tasks, got " << scheduledCount << "\n";
  printGraphSchedule(graph);
  return false;
}
|
|
|
|
// Validates every CPU schedule in order: each task must satisfy
// aest <= alst, and must not start before the previous task on the same CPU
// has completed (aest + weight, combined with addOrMax).
// The first violation is reported on stderr together with that CPU's schedule.
//
// Returns true when no violation is found.
bool checkCpuSchedulesDoNotOverlap(GraphDCP& graph) {
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    auto schedule = graph.getScheduledTasks(cpu);

    // Empty until the first task of this CPU has been visited.
    std::optional<Time> previousEnd;
    for (const auto& current : schedule) {
      const Time currentEnd = addOrMax(current.aest, current.weight);

      const char* problem = nullptr;
      if (current.aest > current.alst) {
        std::cerr << "Task " << current.nodeIndex << " on CPU " << cpu << " has aest > alst\n";
        problem = "window";
      }
      else if (previousEnd.has_value() && current.aest < *previousEnd) {
        std::cerr << "CPU " << cpu << " has overlapping tasks\n";
        problem = "overlap";
      }

      if (problem != nullptr) {
        printCpuSchedule(graph, cpu);
        return false;
      }

      previousEnd = currentEnd;
    }
  }
  return true;
}
|
|
|
|
// Verifies that every edge (parent, child, transferCost) is respected by the
// schedule: the child may not start before the parent completes, plus the
// transfer cost when the two tasks sit on different CPUs.
//
// An edge whose endpoint was never scheduled counts as a failure and is
// reported, rather than escaping as an uncaught std::out_of_range.
// On any failure the full schedule is printed to stderr.
//
// Returns true when all edges are satisfied.
bool checkDependencyConstraints(GraphDCP& graph, llvm::ArrayRef<IndexedEdge> edges) {
  const auto scheduledPlacements = collectScheduledPlacements(graph);
  for (auto [parentIndex, childIndex, transferCost] : edges) {
    const auto parentIt = scheduledPlacements.find(parentIndex);
    const auto childIt = scheduledPlacements.find(childIndex);
    if (parentIt == scheduledPlacements.end() || childIt == scheduledPlacements.end()) {
      std::cerr << "Edge " << parentIndex << " -> " << childIndex << " references a task that was never scheduled\n";
      printGraphSchedule(graph);
      return false;
    }

    const auto& parent = parentIt->second;
    const auto& child = childIt->second;

    // Earliest legal start of the child: parent completion, plus the
    // communication cost only when the data has to cross CPUs.
    Time requiredStart = addOrMax(parent.task.aest, parent.task.weight);
    if (parent.cpu != child.cpu)
      requiredStart = addOrMax(requiredStart, static_cast<Weight>(transferCost));

    if (child.task.aest < requiredStart) {
      std::cerr << "Dependency violation for edge " << parentIndex << " -> " << childIndex << '\n';
      printGraphSchedule(graph);
      return false;
    }
  }
  return true;
}
|
|
|
|
// Returns the largest completion time (aest + weight via addOrMax) over all
// tasks on all CPUs, or 0 when nothing is scheduled.
Time getMaxCompletion(GraphDCP& graph) {
  Time latestEnd = 0;
  for (CPU cpu = 0; cpu < graph.cpuCount(); ++cpu) {
    for (const auto& scheduled : graph.getScheduledTasks(cpu)) {
      const Time taskEnd = addOrMax(scheduled.aest, scheduled.weight);
      if (latestEnd < taskEnd)
        latestEnd = taskEnd;
    }
  }
  return latestEnd;
}
|
|
|
|
int testDCPGraphSingleNode() {
|
|
std::cout << "testDCPGraphSingleNode:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const std::vector<Weight> nodeWeights = {15};
|
|
GraphDCP graph(nodeWeights, {});
|
|
graph.runDcp();
|
|
|
|
if (graph.cpuCount() != 1) {
|
|
std::cerr << "Expected exactly 1 CPU, got " << graph.cpuCount() << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkScheduledTasks(graph,
|
|
0,
|
|
{
|
|
{0, 0, 0, 15},
|
|
})) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int testDCPGraphLinearChain() {
|
|
std::cout << "testDCPGraphLinearChain:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const std::vector<Weight> nodeWeights = {10, 20, 5};
|
|
const std::vector<IndexedEdge> edges = {
|
|
{0, 1, 7},
|
|
{1, 2, 9},
|
|
};
|
|
|
|
GraphDCP graph(nodeWeights, edges);
|
|
graph.runDcp();
|
|
|
|
if (graph.cpuCount() != 1) {
|
|
std::cerr << "Expected a linear chain to stay on one CPU, got " << graph.cpuCount() << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkScheduledTasks(graph,
|
|
0,
|
|
{
|
|
{0, 0, 0, 10},
|
|
{1, 10, 10, 20},
|
|
{2, 30, 30, 5 },
|
|
})) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkCpuSchedulesDoNotOverlap(graph) || !checkDependencyConstraints(graph, edges)) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int testDCPGraphFixture() {
|
|
std::cout << "testDCPGraphFixture:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const std::vector<Weight> nodeWeights = {
|
|
80,
|
|
40,
|
|
40,
|
|
40,
|
|
40,
|
|
40,
|
|
60,
|
|
30,
|
|
30,
|
|
30,
|
|
30,
|
|
40,
|
|
20,
|
|
20,
|
|
20,
|
|
20,
|
|
10,
|
|
10,
|
|
};
|
|
const std::vector<IndexedEdge> edges = {
|
|
{0, 1, 3 },
|
|
{0, 1, 120},
|
|
{0, 2, 120},
|
|
{0, 3, 120},
|
|
{0, 4, 120},
|
|
{0, 5, 120},
|
|
{0, 6, 120},
|
|
{2, 6, 80 },
|
|
{2, 7, 80 },
|
|
{3, 8, 80 },
|
|
{4, 9, 80 },
|
|
{5, 10, 80 },
|
|
{6, 7, 120},
|
|
{6, 8, 120},
|
|
{6, 9, 120},
|
|
{6, 10, 120},
|
|
{6, 11, 120},
|
|
{8, 11, 80 },
|
|
{8, 12, 80 },
|
|
{9, 13, 80 },
|
|
{10, 14, 80 },
|
|
{11, 12, 120},
|
|
{11, 13, 120},
|
|
{11, 14, 120},
|
|
{11, 15, 120},
|
|
{13, 15, 80 },
|
|
{13, 16, 80 },
|
|
{14, 17, 80 },
|
|
{15, 16, 120},
|
|
{15, 17, 120},
|
|
};
|
|
|
|
GraphDCP graph(nodeWeights, {});
|
|
for (auto [parent, child, weight] : edges)
|
|
graph.makeEdge(parent, child, weight);
|
|
|
|
graph.runDcp();
|
|
if (graph.cpuCount() != 4) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkScheduledTasks(graph,
|
|
3,
|
|
{
|
|
{1, 200, 400, 40},
|
|
})) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkScheduledTasks(graph,
|
|
2,
|
|
{
|
|
{5, 200, 260, 40},
|
|
{10, 300, 300, 30},
|
|
})) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkScheduledTasks(graph,
|
|
1,
|
|
{
|
|
{4, 200, 210, 40},
|
|
{7, 300, 410, 30},
|
|
})) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkScheduledTasks(graph,
|
|
0,
|
|
{
|
|
{0, 0, 0, 80},
|
|
{2, 80, 80, 40},
|
|
{6, 120, 120, 60},
|
|
{3, 180, 200, 40},
|
|
{8, 220, 240, 30},
|
|
{11, 250, 270, 40},
|
|
{12, 290, 310, 20},
|
|
{9, 320, 330, 30},
|
|
{13, 350, 360, 20},
|
|
{15, 370, 380, 20},
|
|
{16, 390, 400, 10},
|
|
{14, 410, 410, 20},
|
|
{17, 430, 430, 10},
|
|
})) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
|
|| !checkDependencyConstraints(graph, edges)) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int testDCPGraphMaxCPUs() {
|
|
std::cout << "testDCPGraphMaxCPUs:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const std::vector<Weight> nodeWeights = {20, 10, 10, 10, 10, 10, 10};
|
|
const std::vector<IndexedEdge> edges = {
|
|
{0, 1, 0},
|
|
{0, 2, 0},
|
|
{0, 3, 0},
|
|
{0, 4, 0},
|
|
{0, 5, 0},
|
|
{0, 6, 0},
|
|
};
|
|
|
|
GraphDCP graph(nodeWeights, edges);
|
|
graph.setMaxCpuCount(2);
|
|
graph.runDcp();
|
|
|
|
if (graph.cpuCount() != 2) {
|
|
std::cerr << "Expected exactly 2 CPUs with maxCpuCount=2, got " << graph.cpuCount() << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
|
|| !checkDependencyConstraints(graph, edges)) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (getMaxCompletion(graph) > 50) {
|
|
std::cerr << "Expected makespan <= 50 under maxCpuCount=2, got " << getMaxCompletion(graph) << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int testDCPGraphSingleCpuCap() {
|
|
std::cout << "testDCPGraphSingleCpuCap:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const std::vector<Weight> nodeWeights = {20, 10, 10, 10};
|
|
const std::vector<IndexedEdge> edges = {
|
|
{0, 1, 0},
|
|
{0, 2, 0},
|
|
{0, 3, 0},
|
|
};
|
|
|
|
GraphDCP graph(nodeWeights, edges);
|
|
graph.setMaxCpuCount(1);
|
|
graph.runDcp();
|
|
|
|
if (graph.cpuCount() != 1) {
|
|
std::cerr << "Expected exactly 1 CPU with maxCpuCount=1, got " << graph.cpuCount() << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
|
|| !checkDependencyConstraints(graph, edges)) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
if (getMaxCompletion(graph) != 50) {
|
|
std::cerr << "Expected makespan 50 under maxCpuCount=1, got " << getMaxCompletion(graph) << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int testDCPGraphDiamondDependencies() {
|
|
std::cout << "testDCPGraphDiamondDependencies:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const std::vector<Weight> nodeWeights = {15, 10, 12, 20};
|
|
const std::vector<IndexedEdge> edges = {
|
|
{0, 1, 5},
|
|
{0, 2, 7},
|
|
{1, 3, 3},
|
|
{2, 3, 2},
|
|
};
|
|
|
|
GraphDCP graph(nodeWeights, edges);
|
|
graph.runDcp();
|
|
|
|
if (!checkAllTasksScheduled(graph, nodeWeights.size()) || !checkCpuSchedulesDoNotOverlap(graph)
|
|
|| !checkDependencyConstraints(graph, edges)) {
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
|
|
auto scheduledPlacements = collectScheduledPlacements(graph);
|
|
const auto& sink = scheduledPlacements.at(3).task;
|
|
if (sink.aest < 27) {
|
|
std::cerr << "Expected sink node to start no earlier than the longest parent path, got " << sink.aest << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// crossbarSize=4, crossbarCount=2 => capacity = 4*4*2 = 32.
|
|
// Each task with crossbarUsage=1 needs footprint = 4*4 = 16, so at most 1 task
|
|
// can fit per CPU (16+16 = 32 >= capacity). The scheduler must open a fresh CPU
|
|
// for each task; all three end up on separate CPUs with their base weight.
|
|
int testDCPGraphCrossbarExhaustion() {
|
|
std::cout << "testDCPGraphCrossbarExhaustion:" << std::endl;
|
|
configureDcpDotOutput();
|
|
|
|
const size_t savedCrossbarSize = onnx_mlir::crossbarSize.getValue();
|
|
const size_t savedCrossbarCount = onnx_mlir::crossbarCountInCore.getValue();
|
|
onnx_mlir::crossbarSize = 4;
|
|
onnx_mlir::crossbarCountInCore = 2;
|
|
|
|
auto restoreCrossbarOptions = [&]() {
|
|
onnx_mlir::crossbarSize = savedCrossbarSize;
|
|
onnx_mlir::crossbarCountInCore = savedCrossbarCount;
|
|
};
|
|
|
|
const std::vector<Weight> nodeWeights = {10, 10, 10};
|
|
const std::vector<CrossbarUsage> nodeCrossbarUsage = {1, 1, 1};
|
|
GraphDCP graph(nodeWeights, {}, nodeCrossbarUsage);
|
|
graph.setMaxCpuCount(3);
|
|
graph.runDcp();
|
|
|
|
if (graph.cpuCount() != 3) {
|
|
restoreCrossbarOptions();
|
|
std::cerr << "Expected 3 CPUs (one per task due to crossbar limit), got " << graph.cpuCount() << "\n";
|
|
dumpDcpFailureArtifacts();
|
|
return 1;
|
|
}
|
|
|
|
int failures = 0;
|
|
for (CPU c = 0; c < 3; c++) {
|
|
auto scheduledTasks = graph.getScheduledTasks(c);
|
|
if (scheduledTasks.size() != 1) {
|
|
std::cerr << "Expected exactly 1 task on CPU " << c << ", got " << scheduledTasks.size() << "\n";
|
|
printCpuSchedule(graph, c);
|
|
failures++;
|
|
continue;
|
|
}
|
|
if (scheduledTasks[0].weight != 10) {
|
|
std::cerr << "Expected weight=10 on CPU " << c << ", got " << scheduledTasks[0].weight << "\n";
|
|
printCpuSchedule(graph, c);
|
|
failures++;
|
|
}
|
|
}
|
|
|
|
restoreCrossbarOptions();
|
|
if (failures) dumpDcpFailureArtifacts();
|
|
return failures;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// Runs every DCP graph test in a fixed order and sums their failure counts.
// Exits with EXIT_SUCCESS when all tests pass; otherwise prints the total
// number of failures to stderr and exits with EXIT_FAILURE.
int main(int argc, char* argv[]) {
  (void) argc;
  (void) argv;

  // Execution order matches the order of this table.
  int (*const testCases[])() = {
    testDCPGraphSingleNode,
    testDCPGraphLinearChain,
    testDCPGraphFixture,
    testDCPGraphMaxCPUs,
    testDCPGraphSingleCpuCap,
    testDCPGraphDiamondDependencies,
    testDCPGraphCrossbarExhaustion,
  };

  int failures = 0;
  for (const auto testCase : testCases)
    failures += testCase();

  if (failures == 0)
    return EXIT_SUCCESS;

  std::cerr << failures << " test failures\n";
  return EXIT_FAILURE;
}
|