Validate: add new options for compile-only and run-only modes
Validate Operations / validate-operations (push) Has been cancelled

This commit is contained in:
ilgeco
2026-05-28 22:59:26 +02:00
parent 1ab489fe0a
commit f34698a2b6
2 changed files with 100 additions and 28 deletions
+22 -2
View File
@@ -6,7 +6,15 @@ import subprocess
import sys import sys
from pathlib import Path from pathlib import Path
from colorama import Style, Fore from colorama import Style, Fore
from validate_one import ProgressReporter, clean_workspace_artifacts, validate_network from validate_one import (
MODE_COMPILE_ONLY,
MODE_FULL,
MODE_RUN_ONLY,
MODE_STAGE_TITLES,
ProgressReporter,
clean_workspace_artifacts,
validate_network,
)
from raptor import PIM_PASS_LABELS from raptor import PIM_PASS_LABELS
@@ -71,6 +79,11 @@ def main():
help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.") help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.")
ap.add_argument("--clean", action="store_true", ap.add_argument("--clean", action="store_true",
help="Remove generated validation artifacts under each model workspace and exit.") help="Remove generated validation artifacts under each model workspace and exit.")
mode_group = ap.add_mutually_exclusive_group()
mode_group.add_argument("--compile-only", action="store_true",
help="Compile reference and PIM artifacts only; do not run reference, simulator, or compare.")
mode_group.add_argument("--run-only", action="store_true",
help="Reuse existing compiled artifacts and only run inputs/reference/simulator/compare.")
ap.add_argument("--verbose", action="store_true", ap.add_argument("--verbose", action="store_true",
help="Print per-stage progress and subprocess logs for passing validations too.") help="Print per-stage progress and subprocess logs for passing validations too.")
a = ap.parse_args() a = ap.parse_args()
@@ -111,12 +124,18 @@ def main():
print(f"Operations root: {operations_dir}") print(f"Operations root: {operations_dir}")
print("=" * 72) print("=" * 72)
mode = MODE_FULL
if a.compile_only:
mode = MODE_COMPILE_ONLY
elif a.run_only:
mode = MODE_RUN_ONLY
results = {} # relative_path -> passed results = {} # relative_path -> passed
pass_timing_sums = {label: 0.0 for _, label in PIM_PASS_LABELS} pass_timing_sums = {label: 0.0 for _, label in PIM_PASS_LABELS}
pass_timing_counts = {label: 0 for _, label in PIM_PASS_LABELS} pass_timing_counts = {label: 0 for _, label in PIM_PASS_LABELS}
total_timing_sum = 0.0 total_timing_sum = 0.0
timed_benchmark_count = 0 timed_benchmark_count = 0
reporter = ProgressReporter(len(onnx_files), verbose=a.verbose) reporter = ProgressReporter(len(onnx_files), stages_per_model=len(MODE_STAGE_TITLES[mode]), verbose=a.verbose)
for index, onnx_path in enumerate(onnx_files, start=1): for index, onnx_path in enumerate(onnx_files, start=1):
rel = onnx_path.relative_to(operations_dir) rel = onnx_path.relative_to(operations_dir)
try: try:
@@ -131,6 +150,7 @@ def main():
model_index=index, model_index=index,
model_total=len(onnx_files), model_total=len(onnx_files),
verbose=a.verbose, verbose=a.verbose,
mode=mode,
) )
results[str(rel)] = result.passed results[str(rel)] = result.passed
if result.pim_pass_timings: if result.pim_pass_timings:
+78 -26
View File
@@ -22,6 +22,25 @@ STAGE_TITLES = (
STAGE_COUNT = len(STAGE_TITLES) STAGE_COUNT = len(STAGE_TITLES)
GENERATED_DIR_NAMES = ("inputs", "outputs", "raptor", "runner", "simulation") GENERATED_DIR_NAMES = ("inputs", "outputs", "raptor", "runner", "simulation")
# Identifiers for the validation modes accepted through the ``mode`` argument.
MODE_FULL = "full"
MODE_COMPILE_ONLY = "compile_only"
MODE_RUN_ONLY = "run_only"

# Progress-stage titles for each mode. The length of the selected tuple is
# what callers use as the number of stages per model for progress reporting.
MODE_STAGE_TITLES = {
    MODE_FULL: STAGE_TITLES,
    MODE_COMPILE_ONLY: ("Compile ONNX", "Build Runner", "Compile PIM"),
    MODE_RUN_ONLY: (
        "Generate Inputs",
        "Run Reference",
        "Run Simulator",
        "Compare Outputs",
    ),
}
def sanitize_output_name(name):
    """Return a sanitized copy of *name* limited to a restricted character set.

    Only the first 255 characters of ``name`` are considered. Within that
    prefix, every character that is neither alphanumeric (``str.isalnum``)
    nor one of ``_``, ``.`` or ``-`` is replaced by an underscore, so the
    result has the same length as the truncated input.
    """
    return "".join(ch if ch.isalnum() or ch in "_.-" else "_" for ch in name[:255])
@@ -272,21 +291,23 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None, simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None,
pim_merge_scheduler="peft", threshold=1e-3, pim_merge_scheduler="peft", threshold=1e-3,
seed=0, reporter=None, model_index=1, model_total=1, verbose=False, seed=0, reporter=None, model_index=1, model_total=1, verbose=False,
command_timeout_seconds=60.0): command_timeout_seconds=60.0, mode=MODE_FULL):
network_onnx_path = Path(network_onnx_path).resolve() network_onnx_path = Path(network_onnx_path).resolve()
raptor_path = Path(raptor_path).resolve() raptor_path = Path(raptor_path).resolve()
onnx_include_dir = Path(onnx_include_dir).resolve() onnx_include_dir = Path(onnx_include_dir).resolve()
simulator_dir = Path(simulator_dir).resolve() simulator_dir = Path(simulator_dir).resolve()
owns_reporter = reporter is None owns_reporter = reporter is None
reporter = reporter or ProgressReporter(model_total, verbose=verbose) reporter = reporter or ProgressReporter(model_total, stages_per_model=len(MODE_STAGE_TITLES[mode]), verbose=verbose)
workspace_dir = network_onnx_path.parent workspace_dir = network_onnx_path.parent
clean_workspace_artifacts(workspace_dir, network_onnx_path.stem)
raptor_dir = workspace_dir / "raptor" raptor_dir = workspace_dir / "raptor"
runner_dir = workspace_dir / "runner" runner_dir = workspace_dir / "runner"
runner_build_dir = runner_dir / "build" runner_build_dir = runner_dir / "build"
Path.mkdir(raptor_dir, exist_ok=True)
Path.mkdir(runner_build_dir, parents=True, exist_ok=True) if mode != MODE_RUN_ONLY:
clean_workspace_artifacts(workspace_dir, network_onnx_path.stem)
Path.mkdir(raptor_dir, exist_ok=True)
Path.mkdir(runner_build_dir, parents=True, exist_ok=True)
reporter.log(Fore.CYAN + f"[{model_index}/{model_total}]" + Style.RESET_ALL + reporter.log(Fore.CYAN + f"[{model_index}/{model_total}]" + Style.RESET_ALL +
f" {Style.BRIGHT}Validating {network_onnx_path.name}{Style.RESET_ALL}") f" {Style.BRIGHT}Validating {network_onnx_path.name}{Style.RESET_ALL}")
@@ -294,21 +315,51 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
pim_pass_timings = {} pim_pass_timings = {}
try: try:
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile ONNX") stem = network_onnx_path.stem
network_so_path, network_mlir_path = compile_onnx_network( network_so_path = runner_dir / f"{stem}.so"
network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter, network_mlir_path = raptor_dir / f"{stem}.onnx.mlir"
timeout_sec=command_timeout_seconds) runner_path = runner_build_dir / "runner"
print_info(reporter, f"MLIR saved to {network_mlir_path}") pim_output_base = raptor_dir / stem
print_info(reporter, f"Shared library saved to {network_so_path}")
reporter.advance()
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Build Runner") if mode != MODE_RUN_ONLY:
gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir, out=runner_dir / "runner.c", print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile ONNX")
verbose=False) network_so_path, network_mlir_path = compile_onnx_network(
runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter, network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter,
timeout_sec=command_timeout_seconds) timeout_sec=command_timeout_seconds)
print_info(reporter, f"Runner built at {runner_path}") print_info(reporter, f"MLIR saved to {network_mlir_path}")
reporter.advance() print_info(reporter, f"Shared library saved to {network_so_path}")
reporter.advance()
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Build Runner")
gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir, out=runner_dir / "runner.c",
verbose=False)
runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter,
timeout_sec=command_timeout_seconds)
print_info(reporter, f"Runner built at {runner_path}")
reporter.advance()
if mode == MODE_COMPILE_ONLY:
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM")
pim_pass_timings = compile_with_raptor(
network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count,
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
reporter.advance()
reporter.record_result(True)
reporter.log(Style.BRIGHT + f"Result: {Fore.GREEN}PASS{Style.RESET_ALL}" + Style.RESET_ALL)
return ValidationResult(passed=True, pim_pass_timings=pim_pass_timings)
if mode == MODE_RUN_ONLY:
required_paths = [
(network_so_path, "compiled reference shared library"),
(network_mlir_path, "exported ONNX MLIR"),
(runner_path, "built reference runner"),
(raptor_dir / "pim" / "config.json", "compiled PIM artifacts"),
]
missing = [f"{description} at {path}" for path, description in required_paths if not path.exists()]
if missing:
raise FileNotFoundError("run-only mode requires existing artifacts:\n " + "\n ".join(missing))
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Generate Inputs") print_stage(reporter, model_index, model_total, network_onnx_path.name, "Generate Inputs")
inputs_descriptor, outputs_descriptor = onnx_io(network_onnx_path) inputs_descriptor, outputs_descriptor = onnx_io(network_onnx_path)
@@ -326,13 +377,14 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
print_info(reporter, f"Reference outputs saved to {out_dir}") print_info(reporter, f"Reference outputs saved to {out_dir}")
reporter.advance() reporter.advance()
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM") if mode != MODE_RUN_ONLY:
pim_pass_timings = compile_with_raptor( print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM")
network_mlir_path, raptor_path, raptor_dir / network_onnx_path.stem, crossbar_size, crossbar_count, pim_pass_timings = compile_with_raptor(
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count,
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
reporter.advance() print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
reporter.advance()
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Run Simulator") print_stage(reporter, model_index, model_total, network_onnx_path.name, "Run Simulator")
pim_dir = raptor_dir / "pim" pim_dir = raptor_dir / "pim"