diff --git a/validation/validate.py b/validation/validate.py index 79abfd3..6114aaa 100644 --- a/validation/validate.py +++ b/validation/validate.py @@ -6,7 +6,15 @@ import subprocess import sys from pathlib import Path from colorama import Style, Fore -from validate_one import ProgressReporter, clean_workspace_artifacts, validate_network +from validate_one import ( + MODE_COMPILE_ONLY, + MODE_FULL, + MODE_RUN_ONLY, + MODE_STAGE_TITLES, + ProgressReporter, + clean_workspace_artifacts, + validate_network, +) from raptor import PIM_PASS_LABELS @@ -71,6 +79,11 @@ def main(): help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.") ap.add_argument("--clean", action="store_true", help="Remove generated validation artifacts under each model workspace and exit.") + mode_group = ap.add_mutually_exclusive_group() + mode_group.add_argument("--compile-only", action="store_true", + help="Compile reference and PIM artifacts only; do not run reference, simulator, or compare.") + mode_group.add_argument("--run-only", action="store_true", + help="Reuse existing compiled artifacts and only run inputs/reference/simulator/compare.") ap.add_argument("--verbose", action="store_true", help="Print per-stage progress and subprocess logs for passing validations too.") a = ap.parse_args() @@ -111,12 +124,18 @@ def main(): print(f"Operations root: {operations_dir}") print("=" * 72) + mode = MODE_FULL + if a.compile_only: + mode = MODE_COMPILE_ONLY + elif a.run_only: + mode = MODE_RUN_ONLY + results = {} # relative_path -> passed pass_timing_sums = {label: 0.0 for _, label in PIM_PASS_LABELS} pass_timing_counts = {label: 0 for _, label in PIM_PASS_LABELS} total_timing_sum = 0.0 timed_benchmark_count = 0 - reporter = ProgressReporter(len(onnx_files), verbose=a.verbose) + reporter = ProgressReporter(len(onnx_files), stages_per_model=len(MODE_STAGE_TITLES[mode]), verbose=a.verbose) for index, onnx_path in enumerate(onnx_files, start=1): rel = onnx_path.relative_to(operations_dir) try: @@ -131,6 +150,7 @@ def main(): model_index=index, model_total=len(onnx_files), verbose=a.verbose, + mode=mode, ) results[str(rel)] = result.passed if result.pim_pass_timings: diff --git a/validation/validate_one.py b/validation/validate_one.py index 187eaa9..d9dedb0 100644 --- a/validation/validate_one.py +++ b/validation/validate_one.py @@ -22,6 +22,25 @@ STAGE_TITLES = ( STAGE_COUNT = len(STAGE_TITLES) GENERATED_DIR_NAMES = ("inputs", "outputs", "raptor", "runner", "simulation") +MODE_FULL = "full" +MODE_COMPILE_ONLY = "compile_only" +MODE_RUN_ONLY = "run_only" + +MODE_STAGE_TITLES = { + MODE_FULL: STAGE_TITLES, + MODE_COMPILE_ONLY: ( + "Compile ONNX", + "Build Runner", + "Compile PIM", + ), + MODE_RUN_ONLY: ( + "Generate Inputs", + "Run Reference", + "Run Simulator", + "Compare Outputs", + ), +} + def sanitize_output_name(name): return "".join(ch if ch.isalnum() or ch in "_.-" else "_" for ch in name[:255]) @@ -272,21 +291,23 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None, pim_merge_scheduler="peft", threshold=1e-3, seed=0, reporter=None, model_index=1, model_total=1, verbose=False, - command_timeout_seconds=60.0): + command_timeout_seconds=60.0, mode=MODE_FULL): network_onnx_path = Path(network_onnx_path).resolve() raptor_path = Path(raptor_path).resolve() onnx_include_dir = Path(onnx_include_dir).resolve() simulator_dir = Path(simulator_dir).resolve() owns_reporter = reporter is None - reporter = reporter or ProgressReporter(model_total, verbose=verbose) + reporter = reporter or ProgressReporter(model_total, stages_per_model=len(MODE_STAGE_TITLES[mode]), verbose=verbose) workspace_dir = network_onnx_path.parent - clean_workspace_artifacts(workspace_dir, network_onnx_path.stem) raptor_dir = workspace_dir / "raptor" runner_dir = workspace_dir / "runner" runner_build_dir = runner_dir / "build" - Path.mkdir(raptor_dir, exist_ok=True) - Path.mkdir(runner_build_dir, parents=True, exist_ok=True) + + if mode != MODE_RUN_ONLY: + clean_workspace_artifacts(workspace_dir, network_onnx_path.stem) + Path.mkdir(raptor_dir, exist_ok=True) + Path.mkdir(runner_build_dir, parents=True, exist_ok=True) reporter.log(Fore.CYAN + f"[{model_index}/{model_total}]" + Style.RESET_ALL + f" {Style.BRIGHT}Validating {network_onnx_path.name}{Style.RESET_ALL}") @@ -294,21 +315,51 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, pim_pass_timings = {} try: - print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile ONNX") - network_so_path, network_mlir_path = compile_onnx_network( - network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter, - timeout_sec=command_timeout_seconds) - print_info(reporter, f"MLIR saved to {network_mlir_path}") - print_info(reporter, f"Shared library saved to {network_so_path}") - reporter.advance() + stem = network_onnx_path.stem + network_so_path = runner_dir / f"{stem}.so" + network_mlir_path = raptor_dir / f"{stem}.onnx.mlir" + runner_path = runner_build_dir / "runner" + pim_output_base = raptor_dir / stem - print_stage(reporter, model_index, model_total, network_onnx_path.name, "Build Runner") - gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir, out=runner_dir / "runner.c", - verbose=False) - runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter, - timeout_sec=command_timeout_seconds) - print_info(reporter, f"Runner built at {runner_path}") - reporter.advance() + if mode != MODE_RUN_ONLY: + print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile ONNX") + network_so_path, network_mlir_path = compile_onnx_network( + network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter, + timeout_sec=command_timeout_seconds) + print_info(reporter, f"MLIR saved to {network_mlir_path}") + print_info(reporter, f"Shared library saved to {network_so_path}") + reporter.advance() + + print_stage(reporter, model_index, model_total, network_onnx_path.name, "Build Runner") + gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir, out=runner_dir / "runner.c", + verbose=False) + runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter, + timeout_sec=command_timeout_seconds) + print_info(reporter, f"Runner built at {runner_path}") + reporter.advance() + + if mode == MODE_COMPILE_ONLY: + print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM") + pim_pass_timings = compile_with_raptor( + network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count, + core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, + cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) + print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") + reporter.advance() + reporter.record_result(True) + reporter.log(Style.BRIGHT + f"Result: {Fore.GREEN}PASS{Style.RESET_ALL}" + Style.RESET_ALL) + return ValidationResult(passed=True, pim_pass_timings=pim_pass_timings) + + if mode == MODE_RUN_ONLY: + required_paths = [ + (network_so_path, "compiled reference shared library"), + (network_mlir_path, "exported ONNX MLIR"), + (runner_path, "built reference runner"), + (raptor_dir / "pim" / "config.json", "compiled PIM artifacts"), + ] + missing = [f"{description} at {path}" for path, description in required_paths if not path.exists()] + if missing: + raise FileNotFoundError("run-only mode requires existing artifacts:\n " + "\n ".join(missing)) print_stage(reporter, model_index, model_total, network_onnx_path.name, "Generate Inputs") inputs_descriptor, outputs_descriptor = onnx_io(network_onnx_path) @@ -326,13 +377,14 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, print_info(reporter, f"Reference outputs saved to {out_dir}") reporter.advance() - print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM") - pim_pass_timings = compile_with_raptor( - network_mlir_path, raptor_path, raptor_dir / network_onnx_path.stem, crossbar_size, crossbar_count, - core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, - cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) - print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") - reporter.advance() + if mode != MODE_RUN_ONLY: + print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM") + pim_pass_timings = compile_with_raptor( + network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count, + core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, + cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) + print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") + reporter.advance() print_stage(reporter, model_index, model_total, network_onnx_path.name, "Run Simulator") pim_dir = raptor_dir / "pim"