#!/usr/bin/env python3 from __future__ import annotations import argparse import gzip import importlib.util import json import mmap import os import re import shlex import shutil import subprocess import sys import time import types from collections import Counter from dataclasses import asdict, dataclass from pathlib import Path from typing import Any import numpy as np import onnx REPO = Path(__file__).resolve().parents[2] VALIDATION_DIR = REPO / "validation" sys.path.insert(0, str(VALIDATION_DIR)) from gen_network_runner import gen_network_runner # noqa: E402 from onnx_utils import _ONNX_TO_NP, gen_random_inputs, onnx_io, save_inputs_to_files, write_inputs_to_memory_bin # noqa: E402 from validate_one import build_dump_ranges, parse_pim_simulator_outputs # noqa: E402 from raptor import compile_with_raptor # noqa: E402 @dataclass class StepRecord: name: str duration_sec: float command: str status: str = "passed" returncode: int | None = None error: str | None = None output_tail: str | None = None @dataclass class CompareResult: passed: bool max_diffs: dict[str, float] status: str = "done" error: str | None = None def load_pimcomp_exporter(): path = REPO / "third_party/PIMCOMP-NN/verification/export_to_pim_simulator.py" spec = importlib.util.spec_from_file_location("pimcomp_exporter", path) module = importlib.util.module_from_spec(spec) assert spec is not None and spec.loader is not None sys.modules.setdefault("cv2", types.ModuleType("cv2")) spec.loader.exec_module(module) return module def load_mesh_builder(): path = REPO / "validation/pimsim-configs/generate_mesh_config.py" spec = importlib.util.spec_from_file_location("mesh_builder", path) module = importlib.util.module_from_spec(spec) assert spec is not None and spec.loader is not None spec.loader.exec_module(module) return module def shell_join(cmd: list[str]) -> str: return shlex.join(str(arg) for arg in cmd) def print_step(name: str, cmd: list[str] | None = None, cwd: Path | None = None): print(f"\n[{name}]") if cmd is not None: print(f" cwd: {cwd or REPO}") print(f" $ {shell_join(cmd)}") def output_tail(output: str | bytes | None, limit: int = 4000) -> str: if output is None: return "" if isinstance(output, bytes): output = output.decode(errors="replace") return output[-limit:] def exception_message(exc: BaseException) -> str: if isinstance(exc, subprocess.CalledProcessError): command = shell_join([str(arg) for arg in exc.cmd]) if isinstance(exc.cmd, list) else str(exc.cmd) tail = output_tail(exc.output) message = f"command failed with exit code {exc.returncode}: {command}" if tail: message += f"\n--- output tail ---\n{tail}" return message if isinstance(exc, subprocess.TimeoutExpired): command = shell_join([str(arg) for arg in exc.cmd]) if isinstance(exc.cmd, list) else str(exc.cmd) tail = output_tail(exc.output) message = f"command timed out after {exc.timeout} seconds: {command}" if tail: message += f"\n--- output tail ---\n{tail}" return message return f"{type(exc).__name__}: {exc}" def print_failure(name: str, exc: BaseException | str) -> None: message = exc if isinstance(exc, str) else exception_message(exc) print(f"\n[{name} FAILED]") for line in message.splitlines()[:20]: print(f" {line}") def run_logged( name: str, cmd: list[str], *, cwd: Path, timeout_sec: float, steps: list[StepRecord], ) -> str: print_step(name, cmd, cwd) start = time.perf_counter() command = shell_join(cmd) try: proc = subprocess.run( cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, timeout=timeout_sec, ) except subprocess.TimeoutExpired as exc: duration = time.perf_counter() - start tail = output_tail(exc.output) steps.append( StepRecord( name=name, duration_sec=duration, command=command, status="timeout", error=f"Timed out after {timeout_sec} seconds", output_tail=tail or None, ) ) raise duration = time.perf_counter() - start if proc.returncode != 0: tail = output_tail(proc.stdout) steps.append( StepRecord( name=name, duration_sec=duration, command=command, status="failed", returncode=proc.returncode, error=f"Exited with status {proc.returncode}", output_tail=tail or None, ) ) raise subprocess.CalledProcessError(proc.returncode, cmd, output=tail) steps.append(StepRecord(name=name, duration_sec=duration, command=command)) return proc.stdout def remove_tree(path: Path) -> None: if not path.exists() and not path.is_symlink(): return if path.is_symlink() or path.is_file(): path.unlink() return while True: children = list(path.iterdir()) if not children: break for child in children: remove_tree(child) path.rmdir() def load_model_inputs(model_path: Path, seed: int): model = onnx.load(model_path) initializer_names = {init.name for init in model.graph.initializer} initializer_values = { init.name: onnx.numpy_helper.to_array(init) for init in model.graph.initializer } inputs_desc, outputs_desc = onnx_io(model_path) runtime_desc = [desc for desc in inputs_desc if desc[1] not in initializer_names] runtime_arrays, _ = gen_random_inputs(runtime_desc, seed=seed) runtime_by_name = { desc[1]: arr for desc, arr in zip(runtime_desc, runtime_arrays) } arrays_in_order = [] for _, name, elem_type, _ in inputs_desc: if name in initializer_values: arrays_in_order.append(initializer_values[name].astype(_ONNX_TO_NP[elem_type], copy=False)) else: arrays_in_order.append(runtime_by_name[name]) runtime_only = [arr for desc, arr in zip(inputs_desc, arrays_in_order) if desc[1] not in initializer_names] return inputs_desc, outputs_desc, arrays_in_order, runtime_only def compare_simulator_outputs( output_bin: Path, outputs_desc: list[tuple[int, str, int, list[int]]], reference_dir: Path, *, threshold: float, rtol: float, ) -> CompareResult: sim_arrays = parse_pim_simulator_outputs(output_bin, outputs_desc) max_diffs: dict[str, float] = {} passed = True for sim_array, (idx, name, _, shape) in zip(sim_arrays, outputs_desc): csv_name = reference_dir / f"output{idx}_{sanitize_output_name(name)}.csv" ref = np.loadtxt(csv_name, delimiter=",", dtype=np.float32).reshape(shape) diff = np.abs(sim_array.astype(np.float64) - ref.astype(np.float64)) allowed = threshold + rtol * np.abs(ref.astype(np.float64)) max_diffs[name] = float(np.max(diff)) if not np.all(diff <= allowed): passed = False return CompareResult(passed=passed, max_diffs=max_diffs) def sanitize_output_name(name: str) -> str: return "".join(ch if ch.isalnum() or ch in "_.-" else "_" for ch in name[:255]) def load_effective_hardware(args: argparse.Namespace) -> dict[str, int]: config_path = args.pimcomp_dir / "config.json" with open(config_path, "r", encoding="utf-8") as f: config = json.load(f) rows, cols = config["chip_config"]["network_config"]["layout"] xbar_h, xbar_w = config["chip_config"]["core_config"]["matrix_config"]["xbar_size"] hardware = { "mesh_rows": args.mesh_rows or rows, "mesh_cols": args.mesh_cols or cols, "crossbar_count": args.crossbar_count or config["chip_config"]["core_config"]["matrix_config"]["xbar_array_count"], "crossbar_size": args.crossbar_size or xbar_h, } if xbar_h != xbar_w: raise ValueError(f"Only square crossbars are supported, got {xbar_h}x{xbar_w}") hardware["core_count"] = args.core_count or hardware["mesh_rows"] * hardware["mesh_cols"] return hardware def write_pimsim_config(args: argparse.Namespace, out_dir: Path, hardware: dict[str, int]) -> Path: mesh_builder = load_mesh_builder() example_config = REPO / "backend-simulators/pim/pimsim-nn/example/config/latency_config.json" with open(example_config, "r", encoding="utf-8") as f: config = json.load(f) config["chip_config"]["core_config"]["matrix_config"]["xbar_array_count"] = hardware["crossbar_count"] config["chip_config"]["core_config"]["matrix_config"]["xbar_size"] = [ hardware["crossbar_size"], hardware["crossbar_size"], ] config["chip_config"]["network_config"]["layout"] = [ hardware["mesh_rows"], hardware["mesh_cols"], ] config["chip_config"]["network_config"]["net_config_file_path"] = f"network_mesh_{hardware['core_count']}.json" config["chip_config"]["core_cnt"] = hardware["core_count"] config["sim_config"]["sim_mode"] = 1 if args.pimsim_mode == "latency" else 0 config["sim_config"]["sim_time"] = args.pimsim_time_ms out_dir.mkdir(parents=True, exist_ok=True) config_path = out_dir / f"{args.pimsim_mode}_config.json" network_path = out_dir / f"network_mesh_{hardware['core_count']}.json" with open(config_path, "w", encoding="utf-8") as f: json.dump(config, f, indent=2) f.write("\n") with open(network_path, "w", encoding="utf-8") as f: json.dump( mesh_builder.build_network( hardware["core_count"], (hardware["mesh_rows"], hardware["mesh_cols"]), ), f, separators=(",", ":"), ) f.write("\n") return config_path def compile_reference( args: argparse.Namespace, model_path: Path, work_dir: Path, steps: list[StepRecord], ) -> tuple[Path, Path, Path]: raptor_dir = work_dir / "reference" runner_dir = work_dir / "runner" build_dir = runner_dir / "build" raptor_dir.mkdir(parents=True, exist_ok=True) build_dir.mkdir(parents=True, exist_ok=True) stem = model_path.stem onnx_ir_base = raptor_dir / stem runner_base = runner_dir / stem run_logged( "Reference Emit ONNX IR", [str(args.raptor_path), str(model_path), "-o", str(onnx_ir_base), "--EmitONNXIR"], cwd=REPO, timeout_sec=args.timeout_seconds, steps=steps, ) run_logged( "Reference Native Compile", [str(args.raptor_path), "-O3", str(model_path), "-o", str(runner_base)], cwd=REPO, timeout_sec=args.timeout_seconds, steps=steps, ) network_so = runner_base.with_suffix(".so") network_mlir = onnx_ir_base.with_suffix(".onnx.mlir") print_step("Generate Runner Source") gen_network_runner(model_path, network_so, args.onnx_include_dir, out=runner_dir / "runner.c", verbose=False) run_logged( "Configure Runner", ["cmake", str(runner_dir), "-DCMAKE_BUILD_TYPE=Release", "-DCMAKE_C_FLAGS_RELEASE=-O3"], cwd=build_dir, timeout_sec=args.timeout_seconds, steps=steps, ) run_logged( "Build Runner", ["cmake", "--build", ".", "-j"], cwd=build_dir, timeout_sec=args.timeout_seconds, steps=steps, ) return network_mlir, network_so, build_dir / "runner" def generate_reference_outputs( runner_path: Path, runner_build_dir: Path, model_path: Path, arrays_in_order: list[np.ndarray], steps: list[StepRecord], args: argparse.Namespace, out_dir: Path, ) -> Path: inputs_dir = out_dir / "inputs" reference_dir = out_dir / "reference_outputs" inputs_dir.mkdir(parents=True, exist_ok=True) reference_dir.mkdir(parents=True, exist_ok=True) flags, _ = save_inputs_to_files(model_path, arrays_in_order, inputs_dir) run_logged( "Run Reference", [str(runner_path), *flags, "--save-csv-dir", str(reference_dir)], cwd=runner_build_dir, timeout_sec=args.timeout_seconds, steps=steps, ) return reference_dir def compile_raptor_target( model_mlir: Path, out_dir: Path, hardware: dict[str, int], args: argparse.Namespace, steps: list[StepRecord], ) -> tuple[Path, dict[str, float]]: out_dir.mkdir(parents=True, exist_ok=True) cmd = [ str(args.raptor_path), str(model_mlir), "-o", str(out_dir / "model"), "--maccel=PIM", "--EmitPimCodegen", f"--crossbar-size={hardware['crossbar_size']}", f"--crossbar-count={hardware['crossbar_count']}", f"--core-count={hardware['core_count']}", "--pim-emit-json", *args.raptor_extra_arg, ] print_step("Compile Raptor PIM", cmd, REPO) start = time.perf_counter() command = shell_join(cmd) raptor_extra_args = ["--pim-emit-json", *args.raptor_extra_arg] try: timings = compile_with_raptor( model_mlir, args.raptor_path, out_dir / "model", hardware["crossbar_size"], hardware["crossbar_count"], core_count=hardware["core_count"], raptor_extra_args=raptor_extra_args, cwd=out_dir, verbose=args.verbose_raptor_compile, timeout_sec=args.timeout_seconds, ) except Exception as exc: steps.append( StepRecord( name="Compile Raptor PIM", duration_sec=time.perf_counter() - start, command=command, status="failed", error=exception_message(exc), ) ) raise steps.append( StepRecord( name="Compile Raptor PIM", duration_sec=time.perf_counter() - start, command=command, ) ) return out_dir / "pim", timings def run_rust_validation( label: str, pim_dir: Path, config_path: Path, outputs_desc: list[tuple[int, str, int, list[int]]], reference_dir: Path, steps: list[StepRecord], args: argparse.Namespace, ) -> CompareResult: output_bin = pim_dir.parent / "semantic_validation" / "out.bin" dump_ranges = build_dump_ranges(config_path, outputs_desc) cmd = [ "cargo", "run", "--no-default-features", "--release", "--package", "pim-simulator", "--bin", "pim-simulator", "--", "-f", str(pim_dir), "-o", str(output_bin), "-d", dump_ranges, ] simulation_dir = pim_dir.parent / "semantic_validation" simulation_dir.mkdir(parents=True, exist_ok=True) run_logged( label, cmd, cwd=args.pim_simulator_dir, timeout_sec=args.timeout_seconds, steps=steps, ) return compare_simulator_outputs( output_bin, outputs_desc, reference_dir, threshold=args.threshold, rtol=args.rtol, ) def copy_pimcomp_outputs(args: argparse.Namespace, out_dir: Path): out_dir.mkdir(parents=True, exist_ok=True) for name in ("SimulationInfo.gz", "VerificationInfo.json", "MappingResult.txt"): shutil.copy2(args.pimcomp_dir / "output" / name, out_dir / name) def compile_pimcomp( args: argparse.Namespace, model_path: Path, out_dir: Path, steps: list[StepRecord], ) -> tuple[Path, Path]: out_dir.mkdir(parents=True, exist_ok=True) model_name = f"compare_{model_path.stem}" frontend_json = args.pimcomp_dir / "models/JSON" / f"{model_name}.json" frontend_cmd = [ "python3", "frontend.py", "--model_path", str(model_path), "--save_path", str(frontend_json), ] run_logged( "PIMCOMP Frontend", frontend_cmd, cwd=args.pimcomp_dir / "frontend", timeout_sec=args.timeout_seconds, steps=steps, ) backend_cmd = [ str(args.pimcomp_dir / "build" / "PIMCOMP-NN"), f"-m={model_name}", "-p=batch", "-v=YES", "-s=YES", ] run_logged( "PIMCOMP Backend", backend_cmd, cwd=args.pimcomp_dir / "build", timeout_sec=args.timeout_seconds, steps=steps, ) copy_pimcomp_outputs(args, out_dir) return out_dir / "VerificationInfo.json", out_dir / "SimulationInfo.gz" def export_pimcomp_for_pimsim_nn(simulation_info: Path, output_dir: Path) -> Path: if output_dir.exists(): remove_tree(output_dir) with gzip.open(simulation_info, "rt", encoding="utf-8") as f: sim_info = json.load(f) output_dir.mkdir(parents=True, exist_ok=True) sim_config = sim_info["config"] present_core_indices = sorted( int(key[4:]) for key, value in sim_info.items() if key.startswith("core") and isinstance(value, list) and value ) if not present_core_indices: raise ValueError("PIMCOMP SimulationInfo.gz does not contain any non-empty core instruction streams") expected_core_indices = list(range(present_core_indices[-1] + 1)) if present_core_indices != expected_core_indices: raise ValueError(f"PIMCOMP core numbering is not contiguous: {present_core_indices}") config = { "core_cnt": len(present_core_indices), "xbar_size": sim_config["xbar_size"], "xbar_array_count": sim_config["xbar_array_count"], "cell_precision": sim_config["cell_precision"], "adc_count": sim_config["adc_count"], "array_group_map": {}, } for core_idx in present_core_indices: core_name = f"core{core_idx}" config["array_group_map"][core_name] = sim_config["array_group_map"].get(core_name, []) with open(output_dir / "config.json", "w", encoding="utf-8") as f: json.dump(config, f, separators=(",", ":")) f.write("\n") for core_idx in present_core_indices: core_key = f"core{core_idx}" instructions = sim_info[core_key] with open(output_dir / f"core_{core_idx}.json", "w", encoding="utf-8") as f: json.dump(instructions, f, separators=(",", ":")) f.write("\n") return output_dir def flatten_pimcomp_input(array: np.ndarray) -> np.ndarray: tensor = array.astype(np.float32, copy=False) if tensor.ndim == 4: tensor = tensor.transpose((0, 2, 3, 1)) return tensor.reshape(-1) def export_pimcomp_for_rust( model_path: Path, verification_info: Path, simulation_info: Path, runtime_inputs: list[np.ndarray], output_dir: Path, ) -> Path: if len(runtime_inputs) != 1: raise ValueError("PIMCOMP export currently requires exactly one runtime input tensor") if output_dir.exists(): remove_tree(output_dir) exporter = load_pimcomp_exporter() with open(verification_info, "r", encoding="utf-8") as f: final_info = json.load(f) with gzip.open(simulation_info, "rt", encoding="utf-8") as f: sim_info = json.load(f) onnx_model, weights, gemm_weights, output_to_weight, output_to_bias = exporter.load_model_info( model_path, final_info ) input_tensor = flatten_pimcomp_input(runtime_inputs[0]) node_list = final_info["node_list"] max_output = exporter.max_output_element_num(node_list) local_group_map = exporter.map_local_groups(final_info, sim_info) output_dir.mkdir(parents=True, exist_ok=True) weights_dir = output_dir / "weights" weights_dir.mkdir(parents=True, exist_ok=True) input_addr = 0 cursor = exporter.byte_offset(len(input_tensor)) bias_addrs: dict[str, int] = {} for node_name, bias_name in output_to_bias.items(): bias = weights[bias_name].astype(np.float32).flatten() bias_addrs[node_name] = cursor cursor += exporter.byte_offset(len(bias)) lldi_addrs: dict[tuple[bytes, int], int] = {} for core_idx in range(sim_info["config"]["core_cnt"]): for inst in sim_info.get(f"core{core_idx}", []) or []: if inst["op"] != "lldi": continue key = (exporter.float32_bytes(inst["imm"]), inst["len"]) if key not in lldi_addrs: lldi_addrs[key] = cursor cursor += exporter.byte_offset(inst["len"]) output_base = (cursor + 255) & ~255 memory_size = output_base + exporter.byte_offset(max_output * len(node_list)) memory = bytearray(memory_size) memory[input_addr : input_addr + input_tensor.nbytes] = input_tensor.tobytes() for node_name, bias_name in output_to_bias.items(): bias = weights[bias_name].astype(np.float32).flatten() start = bias_addrs[node_name] memory[start : start + bias.nbytes] = bias.tobytes() for (value_bytes, element_num), start in lldi_addrs.items(): value = np.frombuffer(value_bytes, dtype=np.float32)[0] blob = np.full(element_num, value, dtype=np.float32) memory[start : start + blob.nbytes] = blob.tobytes() config = { "core_cnt": sim_info["config"]["core_cnt"], "xbar_size": sim_info["config"]["xbar_size"], "xbar_array_count": sim_info["config"]["xbar_array_count"], "cell_precision": sim_info["config"]["cell_precision"], "adc_count": sim_info["config"]["adc_count"], "array_group_map": {}, "inputs_addresses": [input_addr], "outputs_addresses": [], } output_name_to_node = {node["name"]: node for node in node_list} for graph_output in onnx_model.graph.output: node = output_name_to_node[graph_output.name] config["outputs_addresses"].append(output_base + exporter.byte_offset(node["new_node_index"] * max_output)) ag_info = final_info["AG_info"] weight_counter = 0 xbar_size = int(sim_info["config"]["xbar_size"][0]) for core_idx in range(config["core_cnt"]): core_name = f"core{core_idx}" core_dir = output_dir / f"core_{core_idx}" core_dir.mkdir(parents=True, exist_ok=True) local_to_global = local_group_map.get(core_idx, {}) ag_counts = sim_info["config"]["array_group_map"].get(core_name, []) group_prefix = [] total_crossbars = 0 for count in ag_counts: group_prefix.append(total_crossbars) total_crossbars += count config["array_group_map"][core_name] = list(range(total_crossbars)) for local_group, global_ag in sorted(local_to_global.items()): info = ag_info[global_ag] weight_name = output_to_weight[info["node_name"]] matrix = gemm_weights[weight_name] row_slice = slice(info["height_start"], info["height_end"] + 1) first_physical = group_prefix[local_group] for crossbar_idx, crossbar in enumerate(info["crossbar"]): col_slice = slice(crossbar["width_start"], crossbar["width_end"] + 1) tile = np.zeros((xbar_size, col_slice.stop - col_slice.start), dtype=np.float32) tile_rows = matrix[row_slice, col_slice].astype(np.float32) tile[: tile_rows.shape[0], :] = tile_rows weight_path = weights_dir / f"crossbar_{weight_counter}.bin" weight_path.write_bytes(tile.tobytes(order="C")) os.symlink(weight_path.resolve(), core_dir / f"crossbar_{first_physical + crossbar_idx}.bin") weight_counter += 1 instructions = [] last_sldi_by_rd: dict[int, int] = {} ver_ops = exporter.filtered_verification_ops(final_info, core_idx) ver_index = 0 for sim_inst in sim_info.get(core_name, []) or []: op = sim_inst["op"] if op == "setbw": instructions.append(sim_inst) continue if op == "sldi": translated = {"op": "sldi", "rd": sim_inst["rd"], "imm": exporter.byte_offset(sim_inst["imm"])} instructions.append(translated) last_sldi_by_rd[sim_inst["rd"]] = len(instructions) - 1 continue if ver_index >= len(ver_ops): raise RuntimeError(f"core{core_idx}: simulation op {op} has no matching verification op") ver_inst = ver_ops[ver_index] ver_index += 1 ver_op = ver_inst["operation"].lower() if ver_op != op: raise RuntimeError( f"core{core_idx}: simulation/verification op mismatch {op} vs {ver_op} at {ver_index - 1}" ) if op == "ld": if ver_inst["stage"] == "INPUT": src = input_addr + exporter.byte_offset(ver_inst["source_offset"]) elif ver_inst["stage"] == "BIAS": src = bias_addrs[node_list[ver_inst["node_index"]]["name"]] + exporter.byte_offset(ver_inst["source_offset"]) else: raise RuntimeError(f"Unsupported LD stage {ver_inst['stage']}") instructions[last_sldi_by_rd[sim_inst["rs1"]]]["imm"] = src translated = dict(sim_inst) translated["size"] = exporter.byte_offset(sim_inst["size"]) instructions.append(translated) elif op == "st": dst = output_base + exporter.byte_offset( ver_inst["node_index"] * max_output + ver_inst["destination_offset"] ) instructions[last_sldi_by_rd[sim_inst["rd"]]]["imm"] = dst translated = dict(sim_inst) translated["size"] = exporter.byte_offset(sim_inst["size"]) instructions.append(translated) elif op == "lldi": key = (exporter.float32_bytes(sim_inst["imm"]), sim_inst["len"]) src = lldi_addrs[key] temp_rd = 1 if sim_inst["rd"] == 0 else 0 instructions.append({"op": "sldi", "rd": temp_rd, "imm": src}) instructions.append( { "op": "ld", "rd": sim_inst["rd"], "rs1": temp_rd, "size": exporter.byte_offset(sim_inst["len"]), "offset": sim_inst["offset"], } ) elif op in ("lmv", "vvadd", "vvmul", "vvmax", "vrelu"): translated = dict(sim_inst) translated["len"] = exporter.byte_offset(sim_inst["len"]) instructions.append(translated) elif op in ("send", "recv"): translated = dict(sim_inst) translated["size"] = exporter.byte_offset(sim_inst["size"]) instructions.append(translated) elif op == "mvmul": local_group = sim_inst["group"] global_ag = local_to_global[local_group] first_physical = group_prefix[local_group] widths = [ crossbar["width_end"] - crossbar["width_start"] + 1 for crossbar in ag_info[global_ag]["crossbar"] ] dst = instructions[last_sldi_by_rd[sim_inst["rd"]]]["imm"] src = instructions[last_sldi_by_rd[sim_inst["rs1"]]]["imm"] out_offset = 0 for idx, width in enumerate(widths): instructions.append({"op": "sldi", "rd": sim_inst["rd"], "imm": dst + exporter.byte_offset(out_offset)}) instructions.append({"op": "sldi", "rd": sim_inst["rs1"], "imm": src}) translated = dict(sim_inst) translated["group"] = first_physical + idx instructions.append(translated) out_offset += width else: raise RuntimeError(f"Unsupported PIMCOMP op {op}") with open(output_dir / f"core_{core_idx}.json", "w", encoding="utf-8") as f: json.dump(instructions, f, separators=(",", ":")) f.write("\n") with open(output_dir / "config.json", "w", encoding="utf-8") as f: json.dump(config, f, separators=(",", ":")) f.write("\n") (output_dir / "memory.bin").write_bytes(memory) return output_dir def parse_pimsim_nn_report(output: str) -> dict[str, float | int | str]: patterns = { "output_count": r"output count:\s+([0-9]+)\s+samples", "throughput": r"throughput:\s+([0-9.]+)\s+samples/s", "average_latency_ms": r"average latency:\s+([0-9.eE+-]+)\s+ms", "latency_ms": r"latency:\s+([0-9.eE+-]+)\s+ms", "average_power_mw": r"average power:\s+([0-9.eE+-]+)\s+mW", "average_energy_pj": r"average energy:\s+([0-9.eE+-]+)\s+pJ/it", } result: dict[str, float | int | str] = {"raw_output": output} for key, pattern in patterns.items(): match = re.search(pattern, output) if match: value = match.group(1) result[key] = int(value) if key == "output_count" else float(value) return result def run_pimsim_nn( label: str, inst_path: Path, config_path: Path, single_file: bool, steps: list[StepRecord], args: argparse.Namespace, ) -> dict[str, Any]: cmd = [ str(args.pimsim_nn_build_dir / "ChipTest"), str(inst_path), str(config_path), "true" if single_file else "false", ] output = run_logged( label, cmd, cwd=args.pimsim_nn_build_dir, timeout_sec=args.timeout_seconds * 10.0, steps=steps, ) return parse_pimsim_nn_report(output) def parse_raptor_instructions(pim_dir: Path) -> dict[str, Any]: op_re = re.compile(br'"op":"([^"]+)"') counts = Counter() per_core = [] for path in sorted(pim_dir.glob("core_*.json"), key=lambda p: int(p.stem.split("_")[1])): with path.open("rb") as f: mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) core_counts = Counter(m.group(1).decode() for m in op_re.finditer(mm)) mm.close() total = sum(core_counts.values()) counts.update(core_counts) per_core.append( { "core": path.stem, "total": total, "send": core_counts.get("send", 0), "recv": core_counts.get("recv", 0), "mvmul": core_counts.get("mvmul", 0), } ) return { "active_cores": sum(1 for entry in per_core if entry["total"]), "total_instructions": int(sum(counts.values())), "op_counts": dict(counts), "top_cores_by_total": sorted(per_core, key=lambda entry: entry["total"], reverse=True)[:10], "top_cores_by_send": sorted(per_core, key=lambda entry: entry["send"], reverse=True)[:10], "top_cores_by_recv": sorted(per_core, key=lambda entry: entry["recv"], reverse=True)[:10], } def parse_pimcomp_instructions(simulation_info: Path) -> dict[str, Any]: with gzip.open(simulation_info, "rt", encoding="utf-8") as f: data = json.load(f) per_core = [] counts = Counter() for key in sorted((name for name in data if name.startswith("core")), key=lambda name: int(name[4:])): insts = data[key] core_counts = Counter((inst.get("operation") or inst.get("op") or "unknown").lower() for inst in insts) counts.update(core_counts) per_core.append( { "core": key, "total": int(sum(core_counts.values())), "send": core_counts.get("send", 0), "recv": core_counts.get("recv", 0), "mvmul": core_counts.get("mvmul", 0), } ) return { "active_cores": sum(1 for entry in per_core if entry["total"]), "total_instructions": int(sum(counts.values())), "op_counts": dict(counts), "top_cores_by_total": sorted(per_core, key=lambda entry: entry["total"], reverse=True)[:10], "top_cores_by_send": sorted(per_core, key=lambda entry: entry["send"], reverse=True)[:10], "top_cores_by_recv": sorted(per_core, key=lambda entry: entry["recv"], reverse=True)[:10], } def format_op_table(counts: dict[str, int], total: int) -> list[str]: if total <= 0: return ["| n/a | 0 | n/a |"] rows = [] for op, count in sorted(counts.items(), key=lambda item: item[1], reverse=True): rows.append(f"| `{op}` | {count} | {100.0 * count / total:.2f}% |") return rows def validation_status(result: CompareResult) -> str: if result.status == "done": return "PASS" if result.passed else "FAIL" return result.status.upper() def skipped_validation(reason: str) -> CompareResult: return CompareResult(passed=False, max_diffs={}, status="skipped", error=reason) def failed_validation(error: BaseException | str) -> CompareResult: message = error if isinstance(error, str) else exception_message(error) return CompareResult(passed=False, max_diffs={}, status="failed", error=message) def skipped_perf(reason: str) -> dict[str, Any]: return {"skipped": True, "reason": reason} def failed_perf(error: BaseException | str) -> dict[str, Any]: message = error if isinstance(error, str) else exception_message(error) return {"error": message} def perf_status(perf: dict[str, Any]) -> str: if perf.get("skipped"): return "SKIPPED" if perf.get("error"): return "FAILED" return "DONE" def perf_value(perf: dict[str, Any], key: str) -> Any: return perf[key] if key in perf else "n/a" def empty_instruction_summary(reason: str | None = None, error: str | None = None) -> dict[str, Any]: result: dict[str, Any] = { "active_cores": 0, "total_instructions": 0, "op_counts": {}, "top_cores_by_total": [], "top_cores_by_send": [], "top_cores_by_recv": [], } if reason is not None: result["skipped"] = True result["reason"] = reason if error is not None: result["error"] = error return result def optional_path(path: Path | None) -> str | None: return str(path) if path is not None else None def record_failure(failures: list[dict[str, str]], stage: str, exc: BaseException | str) -> None: message = exc if isinstance(exc, str) else exception_message(exc) failures.append({"stage": stage, "error": message}) print_failure(stage, message) def try_stage( failures: list[dict[str, str]], stage: str, func, *args, **kwargs, ): try: return func(*args, **kwargs) except Exception as exc: record_failure(failures, stage, exc) return None def try_stage_success( failures: list[dict[str, str]], stage: str, func, *args, **kwargs, ) -> bool: try: func(*args, **kwargs) return True except Exception as exc: record_failure(failures, stage, exc) return False def write_report( report_path: Path, *, model_path: Path, hardware: dict[str, int], steps: list[StepRecord], failures: list[dict[str, str]], raptor_validation: CompareResult, pimcomp_validation: CompareResult, raptor_perf: dict[str, Any], pimcomp_perf: dict[str, Any], raptor_instr: dict[str, Any], pimcomp_instr: dict[str, Any], raptor_pass_timings: dict[str, float], pimsim_mode: str, ): lines = [ "# Raptor vs PIMCOMP Comparison Report", "", f"- Model: `{model_path}`", f"- Hardware: `{hardware.get('core_count', 'n/a')} cores`, `{hardware.get('crossbar_count', 'n/a')} xbars/core`, `{hardware.get('crossbar_size', 'n/a')}x{hardware.get('crossbar_size', 'n/a')}` crossbars, mesh `{hardware.get('mesh_rows', 'n/a')}x{hardware.get('mesh_cols', 'n/a')}`", "", ] if failures or any(step.status != "passed" for step in steps): lines.extend( [ "## Failures / Skipped Work", "", "The script did not abort. The failed stage was recorded and any dependent stage was skipped when its inputs were not available.", "", ] ) if failures: lines.extend(["| Stage | Error |", "|---|---|"]) for failure in failures: error = failure["error"].replace("\n", "
") lines.append(f"| {failure['stage']} | {error} |") lines.append("") lines.extend( [ "## Semantic Validation", "", f"- Raptor via `pim-simulator`: `{validation_status(raptor_validation)}`", f"- PIMCOMP via exported `pim-simulator`: `{validation_status(pimcomp_validation)}`", ] ) if raptor_validation.error: lines.append(f"- Raptor validation note: `{raptor_validation.error.splitlines()[0]}`") if pimcomp_validation.error: lines.append(f"- PIMCOMP validation note: `{pimcomp_validation.error.splitlines()[0]}`") lines.extend(["", "### Max Output Differences", ""]) diff_names = sorted(set(raptor_validation.max_diffs) | set(pimcomp_validation.max_diffs)) if diff_names: lines.extend(["| Output | Raptor max diff | PIMCOMP max diff |", "|---|---:|---:|"]) for name in diff_names: lines.append( f"| `{name}` | {raptor_validation.max_diffs.get(name, float('nan')):.6e} | " f"{pimcomp_validation.max_diffs.get(name, float('nan')):.6e} |" ) else: lines.append("No output differences are available because validation did not run or failed before comparison.") lines.extend( [ "", "## pimsim-nn Performance", "", f"- Mode: `{pimsim_mode}`", "", ] ) if pimsim_mode == "throughput": lines.extend( [ "| Compiler | Status | Throughput (samples/s) | Avg latency (ms) | Avg power (mW) | Avg energy (pJ/it) | Output count |", "|---|---|---:|---:|---:|---:|---:|", f"| Raptor | {perf_status(raptor_perf)} | {perf_value(raptor_perf, 'throughput')} | {perf_value(raptor_perf, 'average_latency_ms')} | " f"{perf_value(raptor_perf, 'average_power_mw')} | {perf_value(raptor_perf, 'average_energy_pj')} | {perf_value(raptor_perf, 'output_count')} |", f"| PIMCOMP | {perf_status(pimcomp_perf)} | {perf_value(pimcomp_perf, 'throughput')} | {perf_value(pimcomp_perf, 'average_latency_ms')} | " f"{perf_value(pimcomp_perf, 'average_power_mw')} | {perf_value(pimcomp_perf, 'average_energy_pj')} | {perf_value(pimcomp_perf, 'output_count')} |", "", ] ) else: lines.extend( [ "| Compiler | Status | Latency (ms) | Avg power (mW) | Avg energy (pJ) |", "|---|---|---:|---:|---:|", f"| Raptor | {perf_status(raptor_perf)} | {perf_value(raptor_perf, 'latency_ms')} | " f"{perf_value(raptor_perf, 'average_power_mw')} | {perf_value(raptor_perf, 'average_energy_pj')} |", f"| PIMCOMP | {perf_status(pimcomp_perf)} | {perf_value(pimcomp_perf, 'latency_ms')} | " f"{perf_value(pimcomp_perf, 'average_power_mw')} | {perf_value(pimcomp_perf, 'average_energy_pj')} |", "", ] ) if raptor_perf.get("reason") or raptor_perf.get("error"): lines.append(f"- Raptor pimsim-nn note: `{(raptor_perf.get('reason') or raptor_perf.get('error')).splitlines()[0]}`") if pimcomp_perf.get("reason") or pimcomp_perf.get("error"): lines.append(f"- PIMCOMP pimsim-nn note: `{(pimcomp_perf.get('reason') or pimcomp_perf.get('error')).splitlines()[0]}`") if lines[-1] != "": lines.append("") lines.extend( [ "## Instruction Summary", "", "| Compiler | Status | Active cores | Total instructions | Sends | Receives | MVMUL |", "|---|---|---:|---:|---:|---:|---:|", f"| Raptor | {'FAILED' if raptor_instr.get('error') else 'SKIPPED' if raptor_instr.get('skipped') else 'DONE'} | {raptor_instr.get('active_cores', 0)} | {raptor_instr.get('total_instructions', 0)} | {raptor_instr.get('op_counts', {}).get('send', 0)} | {raptor_instr.get('op_counts', {}).get('recv', 0)} | {raptor_instr.get('op_counts', {}).get('mvmul', 0)} |", f"| PIMCOMP | {'FAILED' if pimcomp_instr.get('error') else 'SKIPPED' if pimcomp_instr.get('skipped') else 'DONE'} | {pimcomp_instr.get('active_cores', 0)} | {pimcomp_instr.get('total_instructions', 0)} | {pimcomp_instr.get('op_counts', {}).get('send', 0)} | {pimcomp_instr.get('op_counts', {}).get('recv', 0)} | {pimcomp_instr.get('op_counts', {}).get('mvmul', 0)} |", "", "### Raptor Op Distribution", "", "| Op | Count | Share |", "|---|---:|---:|", *format_op_table(raptor_instr.get("op_counts", {}), raptor_instr.get("total_instructions", 0)), "", "### PIMCOMP Op Distribution", "", "| Op | Count | Share |", "|---|---:|---:|", *format_op_table(pimcomp_instr.get("op_counts", {}), pimcomp_instr.get("total_instructions", 0)), "", "## Step Timings", "", "| Step | Status | Duration (s) | Return code |", "|---|---|---:|---:|", ] ) for step in steps: lines.append( f"| {step.name} | {step.status.upper()} | {step.duration_sec:.3f} | " f"{step.returncode if step.returncode is not None else ''} |" ) failed_steps = [step for step in steps if step.status != "passed"] if failed_steps: lines.extend(["", "### Failed Step Details", ""]) for step in failed_steps: lines.extend( [ f"#### {step.name}", "", f"- Command: `{step.command}`", f"- Error: `{step.error or 'n/a'}`", ] ) if step.output_tail: lines.extend(["", "```text", step.output_tail, "```"]) lines.append("") if raptor_pass_timings: lines.extend(["", "## Raptor Pass Timings", "", "| Pass | Duration (s) |", "|---|---:|"]) for name, duration in raptor_pass_timings.items(): lines.append(f"| {name} | {duration:.4f} |") report_path.write_text("\n".join(lines) + "\n", encoding="utf-8") def main(): parser = argparse.ArgumentParser() parser.add_argument("--model", required=True, type=Path) parser.add_argument("--out-dir", required=True, type=Path) parser.add_argument("--raptor-path", default=REPO / "build_release/Release/bin/onnx-mlir", type=Path) parser.add_argument("--onnx-include-dir", default=REPO / "onnx-mlir/include", type=Path) parser.add_argument("--pimcomp-dir", default=REPO / "third_party/PIMCOMP-NN", type=Path) parser.add_argument("--pim-simulator-dir", default=REPO / "backend-simulators/pim/pim-simulator", type=Path) parser.add_argument("--pimsim-nn-build-dir", default=REPO / "backend-simulators/pim/pimsim-nn/build", type=Path) parser.add_argument("--seed", type=int, default=0) parser.add_argument("--threshold", type=float, default=1e-3) parser.add_argument("--rtol", type=float, default=1e-5) parser.add_argument("--timeout-seconds", type=float, default=3600.0) parser.add_argument("--core-count", type=int) parser.add_argument("--crossbar-count", type=int) parser.add_argument("--crossbar-size", type=int) parser.add_argument("--mesh-rows", type=int) parser.add_argument("--mesh-cols", type=int) parser.add_argument("--pimsim-time-ms", type=int, default=1000) parser.add_argument("--pimsim-mode", choices=["latency", "throughput"], default="latency") parser.add_argument("--skip-pimsim-nn", action="store_true") parser.add_argument("--verbose-raptor-compile", action="store_true") parser.add_argument("--raptor-extra-arg", action="append", default=[]) parser.add_argument( "--fail-on-error", action="store_true", help="Return a non-zero process status after writing the reports if any compilation/run stage failed.", ) args = parser.parse_args() model_path = args.model.resolve() out_dir = args.out_dir.resolve() out_dir.mkdir(parents=True, exist_ok=True) failures: list[dict[str, str]] = [] steps: list[StepRecord] = [] hardware: dict[str, int] = { "mesh_rows": 0, "mesh_cols": 0, "crossbar_count": 0, "crossbar_size": 0, "core_count": 0, } inputs_desc: list[tuple[int, str, int, list[int]]] = [] outputs_desc: list[tuple[int, str, int, list[int]]] = [] arrays_in_order: list[np.ndarray] = [] runtime_inputs: list[np.ndarray] = [] network_mlir: Path | None = None runner_path: Path | None = None reference_dir: Path | None = None raptor_pim_dir: Path | None = None raptor_pass_timings: dict[str, float] = {} verification_info: Path | None = None simulation_info: Path | None = None pimcomp_export_dir: Path | None = None pimsim_config: Path | None = None raptor_validation = skipped_validation("Raptor validation did not run") pimcomp_validation = skipped_validation("PIMCOMP validation did not run") raptor_perf: dict[str, Any] = skipped_perf("pimsim-nn Raptor did not run") pimcomp_perf: dict[str, Any] = skipped_perf("pimsim-nn PIMCOMP did not run") raptor_instr: dict[str, Any] = empty_instruction_summary("Raptor instruction parsing did not run") pimcomp_instr: dict[str, Any] = empty_instruction_summary("PIMCOMP instruction parsing did not run") loaded_hardware = try_stage(failures, "Load hardware configuration", load_effective_hardware, args) if loaded_hardware is not None: hardware = loaded_hardware model_io = try_stage(failures, "Load model inputs", load_model_inputs, model_path, args.seed) if model_io is not None: inputs_desc, outputs_desc, arrays_in_order, runtime_inputs = model_io expected_network_mlir = out_dir / "reference" / f"{model_path.stem}.onnx.mlir" expected_runner_path = out_dir / "runner" / "build" / "runner" reference_compile = try_stage( failures, "Compile reference", compile_reference, args, model_path, out_dir, steps, ) if reference_compile is not None: network_mlir, _, runner_path = reference_compile else: if expected_network_mlir.exists(): network_mlir = expected_network_mlir print(f"\n[Continue] Reusing partial ONNX MLIR: {network_mlir}") if expected_runner_path.exists(): runner_path = expected_runner_path print(f"\n[Continue] Reusing partial runner: {runner_path}") if runner_path is not None and runner_path.exists() and model_io is not None: generated_reference = try_stage( failures, "Run reference", generate_reference_outputs, runner_path, runner_path.parent, model_path, arrays_in_order, steps, args, out_dir, ) if generated_reference is not None: reference_dir = generated_reference else: record_failure( failures, "Skip reference outputs", "Reference outputs were skipped because the native runner or model inputs are not available.", ) if network_mlir is not None and network_mlir.exists() and hardware["core_count"] > 0: compiled_raptor = try_stage( failures, "Compile Raptor PIM", compile_raptor_target, network_mlir, out_dir / "raptor", hardware, args, steps, ) if compiled_raptor is not None: raptor_pim_dir, raptor_pass_timings = compiled_raptor else: record_failure( failures, "Skip Raptor PIM compile", "Raptor PIM compile was skipped because the ONNX MLIR or hardware configuration is not available.", ) if raptor_pim_dir is not None: wrote_inputs = try_stage_success( failures, "Write Raptor inputs", write_inputs_to_memory_bin, raptor_pim_dir / "memory.bin", raptor_pim_dir / "config.json", runtime_inputs, ) if wrote_inputs and reference_dir is not None and outputs_desc: validation = try_stage( failures, "Rust Validation Raptor", run_rust_validation, "Rust Validation Raptor", raptor_pim_dir, raptor_pim_dir / "config.json", outputs_desc, reference_dir, steps, args, ) raptor_validation = validation if validation is not None else failed_validation("Raptor validation failed") elif reference_dir is None: raptor_validation = skipped_validation("Reference outputs are not available") elif not outputs_desc: raptor_validation = skipped_validation("Output descriptors are not available") else: raptor_validation = skipped_validation("Raptor input materialization failed") else: raptor_validation = skipped_validation("Raptor PIM compilation did not produce a PIM directory") compiled_pimcomp = try_stage( failures, "Compile PIMCOMP", compile_pimcomp, args, model_path, out_dir / "pimcomp", steps, ) if compiled_pimcomp is not None: verification_info, simulation_info = compiled_pimcomp if verification_info is not None and simulation_info is not None and model_io is not None: exported = try_stage( failures, "Export PIMCOMP for Rust", export_pimcomp_for_rust, model_path, verification_info, simulation_info, runtime_inputs, out_dir / "pimcomp_exported", ) if exported is not None: pimcomp_export_dir = exported elif verification_info is None or simulation_info is None: record_failure( failures, "Skip PIMCOMP Rust export", "PIMCOMP Rust export was skipped because PIMCOMP did not produce VerificationInfo.json and SimulationInfo.gz.", ) else: record_failure( failures, "Skip PIMCOMP Rust export", "PIMCOMP Rust export was skipped because model inputs are not available.", ) if pimcomp_export_dir is not None and reference_dir is not None and outputs_desc: validation = try_stage( failures, "Rust Validation PIMCOMP", run_rust_validation, "Rust Validation PIMCOMP", pimcomp_export_dir, pimcomp_export_dir / "config.json", outputs_desc, reference_dir, steps, args, ) pimcomp_validation = validation if validation is not None else failed_validation("PIMCOMP validation failed") elif pimcomp_export_dir is None: pimcomp_validation = skipped_validation("PIMCOMP Rust export is not available") elif reference_dir is None: pimcomp_validation = skipped_validation("Reference outputs are not available") else: pimcomp_validation = skipped_validation("Output descriptors are not available") if hardware["core_count"] > 0: written_config = try_stage( failures, "Write pimsim-nn config", write_pimsim_config, args, out_dir / "pimsim_config", hardware, ) if written_config is not None: pimsim_config = written_config else: record_failure( failures, "Skip pimsim-nn config", "pimsim-nn config was skipped because the hardware configuration is not available.", ) if args.skip_pimsim_nn: raptor_perf = skipped_perf("Skipped by --skip-pimsim-nn") pimcomp_perf = skipped_perf("Skipped by --skip-pimsim-nn") elif pimsim_config is None: raptor_perf = skipped_perf("pimsim-nn config is not available") pimcomp_perf = skipped_perf("pimsim-nn config is not available") else: if raptor_pim_dir is not None: perf = try_stage( failures, "pimsim-nn Raptor", run_pimsim_nn, "pimsim-nn Raptor", raptor_pim_dir, pimsim_config, False, steps, args, ) raptor_perf = perf if perf is not None else failed_perf("pimsim-nn Raptor failed") else: raptor_perf = skipped_perf("Raptor PIM directory is not available") if simulation_info is not None: pimcomp_pimsim_dir = try_stage( failures, "Export PIMCOMP for pimsim-nn", export_pimcomp_for_pimsim_nn, simulation_info, out_dir / "pimcomp_pimsim_nn", ) if pimcomp_pimsim_dir is not None: perf = try_stage( failures, "pimsim-nn PIMCOMP", run_pimsim_nn, "pimsim-nn PIMCOMP", pimcomp_pimsim_dir, pimsim_config, False, steps, args, ) pimcomp_perf = perf if perf is not None else failed_perf("pimsim-nn PIMCOMP failed") else: pimcomp_perf = failed_perf("PIMCOMP pimsim-nn export failed") else: pimcomp_perf = skipped_perf("PIMCOMP SimulationInfo.gz is not available") if raptor_pim_dir is not None and raptor_pim_dir.exists(): parsed = try_stage(failures, "Parse Raptor instructions", parse_raptor_instructions, raptor_pim_dir) raptor_instr = parsed if parsed is not None else empty_instruction_summary(error="Failed to parse Raptor instructions") else: raptor_instr = empty_instruction_summary("Raptor PIM directory is not available") if simulation_info is not None and simulation_info.exists(): parsed = try_stage(failures, "Parse PIMCOMP instructions", parse_pimcomp_instructions, simulation_info) pimcomp_instr = parsed if parsed is not None else empty_instruction_summary(error="Failed to parse PIMCOMP instructions") else: pimcomp_instr = empty_instruction_summary("PIMCOMP SimulationInfo.gz is not available") report_path = out_dir / "comparison_report.md" write_report( report_path, model_path=model_path, hardware=hardware, steps=steps, failures=failures, raptor_validation=raptor_validation, pimcomp_validation=pimcomp_validation, raptor_perf=raptor_perf, pimcomp_perf=pimcomp_perf, raptor_instr=raptor_instr, pimcomp_instr=pimcomp_instr, raptor_pass_timings=raptor_pass_timings, pimsim_mode=args.pimsim_mode, ) json_report = { "model": str(model_path), "hardware": hardware, "pimsim_mode": args.pimsim_mode, "failures": failures, "steps": [asdict(step) for step in steps], "raptor_validation": asdict(raptor_validation), "pimcomp_validation": asdict(pimcomp_validation), "raptor_performance": raptor_perf, "pimcomp_performance": pimcomp_perf, "raptor_instruction_summary": raptor_instr, "pimcomp_instruction_summary": pimcomp_instr, "raptor_pass_timings": raptor_pass_timings, "paths": { "reference_outputs": optional_path(reference_dir), "raptor_pim": optional_path(raptor_pim_dir), "pimcomp_simulation_info": optional_path(simulation_info), "pimcomp_exported_pim": optional_path(pimcomp_export_dir), "pimsim_config": optional_path(pimsim_config), "report_markdown": str(report_path), }, } json_path = out_dir / "comparison_report.json" with open(json_path, "w", encoding="utf-8") as f: json.dump(json_report, f, indent=2) f.write("\n") print(f"\n[Done]") print(f" Report: {report_path}") print(f" JSON: {json_path}") if failures or any(step.status != "passed" for step in steps): print(f" Completed with {len(failures)} recorded failure/skipped stage(s).") if args.fail_on_error and (failures or any(step.status != "passed" for step in steps)): raise SystemExit(1) if __name__ == "__main__": main()