Files
Raptor/validation/validate_one.py
NiccoloN 412ca957f6
All checks were successful
Validate Operations / validate-operations (push) Successful in 22m38s
multiple-output spat computes
2026-04-23 09:28:57 +02:00

368 lines
14 KiB
Python

import argparse
import json
import numpy as np
import subprocess
import shutil
import sys
from dataclasses import dataclass, field
from pathlib import Path
from colorama import Style, Fore
from onnx_utils import gen_random_inputs, save_inputs_to_files, onnx_io, write_inputs_to_memory_bin, _ONNX_TO_NP
from raptor import compile_with_raptor
from gen_network_runner import gen_network_runner
from subprocess_utils import run_command_with_reporter
# Titles of the pipeline stages, in the order validate_network runs them.
STAGE_TITLES = (
    "Compile ONNX",
    "Build Runner",
    "Generate Inputs",
    "Run Reference",
    "Compile PIM",
    "Run Simulator",
    "Compare Outputs",
)

# Number of progress steps a single model contributes (one per stage).
STAGE_COUNT = len(STAGE_TITLES)

# Entries inside the model workspace that clean_workspace_artifacts removes.
GENERATED_DIR_NAMES = (
    "inputs",
    "outputs",
    "raptor",
    "runner",
    "simulation",
)
def sanitize_output_name(name):
    """Return the first 255 characters of *name* made safe for a file name.

    Alphanumeric characters and ``_``, ``.``, ``-`` are kept as they are;
    every other character is replaced by ``_``.
    """
    kept_punctuation = "_.-"
    cleaned = []
    for char in name[:255]:
        keep = char.isalnum() or char in kept_punctuation
        cleaned.append(char if keep else "_")
    return "".join(cleaned)
@dataclass
class ValidationResult:
    """Outcome of validating one network."""

    # Overall verdict of the output comparison.
    passed: bool
    # Value returned by the PIM compile step; a fresh empty dict by default.
    pim_pass_timings: dict[str, float] = field(default_factory=dict)
class ProgressReporter:
    """Render a single-line progress bar on stdout while models are validated.

    Progress is counted in steps (``total_models * stages_per_model``). The
    status line shows the bar, the step counter, pass/fail counters and the
    label of the model/stage currently running. Regular log lines are printed
    above the status line via :meth:`log`.
    """

    def __init__(self, total_models, stages_per_model=STAGE_COUNT):
        self.total_models = total_models
        self.stages_per_model = stages_per_model
        # Clamped to at least 1 so the fill ratio in _render never divides by zero.
        self.total_steps = max(1, total_models * stages_per_model)
        self.completed_steps = 0
        self.passed_models = 0
        self.failed_models = 0
        self.current_label = ""
        self.enabled = True
        # Terminal width is sampled once; (100, 20) is the fallback size.
        self.columns = shutil.get_terminal_size((100, 20)).columns
        self.suspended = False

    def _clear(self):
        """Erase the current terminal line and return the cursor to column 0."""
        if self.enabled:
            sys.stdout.write("\033[2K\r")

    def _render(self):
        """Redraw the status line unless the reporter is disabled or suspended."""
        if not self.enabled or self.suspended:
            return
        bar_width = 24
        filled = int(bar_width * self.completed_steps / self.total_steps)
        # The *_text variants carry no colour codes and are only used to
        # measure the visible width of the line.
        counts_text = f"P:{self.passed_models} F:{self.failed_models}"
        prefix_text = f"[{'#' * filled}{'-' * (bar_width - filled)}] {self.completed_steps}/{self.total_steps}"
        if len(prefix_text) > self.columns:
            # Terminal too narrow for the bar: fall back to the bare counter.
            prefix_text = f"{self.completed_steps}/{self.total_steps}"
        if prefix_text.startswith("["):
            bar = Fore.GREEN + ("#" * filled) + Fore.CYAN + ("-" * (bar_width - filled))
            prefix = Fore.CYAN + f"[{bar}{Fore.CYAN}] {self.completed_steps}/{self.total_steps}" + Style.RESET_ALL
        else:
            prefix = Fore.CYAN + prefix_text + Style.RESET_ALL
        counts = (
            " "
            + Style.BRIGHT
            + Fore.GREEN
            + f"P:{self.passed_models}"
            + Style.RESET_ALL
            + " "
            + Style.BRIGHT
            + Fore.RED
            + f"F:{self.failed_models}"
            + Style.RESET_ALL
        )
        model_counter = ""
        label = ""
        if self.current_label.startswith("[") and "] " in self.current_label:
            # Labels built by set_stage look like "[i/n] model · stage"; the
            # counter is shown before the pass/fail counts, the rest after.
            model_counter, label = self.current_label.split("] ", 1)
            model_counter = f" {model_counter}]"
            label = f" {label}"
        elif self.current_label:
            label = f" {self.current_label}"
        # Only the trailing label is truncated to fit the terminal width.
        available_label_width = max(0, self.columns - len(prefix_text) - len(model_counter) - len(counts_text) - 3)
        label = label[:available_label_width]
        # Erase the previous status line before redrawing: a bare carriage
        # return would leave the tail of a longer previous label on screen.
        self._clear()
        sys.stdout.write(prefix + model_counter + counts + label + Style.RESET_ALL)
        sys.stdout.flush()

    def log(self, message="", color=None):
        """Print *message* on its own line (optionally coloured), then redraw the status line."""
        if self.enabled:
            self._clear()
        if color:
            print(color + message + Style.RESET_ALL)
        else:
            print(message)
        self._render()

    def set_stage(self, model_index, model_total, model_name, stage_name):
        """Set the label shown on the status line to the given model and stage."""
        self.current_label = f"[{model_index}/{model_total}] {model_name} · {stage_name}"
        self._render()

    def advance(self):
        """Mark one more step as completed, never exceeding ``total_steps``."""
        self.completed_steps = min(self.total_steps, self.completed_steps + 1)
        self._render()

    def record_result(self, passed):
        """Increment the passed or failed model counter according to *passed*."""
        if passed:
            self.passed_models += 1
        else:
            self.failed_models += 1
        self._render()

    def suspend(self):
        """Stop drawing the status line and erase it until :meth:`resume` is called."""
        self.suspended = True
        self._clear()
        sys.stdout.flush()

    def resume(self):
        """Allow the status line to be drawn again on the next render."""
        self.suspended = False

    def finish(self):
        """Erase the status line and stop rendering."""
        if self.enabled:
            self.suspended = True
            self._clear()
            sys.stdout.flush()
def run_command(cmd, cwd=None, reporter=None):
    """Run *cmd* via ``run_command_with_reporter``, forwarding *cwd* and *reporter*.

    The helper's return value is not propagated; this function returns ``None``.
    """
    run_command_with_reporter(
        cmd,
        cwd=cwd,
        reporter=reporter,
    )
def clean_workspace_artifacts(workspace_dir, model_stem):
    """Delete generated artifacts from *workspace_dir* and return what was removed.

    Candidates are the entries named in ``GENERATED_DIR_NAMES`` followed by
    ``<model_stem>.onnx.mlir``, ``<model_stem>.so`` and ``<model_stem>.tmp``.
    Candidates that do not exist are skipped.

    Returns:
        The list of removed paths, in the order they were processed.
    """
    root = Path(workspace_dir)
    candidates = [root / dir_name for dir_name in GENERATED_DIR_NAMES]
    candidates += [root / f"{model_stem}{extension}" for extension in (".onnx.mlir", ".so", ".tmp")]

    deleted = []
    for target in candidates:
        # Symlinks are checked first so a link to a directory is unlinked
        # rather than having its target tree removed.
        if target.is_symlink() or target.is_file():
            target.unlink(missing_ok=True)
        elif target.is_dir():
            shutil.rmtree(target)
        else:
            continue
        deleted.append(target)
    return deleted
def print_stage(reporter, model_index, model_total, model_name, title):
    """Log a coloured ``[title]`` banner and make *title* the reporter's current stage.

    Each entry of ``STAGE_TITLES`` has its own colour; any other title is
    printed in white.
    """
    palette = (
        Fore.BLUE,
        Fore.MAGENTA,
        Fore.YELLOW,
        Fore.GREEN,
        Fore.CYAN,
        Fore.MAGENTA,
        Fore.YELLOW,
    )
    color_by_title = dict(zip(STAGE_TITLES, palette))
    banner_color = color_by_title.get(title, Fore.WHITE)
    banner = Style.BRIGHT + banner_color + f"[{title}]" + Style.RESET_ALL
    reporter.log(banner)
    reporter.set_stage(model_index, model_total, model_name, title)
def print_info(reporter, message):
    """Log *message* through *reporter*, prefixed with a space."""
    indented_line = f" {message}"
    reporter.log(indented_line)
def compile_onnx_network(network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=None):
    """Invoke raptor twice on the model and return the expected output paths.

    The first invocation passes ``--EmitONNXIR`` with the output base
    ``raptor_dir/<stem>``; the second uses the output base
    ``runner_dir/<stem>``. The ``<stem>.tmp`` file next to the IR output is
    removed if present.

    Args:
        network_onnx_path: ``Path`` of the ONNX model.
        raptor_path: Path of the raptor executable.
        raptor_dir: ``Path`` of the directory for the ONNX IR output.
        runner_dir: ``Path`` of the directory for the shared library output.
        reporter: Optional reporter forwarded to ``run_command``.

    Returns:
        ``(network_so_path, network_mlir_path)``.
    """
    stem = network_onnx_path.stem
    onnx_ir_base = raptor_dir / stem
    runner_base = runner_dir / stem
    run_command([raptor_path, network_onnx_path, "-o", onnx_ir_base, "--EmitONNXIR"], reporter=reporter)
    run_command([raptor_path, network_onnx_path, "-o", runner_base], reporter=reporter)

    def with_extension(base, extension):
        # Append instead of Path.with_suffix: with_suffix would replace the
        # last dotted part of a stem such as "net.v2", pointing at "net.so"
        # rather than "net.v2.so". Appending also matches how
        # clean_workspace_artifacts names these files.
        return base.with_name(base.name + extension)

    network_so_path = with_extension(runner_base, ".so")
    network_mlir_path = with_extension(onnx_ir_base, ".onnx.mlir")
    with_extension(onnx_ir_base, ".tmp").unlink(missing_ok=True)
    return network_so_path, network_mlir_path
def build_onnx_runner(source_dir, build_dir, reporter=None):
    """Configure and build the runner with CMake inside *build_dir*.

    Returns:
        ``build_dir / "runner"``, the path where the executable is expected.
    """
    configure_cmd = ["cmake", source_dir]
    build_cmd = ["cmake", "--build", ".", "-j"]
    for command in (configure_cmd, build_cmd):
        run_command(command, cwd=build_dir, reporter=reporter)
    return build_dir / "runner"
def build_dump_ranges(config_path, outputs_descriptor):
    """Build the comma-separated ``address,size`` list for the simulator dump.

    Addresses come from the ``"outputs_addresses"`` entry of the JSON file at
    *config_path* and are paired positionally with *outputs_descriptor*. Each
    size is the element count of the output's shape multiplied by the item
    size of its NumPy dtype.
    """
    with open(config_path) as config_file:
        addresses = json.load(config_file)["outputs_addresses"]

    def byte_size(dtype_code, shape):
        # Total bytes occupied by one output tensor.
        return int(np.prod(shape)) * np.dtype(_ONNX_TO_NP[dtype_code]).itemsize

    return ",".join(
        f"{address},{byte_size(dtype_code, shape)}"
        for address, (_, _, dtype_code, shape) in zip(addresses, outputs_descriptor)
    )
def run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=None):
    """Run the ``pim-simulator`` binary through ``cargo run`` from *simulator_dir*.

    The simulator receives *pim_dir* via ``-f``, *output_bin_path* via ``-o``
    and *dump_ranges* via ``-d``.
    """
    cargo_args = [
        "cargo", "run", "--no-default-features", "--release",
        "--package", "pim-simulator", "--bin", "pim-simulator",
    ]
    simulator_args = ["-f", str(pim_dir), "-o", str(output_bin_path), "-d", dump_ranges]
    # Everything after "--" is handed to the simulator rather than to cargo.
    run_command([*cargo_args, "--", *simulator_args], cwd=simulator_dir, reporter=reporter)
def parse_pim_simulator_outputs(output_bin_path, outputs_descriptor):
    """Split the simulator dump file into one NumPy array per described output.

    The file is read as consecutive tensors in the order of
    *outputs_descriptor*; each tensor's dtype and shape come from its
    descriptor entry.

    Returns:
        A list of arrays, one per descriptor, reshaped to the described shape.
    """
    buffer = output_bin_path.read_bytes()
    parsed = []
    cursor = 0  # byte offset of the next tensor inside the buffer
    for _index, _name, dtype_code, shape in outputs_descriptor:
        element_type = np.dtype(_ONNX_TO_NP[dtype_code])
        element_count = int(np.prod(shape))
        flat = np.frombuffer(buffer, dtype=element_type, count=element_count, offset=cursor)
        tensor = flat.reshape(shape)
        cursor += element_count * element_type.itemsize
        parsed.append(tensor)
    return parsed
def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3):
    """Compare simulator outputs with the reference CSV files and print a table.

    For every output the reference is loaded from
    ``output<index>_<sanitized name>.csv`` in *runner_out_dir*, and the
    maximum absolute difference (computed in float64) is checked against
    *threshold*.

    Returns:
        ``True`` when every output's maximum difference is within *threshold*.
    """
    rows = []
    for sim_array, (output_index, output_name, _, shape) in zip(sim_arrays, outputs_descriptor):
        reference_csv = runner_out_dir / f"output{output_index}_{sanitize_output_name(output_name)}.csv"
        reference = np.loadtxt(reference_csv, delimiter=',', dtype=np.float32).reshape(shape)
        deviation = np.abs(sim_array.astype(np.float64) - reference.astype(np.float64))
        max_diff = float(np.max(deviation))
        rows.append((output_name, f"{max_diff:.6e}", max_diff <= threshold))

    # Column widths fit both the header and the widest cell.
    name_width = max(len("Output"), *[len(row[0]) for row in rows])
    diff_width = max(len("Max diff"), *[len(row[1]) for row in rows])
    result_width = len("Result")

    rule = f" +-{'-' * name_width}-+-{'-' * diff_width}-+-{'-' * result_width}-+"
    print(rule)
    print(f" | {'Output'.ljust(name_width)} | {'Max diff'.ljust(diff_width)} | {'Result'} |")
    print(rule)
    for output_name, diff_text, within_threshold in rows:
        if within_threshold:
            status = Fore.GREEN + "PASS".ljust(result_width) + Style.RESET_ALL
        else:
            status = Fore.RED + "FAIL".ljust(result_width) + Style.RESET_ALL
        print(f" | {output_name.ljust(name_width)} | {diff_text.ljust(diff_width)} | {status} |")
    print(rule)
    return all(within_threshold for _, _, within_threshold in rows)
def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
                     simulator_dir, crossbar_size=64, crossbar_count=8, threshold=1e-3,
                     reporter=None, model_index=1, model_total=1):
    """Run the full validation pipeline for one ONNX model.

    The stages run in the order of ``STAGE_TITLES``: compile the model, build
    the reference runner, generate random inputs, run the reference, compile
    for PIM, run the PIM simulator and compare the simulator outputs with the
    reference outputs. All artifacts are written next to the ONNX file, after
    artifacts of a previous run have been removed.

    Args:
        network_onnx_path: Path of the ONNX model; its parent is the workspace.
        raptor_path: Path of the raptor executable.
        onnx_include_dir: Include directory passed to ``gen_network_runner``.
        simulator_dir: Directory from which the simulator is run.
        crossbar_size: Forwarded to ``compile_with_raptor``.
        crossbar_count: Forwarded to ``compile_with_raptor``.
        threshold: Maximum allowed absolute difference per output.
        reporter: Progress reporter to use; a new one is created (and
            finished on exit) when omitted.
        model_index: Position of this model, shown in log lines.
        model_total: Total number of models, shown in log lines.

    Returns:
        A ``ValidationResult`` with the comparison verdict and the value
        returned by ``compile_with_raptor``.

    Raises:
        Exception: Any exception raised by a stage is re-raised after the
            failure has been recorded on the reporter.
    """
    network_onnx_path = Path(network_onnx_path).resolve()
    raptor_path = Path(raptor_path).resolve()
    onnx_include_dir = Path(onnx_include_dir).resolve()
    simulator_dir = Path(simulator_dir).resolve()
    # A reporter created here is also finished here (see the finally block).
    owns_reporter = reporter is None
    reporter = reporter or ProgressReporter(model_total)
    workspace_dir = network_onnx_path.parent
    # Start from a clean workspace so stale artifacts cannot be picked up.
    clean_workspace_artifacts(workspace_dir, network_onnx_path.stem)
    raptor_dir = workspace_dir / "raptor"
    runner_dir = workspace_dir / "runner"
    runner_build_dir = runner_dir / "build"
    Path.mkdir(raptor_dir, exist_ok=True)
    Path.mkdir(runner_build_dir, parents=True, exist_ok=True)
    reporter.log(Fore.CYAN + f"[{model_index}/{model_total}]" + Style.RESET_ALL +
                 f" {Style.BRIGHT}Validating {network_onnx_path.name}{Style.RESET_ALL}")
    failed_with_exception = False
    pim_pass_timings = {}
    try:
        # Stage 1: produce the ONNX IR and the shared library.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile ONNX")
        network_so_path, network_mlir_path = compile_onnx_network(
            network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter)
        print_info(reporter, f"MLIR saved to {network_mlir_path}")
        print_info(reporter, f"Shared library saved to {network_so_path}")
        reporter.advance()
        # Stage 2: generate runner.c and build it with CMake.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Build Runner")
        gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir, out=runner_dir / "runner.c", verbose=False)
        runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter)
        print_info(reporter, f"Runner built at {runner_path}")
        reporter.advance()
        # Stage 3: random inputs, shared by the reference run and the simulator.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Generate Inputs")
        inputs_descriptor, outputs_descriptor = onnx_io(network_onnx_path)
        inputs_list, _inputs_dict = gen_random_inputs(inputs_descriptor)
        flags, _files = save_inputs_to_files(network_onnx_path, inputs_list, out_dir=workspace_dir / "inputs")
        print_info(reporter, f"Saved {len(inputs_list)} input file(s) to {workspace_dir / 'inputs'}")
        reporter.advance()
        # Stage 4: run the reference runner, which writes its outputs as CSV.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Run Reference")
        out_dir = workspace_dir / "outputs"
        Path.mkdir(out_dir, exist_ok=True)
        run_cmd = [runner_path, *flags]
        run_cmd += ["--save-csv-dir", f"{out_dir}"]
        run_command(run_cmd, cwd=runner_build_dir, reporter=reporter)
        print_info(reporter, f"Reference outputs saved to {out_dir}")
        reporter.advance()
        # Stage 5: compile the ONNX IR for PIM and keep the returned timings.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM")
        pim_pass_timings = compile_with_raptor(
            network_mlir_path, raptor_path, raptor_dir / network_onnx_path.stem,
            crossbar_size, crossbar_count,
            cwd=raptor_dir, reporter=reporter)
        print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
        reporter.advance()
        # Stage 6: write the inputs into memory.bin and run the simulator,
        # dumping the memory ranges that hold the outputs.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Run Simulator")
        pim_dir = raptor_dir / "pim"
        write_inputs_to_memory_bin(pim_dir / "memory.bin", pim_dir / "config.json", inputs_list)
        simulation_dir = workspace_dir / "simulation"
        Path.mkdir(simulation_dir, exist_ok=True)
        dump_ranges = build_dump_ranges(pim_dir / "config.json", outputs_descriptor)
        output_bin_path = simulation_dir / "out.bin"
        run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=reporter)
        print_info(reporter, f"Simulator output saved to {output_bin_path}")
        reporter.advance()
        # Stage 7: compare simulator outputs against the reference CSV files.
        print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compare Outputs")
        sim_arrays = parse_pim_simulator_outputs(output_bin_path, outputs_descriptor)
        # validate_outputs prints its table with plain print(), so the status
        # line is suspended while it runs.
        reporter.suspend()
        passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold)
        reporter.resume()
        reporter.advance()
        reporter.record_result(passed)
        status = Fore.GREEN + "PASS" + Style.RESET_ALL if passed else Fore.RED + "FAIL" + Style.RESET_ALL
        reporter.log(Style.BRIGHT + f"Result: {status}" + Style.RESET_ALL)
        return ValidationResult(passed=passed, pim_pass_timings=pim_pass_timings)
    except Exception:
        failed_with_exception = True
        reporter.record_result(False)
        reporter.log(Style.BRIGHT + Fore.RED + "Result: FAIL" + Style.RESET_ALL)
        # The reporter is left suspended when the exception propagates.
        # NOTE(review): presumably the caller resumes it after reporting the
        # error - confirm against the caller.
        reporter.suspend()
        raise
    finally:
        # The separator line is only logged when no exception occurred.
        if not failed_with_exception:
            reporter.log("=" * 72)
        if owns_reporter:
            reporter.finish()
if __name__ == '__main__':
    # Command-line entry point: validate one model and exit 0 on pass, 1 on fail.
    parser = argparse.ArgumentParser()
    for option in ("--network-onnx", "--raptor-path", "--onnx-include-dir"):
        parser.add_argument(option, required=True)
    args = parser.parse_args()

    # The simulator directory is located relative to this script's directory.
    script_dir = Path(__file__).parent.resolve()
    simulator_dir = script_dir / ".." / "backend-simulators" / "pim" / "pim-simulator"

    result = validate_network(
        args.network_onnx,
        args.raptor_path,
        args.onnx_include_dir,
        simulator_dir,
    )
    raise SystemExit(0 if result.passed else 1)