Files
Raptor/validation/validate_one.py
NiccoloN dbe646ac0d
Some checks failed
Validate Operations / config (push) Successful in 1m27s
Validate Operations / build-mlir-cache (push) Successful in 2h0m37s
Validate Operations / validate (push) Failing after 3m41s
fix gemm segfault
print exit signals on validation failure
2026-03-20 14:00:16 +01:00

315 lines
13 KiB
Python

import argparse
import json
import numpy as np
import subprocess
import shutil
import sys
from pathlib import Path
from colorama import Style, Fore
from onnx_utils import gen_random_inputs, save_inputs_to_files, onnx_io, write_inputs_to_memory_bin, _ONNX_TO_NP
from raptor import compile_with_raptor
from gen_network_runner import gen_network_runner
from subprocess_utils import run_command_with_reporter
# Number of progress steps one model contributes. validate_network announces
# seven stages (Compile ONNX, Build Runner, Generate Inputs, Run Reference,
# Compile PIM, Run Simulator, Compare Outputs) and calls reporter.advance()
# once after each, so this must be 7 or the bar reads "complete" one stage early.
STAGE_COUNT = 7

# Workspace entries that clean_workspace_artifacts removes next to the model file.
GENERATED_DIR_NAMES = ("inputs", "outputs", "raptor", "runner", "simulation")
class ProgressReporter:
    """Single-line progress bar on stdout that coexists with normal log lines.

    The bar is redrawn in place (carriage return) after every log call, stage
    change, or step advance, unless the reporter is suspended.
    """

    def __init__(self, total_models, stages_per_model=STAGE_COUNT):
        """Set up counters for ``total_models`` models of ``stages_per_model`` steps each."""
        self.total_models = total_models
        self.stages_per_model = stages_per_model
        # At least 1 so the fill ratio in _render never divides by zero.
        self.total_steps = max(1, total_models * stages_per_model)
        self.completed_steps = 0
        self.current_label = ""
        self.enabled = True
        # Terminal width is sampled once here; (100, 20) is the fallback size.
        self.columns = shutil.get_terminal_size((100, 20)).columns
        self.suspended = False

    def _clear(self):
        """Erase the current terminal line and return the cursor to column 0."""
        if not self.enabled:
            return
        sys.stdout.write("\033[2K\r")

    def _render(self):
        """Draw the bar, counter and (truncated) stage label on the current line."""
        if self.suspended or not self.enabled:
            return

        bar_width = 24
        done_cells = int(bar_width * self.completed_steps / self.total_steps)
        todo_cells = bar_width - done_cells
        counter = f"{self.completed_steps}/{self.total_steps}"

        # Uncoloured text is used only to measure what fits in the terminal.
        plain_bar = f"[{'#' * done_cells}{'-' * todo_cells}] {counter}"
        show_bar = len(plain_bar) <= self.columns
        visible_prefix = plain_bar if show_bar else counter

        label = ""
        if self.current_label:
            label = f" {self.current_label}"
        room_for_label = max(0, self.columns - len(visible_prefix))
        label = label[:room_for_label]

        if show_bar:
            cells = Fore.GREEN + ("#" * done_cells) + Fore.CYAN + ("-" * todo_cells)
            coloured_prefix = Fore.CYAN + f"[{cells}{Fore.CYAN}] {counter}" + Style.RESET_ALL
        else:
            coloured_prefix = Fore.CYAN + visible_prefix + Style.RESET_ALL

        sys.stdout.write("\r" + coloured_prefix + label + Style.RESET_ALL)
        sys.stdout.flush()

    def log(self, message="", color=None):
        """Print ``message`` (optionally wrapped in ``color``) and redraw the bar below it."""
        if self.enabled:
            self._clear()
        text = color + message + Style.RESET_ALL if color else message
        print(text)
        self._render()

    def set_stage(self, model_index, model_total, model_name, stage_name):
        """Update the label shown next to the bar and redraw."""
        self.current_label = f"[{model_index}/{model_total}] {model_name} · {stage_name}"
        self._render()

    def advance(self):
        """Count one more finished step (never exceeding the total) and redraw."""
        next_value = self.completed_steps + 1
        self.completed_steps = min(self.total_steps, next_value)
        self._render()

    def suspend(self):
        """Stop drawing and wipe the bar so other code can print freely."""
        self.suspended = True
        self._clear()
        sys.stdout.flush()

    def resume(self):
        """Allow drawing again; the bar reappears on the next redraw."""
        self.suspended = False

    def finish(self):
        """Permanently hide the bar at the end of a run."""
        if not self.enabled:
            return
        self.suspended = True
        self._clear()
        sys.stdout.flush()
def run_command(cmd, cwd=None, reporter=None):
    """Run ``cmd`` through run_command_with_reporter; nothing is returned.

    Args:
        cmd: command and arguments, forwarded unchanged.
        cwd: optional working directory for the command.
        reporter: optional progress reporter forwarded to the helper.
    """
    run_command_with_reporter(
        cmd,
        cwd=cwd,
        reporter=reporter,
    )
def clean_workspace_artifacts(workspace_dir, model_stem):
    """Delete leftovers of a previous validation run from ``workspace_dir``.

    Candidates are every name in GENERATED_DIR_NAMES plus
    ``<model_stem>.onnx.mlir``, ``<model_stem>.so`` and ``<model_stem>.tmp``.

    Returns:
        The list of paths that existed and were removed, in removal order.
    """
    root = Path(workspace_dir)
    candidates = [root / name for name in GENERATED_DIR_NAMES]
    candidates += [root / f"{model_stem}{suffix}" for suffix in (".onnx.mlir", ".so", ".tmp")]

    removed_paths = []
    for target in candidates:
        # Symlinks are tested first so a link is unlinked, never followed by rmtree.
        if target.is_symlink() or target.is_file():
            target.unlink(missing_ok=True)
        elif target.is_dir():
            shutil.rmtree(target)
        else:
            continue  # nothing at this path
        removed_paths.append(target)
    return removed_paths
def print_stage(reporter, model_index, model_total, model_name, title):
    """Log a coloured ``[title]`` banner and set the reporter's stage label to it.

    Titles without a dedicated colour are printed in white.
    """
    colour_by_title = {
        "Compile ONNX": Fore.BLUE,
        "Build Runner": Fore.MAGENTA,
        "Generate Inputs": Fore.YELLOW,
        "Run Reference": Fore.GREEN,
        "Compile PIM": Fore.CYAN,
        "Run Simulator": Fore.MAGENTA,
        "Compare Outputs": Fore.YELLOW,
    }
    banner_colour = colour_by_title.get(title, Fore.WHITE)
    banner = Style.BRIGHT + banner_colour + f"[{title}]" + Style.RESET_ALL
    reporter.log(banner)
    reporter.set_stage(model_index, model_total, model_name, title)
def print_info(reporter, message):
    """Log ``message`` through ``reporter`` with a leading space as indent."""
    indented = f" {message}"
    reporter.log(indented)
def compile_onnx_network(network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=None):
    """Compile the ONNX model twice: once to ONNX-IR MLIR, once to a shared library.

    Args:
        network_onnx_path: Path of the ``.onnx`` model.
        raptor_path: compiler executable to invoke.
        raptor_dir: directory that receives the ``--EmitONNXIR`` output.
        runner_dir: directory that receives the shared library.
        reporter: optional progress reporter forwarded to run_command.

    Returns:
        ``(network_so_path, network_mlir_path)``.
    """
    stem = network_onnx_path.stem
    onnx_ir_base = raptor_dir / stem
    runner_base = runner_dir / stem

    def with_extension(base, extension):
        # Append to the whole base name. Path.with_suffix() would instead replace
        # everything after the last dot, turning "net.v2" into "net.so".
        # Assumes the compiler appends its extension to the "-o" base (the same
        # naming clean_workspace_artifacts uses) - TODO confirm for dotted stems.
        return base.with_name(base.name + extension)

    run_command([raptor_path, network_onnx_path, "-o", onnx_ir_base, "--EmitONNXIR"], reporter=reporter)
    run_command([raptor_path, network_onnx_path, "-o", runner_base], reporter=reporter)

    network_so_path = with_extension(runner_base, ".so")
    network_mlir_path = with_extension(onnx_ir_base, ".onnx.mlir")
    # Remove the temporary file next to the MLIR output, if one was left behind.
    with_extension(onnx_ir_base, ".tmp").unlink(missing_ok=True)
    return network_so_path, network_mlir_path
def build_onnx_runner(source_dir, build_dir, reporter=None):
    """Configure and build the runner with CMake inside ``build_dir``.

    Returns:
        ``build_dir / "runner"``, the path where the built executable is expected.
    """
    cmake_steps = (
        ["cmake", source_dir],             # configure
        ["cmake", "--build", ".", "-j"],   # build
    )
    for step in cmake_steps:
        run_command(step, cwd=build_dir, reporter=reporter)
    return build_dir / "runner"
def build_dump_ranges(config_path, outputs_descriptor):
    """Build the ``addr,size,addr,size,...`` string of memory ranges to dump.

    Addresses come from the ``"outputs_addresses"`` entry of the JSON file at
    ``config_path``; each size is the element count of the output's shape times
    the item size of its dtype. Addresses and descriptors are paired in order.
    """
    with open(config_path) as config_file:
        config = json.load(config_file)
    addresses = config["outputs_addresses"]

    def size_in_bytes(dtype_code, shape):
        return int(np.prod(shape)) * np.dtype(_ONNX_TO_NP[dtype_code]).itemsize

    return ",".join(
        f"{address},{size_in_bytes(dtype_code, shape)}"
        for address, (_index, _name, dtype_code, shape) in zip(addresses, outputs_descriptor)
    )
def run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=None):
    """Run the ``pim-simulator`` binary through ``cargo run --release``.

    Args:
        simulator_dir: working directory for the cargo invocation.
        pim_dir: passed to the simulator as ``-f``.
        output_bin_path: passed to the simulator as ``-o``.
        dump_ranges: passed to the simulator as ``-d``.
        reporter: optional progress reporter forwarded to run_command.
    """
    cargo_invocation = ["cargo", "run", "--release", "--package", "pim-simulator", "--bin", "pim-simulator", "--"]
    simulator_args = ["-f", str(pim_dir), "-o", str(output_bin_path), "-d", dump_ranges]
    run_command(cargo_invocation + simulator_args, cwd=simulator_dir, reporter=reporter)
def parse_pim_simulator_outputs(output_bin_path, outputs_descriptor):
    """Split the simulator's binary dump into one array per described output.

    The file is read as back-to-back tensors in descriptor order; each tensor
    takes ``prod(shape)`` elements of its dtype and is reshaped to ``shape``.

    Returns:
        A list of numpy arrays, one per entry of ``outputs_descriptor``.
    """
    blob = output_bin_path.read_bytes()
    cursor = 0
    decoded = []
    for _index, _name, dtype_code, shape in outputs_descriptor:
        element_type = np.dtype(_ONNX_TO_NP[dtype_code])
        element_count = int(np.prod(shape))
        tensor = np.frombuffer(blob, dtype=element_type, count=element_count, offset=cursor)
        decoded.append(tensor.reshape(shape))
        # Advance past the bytes just consumed.
        cursor += element_count * element_type.itemsize
    return decoded
def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3):
    """Compare simulator outputs with the reference CSV files and print a table.

    Args:
        sim_arrays: simulator output arrays, in the same order as ``outputs_descriptor``.
        runner_out_dir: directory containing ``output{index}_{name}.csv`` reference files.
        outputs_descriptor: iterable of 4-tuples ``(index, name, dtype code, shape)``.
        threshold: largest absolute element-wise difference that still passes.

    Returns:
        True when every compared output is within ``threshold``; also True when
        there is nothing to compare.
    """
    rows = []
    for sim_array, (oi, name, _, shape) in zip(sim_arrays, outputs_descriptor):
        csv_path = runner_out_dir / f"output{oi}_{name}.csv"
        runner_array = np.loadtxt(csv_path, delimiter=',', dtype=np.float32).reshape(shape)
        abs_diff = np.abs(sim_array.astype(np.float64) - runner_array.astype(np.float64))
        # np.max raises on a zero-size array; an empty tensor has no differing element.
        max_diff = float(np.max(abs_diff)) if abs_diff.size else 0.0
        # A NaN difference compares False here and is therefore reported as FAIL.
        rows.append((name, f"{max_diff:.6e}", max_diff <= threshold))

    all_passed = all(passed for _, _, passed in rows)

    # max() gets a list so it still works with zero rows; max(6) alone would
    # raise TypeError because a lone int is not iterable.
    name_width = max([len("Output"), *(len(name) for name, _, _ in rows)])
    diff_width = max([len("Max diff"), *(len(diff) for _, diff, _ in rows)])
    result_width = len("Result")

    separator = f" +-{'-' * name_width}-+-{'-' * diff_width}-+-{'-' * result_width}-+"
    print(separator)
    print(f" | {'Output'.ljust(name_width)} | {'Max diff'.ljust(diff_width)} | {'Result'} |")
    print(separator)
    for name, diff_text, passed in rows:
        # Pad before colouring so the escape codes do not count towards the width.
        status_text = ("PASS" if passed else "FAIL").ljust(result_width)
        status = Fore.GREEN + status_text + Style.RESET_ALL if passed else Fore.RED + status_text + Style.RESET_ALL
        print(f" | {name.ljust(name_width)} | {diff_text.ljust(diff_width)} | {status} |")
    print(separator)
    return all_passed
def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
                     simulator_dir, crossbar_size=64, crossbar_count=8, threshold=1e-3,
                     reporter=None, model_index=1, model_total=1):
    """Run the full validation pipeline for one ONNX model.

    Stages, in order: Compile ONNX, Build Runner, Generate Inputs, Run Reference,
    Compile PIM, Run Simulator, Compare Outputs. All artifacts are written next
    to the model file, after leftovers from a previous run have been removed.

    Args:
        network_onnx_path: model to validate.
        raptor_path: compiler executable.
        onnx_include_dir: include directory handed to gen_network_runner.
        simulator_dir: directory in which the simulator is run.
        crossbar_size, crossbar_count: forwarded to compile_with_raptor.
        threshold: forwarded to validate_outputs.
        reporter: progress reporter to use; a private one is created (and
            finished on exit) when omitted.
        model_index, model_total: position of this model, used in log output.

    Returns:
        The result of validate_outputs (True when all outputs match).

    Raises:
        Any Exception raised by a stage, re-raised after "Result: FAIL" is
        logged and the reporter is suspended.
    """
    network_onnx_path = Path(network_onnx_path).resolve()
    raptor_path = Path(raptor_path).resolve()
    onnx_include_dir = Path(onnx_include_dir).resolve()
    simulator_dir = Path(simulator_dir).resolve()

    owns_reporter = reporter is None
    if not reporter:
        reporter = ProgressReporter(model_total)

    model_name = network_onnx_path.name
    model_stem = network_onnx_path.stem
    workspace_dir = network_onnx_path.parent
    clean_workspace_artifacts(workspace_dir, model_stem)

    raptor_dir = workspace_dir / "raptor"
    runner_dir = workspace_dir / "runner"
    runner_build_dir = runner_dir / "build"
    raptor_dir.mkdir(exist_ok=True)
    runner_build_dir.mkdir(parents=True, exist_ok=True)

    def begin_stage(title):
        print_stage(reporter, model_index, model_total, model_name, title)

    reporter.log(Fore.CYAN + f"[{model_index}/{model_total}]" + Style.RESET_ALL +
                 f" {Style.BRIGHT}Validating {model_name}{Style.RESET_ALL}")

    raised = False
    try:
        begin_stage("Compile ONNX")
        network_so_path, network_mlir_path = compile_onnx_network(
            network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter)
        print_info(reporter, f"MLIR saved to {network_mlir_path}")
        print_info(reporter, f"Shared library saved to {network_so_path}")
        reporter.advance()

        begin_stage("Build Runner")
        gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir,
                           out=runner_dir / "runner.c", verbose=False)
        runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter)
        print_info(reporter, f"Runner built at {runner_path}")
        reporter.advance()

        begin_stage("Generate Inputs")
        inputs_dir = workspace_dir / "inputs"
        inputs_descriptor, outputs_descriptor = onnx_io(network_onnx_path)
        inputs_list, _inputs_dict = gen_random_inputs(inputs_descriptor)
        flags, _files = save_inputs_to_files(network_onnx_path, inputs_list, out_dir=inputs_dir)
        print_info(reporter, f"Saved {len(inputs_list)} input file(s) to {inputs_dir}")
        reporter.advance()

        begin_stage("Run Reference")
        out_dir = workspace_dir / "outputs"
        out_dir.mkdir(exist_ok=True)
        reference_cmd = [runner_path, *flags, "--save-csv-dir", str(out_dir)]
        run_command(reference_cmd, cwd=runner_build_dir, reporter=reporter)
        print_info(reporter, f"Reference outputs saved to {out_dir}")
        reporter.advance()

        begin_stage("Compile PIM")
        pim_dir = raptor_dir / "pim"
        compile_with_raptor(
            network_mlir_path, raptor_path, raptor_dir / model_stem,
            crossbar_size, crossbar_count,
            cwd=raptor_dir, reporter=reporter)
        print_info(reporter, f"PIM artifacts saved to {pim_dir}")
        reporter.advance()

        begin_stage("Run Simulator")
        pim_config_path = pim_dir / "config.json"
        write_inputs_to_memory_bin(pim_dir / "memory.bin", pim_config_path, inputs_list)
        simulation_dir = workspace_dir / "simulation"
        simulation_dir.mkdir(exist_ok=True)
        dump_ranges = build_dump_ranges(pim_config_path, outputs_descriptor)
        output_bin_path = simulation_dir / "out.bin"
        run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=reporter)
        print_info(reporter, f"Simulator output saved to {output_bin_path}")
        reporter.advance()

        begin_stage("Compare Outputs")
        sim_arrays = parse_pim_simulator_outputs(output_bin_path, outputs_descriptor)
        # validate_outputs prints its table with plain print(), so the bar is
        # hidden while it runs.
        reporter.suspend()
        passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold)
        reporter.resume()
        reporter.advance()

        if passed:
            status = Fore.GREEN + "PASS" + Style.RESET_ALL
        else:
            status = Fore.RED + "FAIL" + Style.RESET_ALL
        reporter.log(Style.BRIGHT + f"Result: {status}" + Style.RESET_ALL)
        return passed
    except Exception:
        raised = True
        reporter.log(Style.BRIGHT + Fore.RED + "Result: FAIL" + Style.RESET_ALL)
        # The reporter is left suspended while the exception propagates.
        reporter.suspend()
        raise
    finally:
        # The closing rule is skipped only on the `except Exception` path.
        if not raised:
            reporter.log("=" * 72)
        if owns_reporter:
            reporter.finish()
if __name__ == '__main__':
    # Command-line entry point: validate one model; exit 0 on PASS, 1 on FAIL.
    parser = argparse.ArgumentParser()
    for required_flag in ("--network-onnx", "--raptor-path", "--onnx-include-dir"):
        parser.add_argument(required_flag, required=True)
    cli = parser.parse_args()

    # The simulator directory is located relative to this script's own directory.
    script_dir = Path(__file__).parent.resolve()
    simulator_dir = script_dir / ".." / "backend-simulators" / "pim" / "pim-simulator"

    passed = validate_network(cli.network_onnx, cli.raptor_path, cli.onnx_include_dir, simulator_dir)
    exit_code = 0 if passed else 1
    raise SystemExit(exit_code)