Files
Raptor/validation/tools/compare_raptor_pimcomp.py
T
ilgeco be0bcc9dcc
Validate Operations / validate-operations (push) Waiting to run
E' ancora tutto rotto
2026-06-25 16:24:14 +02:00

1498 lines
56 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import gzip
import importlib.util
import json
import mmap
import os
import re
import shlex
import shutil
import subprocess
import sys
import time
import types
from collections import Counter
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any
import numpy as np
import onnx
REPO = Path(__file__).resolve().parents[2]
VALIDATION_DIR = REPO / "validation"
sys.path.insert(0, str(VALIDATION_DIR))
from gen_network_runner import gen_network_runner # noqa: E402
from onnx_utils import _ONNX_TO_NP, gen_random_inputs, onnx_io, save_inputs_to_files, write_inputs_to_memory_bin # noqa: E402
from validate_one import build_dump_ranges, parse_pim_simulator_outputs # noqa: E402
from raptor import compile_with_raptor # noqa: E402
@dataclass
class StepRecord:
    """Outcome of one pipeline step, as appended to the shared ``steps`` list."""

    # Human-readable label of the step.
    name: str
    # Wall-clock duration of the step, in seconds.
    duration_sec: float
    # Shell-quoted rendering of the command associated with the step.
    command: str
    # "passed" unless stated otherwise; this file also records "failed" and "timeout".
    status: str = "passed"
    # Exit status of the process when one is known, otherwise None.
    returncode: int | None = None
    # Short description of the failure, or None when the step passed.
    error: str | None = None
    # Trailing part of the captured output, kept only for unsuccessful steps.
    output_tail: str | None = None
@dataclass
class CompareResult:
    """Result of comparing simulator outputs against the reference outputs."""

    # True only when every compared output stayed within tolerance.
    passed: bool
    # Largest absolute difference observed, keyed by output name.
    max_diffs: dict[str, float]
    # "done" when a comparison actually ran; this file also uses "skipped" and "failed".
    status: str = "done"
    # Explanation attached to skipped/failed results, otherwise None.
    error: str | None = None
def load_pimcomp_exporter():
    """Load PIMCOMP's ``export_to_pim_simulator.py`` script as a module object.

    The script is imported directly from its file location under ``REPO``.

    Returns:
        The executed module.

    Raises:
        ImportError: if no import spec (or loader) can be built for the script.
    """
    path = REPO / "third_party/PIMCOMP-NN/verification/export_to_pim_simulator.py"
    spec = importlib.util.spec_from_file_location("pimcomp_exporter", path)
    # Validate the spec *before* using it: module_from_spec(None) would fail
    # with an unrelated AttributeError, and an assert is stripped under -O.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {path}")
    module = importlib.util.module_from_spec(spec)
    # Register an empty stand-in for cv2 unless one is already loaded.
    # Presumably the exporter imports cv2 at module level without needing it
    # for the helpers used here -- TODO confirm.
    sys.modules.setdefault("cv2", types.ModuleType("cv2"))
    spec.loader.exec_module(module)
    return module
def load_mesh_builder():
    """Load ``generate_mesh_config.py`` from the validation tree as a module.

    Returns:
        The executed module.

    Raises:
        ImportError: if no import spec (or loader) can be built for the script.
    """
    path = REPO / "validation/pimsim-configs/generate_mesh_config.py"
    spec = importlib.util.spec_from_file_location("mesh_builder", path)
    # Check the spec before it is used; the previous assert ran only after
    # module_from_spec had already dereferenced it and vanished under -O.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
def shell_join(cmd: list[str]) -> str:
    """Render *cmd* as a single shell-quoted string, stringifying each part."""
    return shlex.join(map(str, cmd))
def print_step(name: str, cmd: list[str] | None = None, cwd: Path | None = None):
    """Print a step banner and, when a command is given, its cwd and command line.

    The working directory shown falls back to ``REPO`` when *cwd* is not set.
    """
    print(f"\n[{name}]")
    if cmd is None:
        return
    print(f" cwd: {cwd or REPO}")
    print(f" $ {shell_join(cmd)}")
def output_tail(output: str | bytes | None, limit: int = 4000) -> str:
    """Return at most the last *limit* characters of captured process output.

    Args:
        output: Captured output; bytes are decoded with undecodable bytes
            replaced, ``None`` is treated as empty.
        limit: Maximum number of trailing characters to keep.

    Returns:
        The trailing text, or ``""`` when there is no output or *limit* is
        not positive.
    """
    if output is None:
        return ""
    if isinstance(output, bytes):
        output = output.decode(errors="replace")
    # output[-0:] is the whole string and a negative limit would drop the head
    # instead of limiting, so non-positive limits are handled explicitly.
    if limit <= 0:
        return ""
    return output[-limit:]
def exception_message(exc: BaseException) -> str:
    """Build a human-readable message for *exc*.

    Failed and timed-out subprocesses get their command line and, when
    available, the tail of their captured output; any other exception is
    rendered as ``TypeName: text``.
    """
    process_errors = (subprocess.CalledProcessError, subprocess.TimeoutExpired)
    if not isinstance(exc, process_errors):
        return f"{type(exc).__name__}: {exc}"

    raw_cmd = exc.cmd
    if isinstance(raw_cmd, list):
        command = shell_join([str(arg) for arg in raw_cmd])
    else:
        command = str(raw_cmd)
    tail = output_tail(exc.output)

    if isinstance(exc, subprocess.CalledProcessError):
        message = f"command failed with exit code {exc.returncode}: {command}"
    else:
        message = f"command timed out after {exc.timeout} seconds: {command}"
    if tail:
        message += f"\n--- output tail ---\n{tail}"
    return message
def print_failure(name: str, exc: BaseException | str) -> None:
    """Print a failure banner for *name* followed by up to 20 lines of detail."""
    text = exception_message(exc) if not isinstance(exc, str) else exc
    print(f"\n[{name} FAILED]")
    shown = text.splitlines()[:20]
    for line in shown:
        print(f" {line}")
def run_logged(
    name: str,
    cmd: list[str],
    *,
    cwd: Path,
    timeout_sec: float,
    steps: list[StepRecord],
) -> str:
    """Run *cmd*, record the outcome in *steps*, and return its combined output.

    stdout and stderr are captured together as text. Exactly one
    ``StepRecord`` is appended for every call, whatever the outcome.

    Args:
        name: Step label used for the banner and the record.
        cmd: Command line to execute (no shell involved).
        cwd: Working directory for the child process.
        timeout_sec: Timeout handed to ``subprocess.run``.
        steps: List that receives the ``StepRecord`` for this step.

    Returns:
        The captured output of a successful run.

    Raises:
        subprocess.TimeoutExpired: the command exceeded *timeout_sec*.
        subprocess.CalledProcessError: the command exited with a non-zero
            status; its ``output`` carries the output tail.
        OSError: the process could not be started (for example a missing
            executable or working directory).
    """
    print_step(name, cmd, cwd)
    start = time.perf_counter()
    command = shell_join(cmd)
    try:
        proc = subprocess.run(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            timeout=timeout_sec,
        )
    except subprocess.TimeoutExpired as exc:
        duration = time.perf_counter() - start
        tail = output_tail(exc.output)
        steps.append(
            StepRecord(
                name=name,
                duration_sec=duration,
                command=command,
                status="timeout",
                error=f"Timed out after {timeout_sec} seconds",
                output_tail=tail or None,
            )
        )
        raise
    except OSError as exc:
        # The process never started; still leave a record so the step shows
        # up in the timings table and the failed-step details.
        steps.append(
            StepRecord(
                name=name,
                duration_sec=time.perf_counter() - start,
                command=command,
                status="failed",
                error=f"{type(exc).__name__}: {exc}",
            )
        )
        raise
    duration = time.perf_counter() - start
    if proc.returncode != 0:
        tail = output_tail(proc.stdout)
        steps.append(
            StepRecord(
                name=name,
                duration_sec=duration,
                command=command,
                status="failed",
                returncode=proc.returncode,
                error=f"Exited with status {proc.returncode}",
                output_tail=tail or None,
            )
        )
        raise subprocess.CalledProcessError(proc.returncode, cmd, output=tail)
    steps.append(StepRecord(name=name, duration_sec=duration, command=command))
    return proc.stdout
def remove_tree(path: Path) -> None:
    """Delete *path*, whether it is a file, a symlink, or a directory tree.

    A path that does not exist is silently ignored. Symlinks are unlinked,
    never followed.
    """
    is_link = path.is_symlink()
    if not (is_link or path.exists()):
        return
    if is_link or path.is_file():
        path.unlink()
        return
    # Keep re-listing the directory until a listing comes back empty.
    while entries := list(path.iterdir()):
        for entry in entries:
            remove_tree(entry)
    path.rmdir()
def load_model_inputs(model_path: Path, seed: int):
    """Collect the input arrays for the ONNX model at *model_path*.

    Inputs that are backed by a graph initializer take the initializer's
    value (cast to the dtype mapped from the declared element type); all
    other inputs are filled by ``gen_random_inputs`` using *seed*.

    Returns:
        ``(inputs_desc, outputs_desc, arrays_in_order, runtime_only)`` where
        ``arrays_in_order`` follows ``inputs_desc`` and ``runtime_only`` holds
        just the non-initializer arrays, in the same relative order.
    """
    model = onnx.load(model_path)
    initializer_values = {
        init.name: onnx.numpy_helper.to_array(init) for init in model.graph.initializer
    }
    initializer_names = set(initializer_values)

    inputs_desc, outputs_desc = onnx_io(model_path)
    runtime_desc = [desc for desc in inputs_desc if desc[1] not in initializer_names]
    runtime_arrays, _ = gen_random_inputs(runtime_desc, seed=seed)
    runtime_by_name = dict(zip((desc[1] for desc in runtime_desc), runtime_arrays))

    arrays_in_order = []
    runtime_only = []
    for _, name, elem_type, _ in inputs_desc:
        if name in initializer_values:
            constant = initializer_values[name].astype(_ONNX_TO_NP[elem_type], copy=False)
            arrays_in_order.append(constant)
            continue
        generated = runtime_by_name[name]
        arrays_in_order.append(generated)
        runtime_only.append(generated)
    return inputs_desc, outputs_desc, arrays_in_order, runtime_only
def compare_simulator_outputs(
    output_bin: Path,
    outputs_desc: list[tuple[int, str, int, list[int]]],
    reference_dir: Path,
    *,
    threshold: float,
    rtol: float,
) -> CompareResult:
    """Compare simulator outputs in *output_bin* with the reference CSV files.

    Each output is read from ``output{idx}_{sanitized name}.csv`` in
    *reference_dir* and accepted when every element satisfies
    ``|sim - ref| <= threshold + rtol * |ref|``.

    Returns:
        A ``CompareResult`` with the overall verdict and the maximum absolute
        difference per output name.
    """
    sim_arrays = parse_pim_simulator_outputs(output_bin, outputs_desc)
    max_diffs: dict[str, float] = {}
    all_within = True
    for sim_array, desc in zip(sim_arrays, outputs_desc):
        idx, name, _, shape = desc
        csv_name = reference_dir / f"output{idx}_{sanitize_output_name(name)}.csv"
        reference = np.loadtxt(csv_name, delimiter=",", dtype=np.float32).reshape(shape)
        reference64 = reference.astype(np.float64)
        abs_err = np.abs(sim_array.astype(np.float64) - reference64)
        tolerance = threshold + rtol * np.abs(reference64)
        max_diffs[name] = float(np.max(abs_err))
        within = bool(np.all(abs_err <= tolerance))
        all_within = all_within and within
    return CompareResult(passed=all_within, max_diffs=max_diffs)
def sanitize_output_name(name: str) -> str:
    """Turn an output name into a file-name-safe token.

    Only the first 255 characters are considered; anything that is not
    alphanumeric or one of ``_``, ``.``, ``-`` becomes ``_``.
    """
    allowed_punctuation = "_.-"

    def cleaned(ch: str) -> str:
        return ch if ch.isalnum() or ch in allowed_punctuation else "_"

    return "".join([cleaned(ch) for ch in name[:255]])
def load_effective_hardware(args: argparse.Namespace) -> dict[str, int]:
    """Merge PIMCOMP's ``config.json`` with command-line hardware overrides.

    Each setting uses the command-line value when it is truthy and the value
    from ``<pimcomp_dir>/config.json`` otherwise. ``core_count`` defaults to
    ``mesh_rows * mesh_cols``.

    Raises:
        ValueError: if the configured crossbar is not square.
    """
    config_path = args.pimcomp_dir / "config.json"
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    chip = config["chip_config"]
    rows, cols = chip["network_config"]["layout"]
    matrix = chip["core_config"]["matrix_config"]
    xbar_h, xbar_w = matrix["xbar_size"]

    hardware: dict[str, int] = {}
    hardware["mesh_rows"] = args.mesh_rows or rows
    hardware["mesh_cols"] = args.mesh_cols or cols
    hardware["crossbar_count"] = args.crossbar_count or matrix["xbar_array_count"]
    hardware["crossbar_size"] = args.crossbar_size or xbar_h

    if xbar_h != xbar_w:
        raise ValueError(f"Only square crossbars are supported, got {xbar_h}x{xbar_w}")

    derived_cores = hardware["mesh_rows"] * hardware["mesh_cols"]
    hardware["core_count"] = args.core_count or derived_cores
    return hardware
def write_pimsim_config(args: argparse.Namespace, out_dir: Path, hardware: dict[str, int]) -> Path:
    """Write a pimsim-nn chip config and its mesh network file into *out_dir*.

    The bundled example latency config is used as a template and patched
    with the effective *hardware* plus the simulation mode/time from *args*.

    Returns:
        Path of the written ``<mode>_config.json`` file.
    """
    mesh_builder = load_mesh_builder()
    example_config = REPO / "backend-simulators/pim/pimsim-nn/example/config/latency_config.json"
    with open(example_config, "r", encoding="utf-8") as f:
        config = json.load(f)

    network_file_name = f"network_mesh_{hardware['core_count']}.json"
    chip = config["chip_config"]

    matrix = chip["core_config"]["matrix_config"]
    matrix["xbar_array_count"] = hardware["crossbar_count"]
    matrix["xbar_size"] = [hardware["crossbar_size"], hardware["crossbar_size"]]

    network = chip["network_config"]
    network["layout"] = [hardware["mesh_rows"], hardware["mesh_cols"]]
    network["net_config_file_path"] = network_file_name
    chip["core_cnt"] = hardware["core_count"]

    sim = config["sim_config"]
    sim["sim_mode"] = 1 if args.pimsim_mode == "latency" else 0
    sim["sim_time"] = args.pimsim_time_ms

    out_dir.mkdir(parents=True, exist_ok=True)
    config_path = out_dir / f"{args.pimsim_mode}_config.json"
    network_path = out_dir / network_file_name

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)
        f.write("\n")

    with open(network_path, "w", encoding="utf-8") as f:
        mesh = mesh_builder.build_network(
            hardware["core_count"],
            (hardware["mesh_rows"], hardware["mesh_cols"]),
        )
        json.dump(mesh, f, separators=(",", ":"))
        f.write("\n")
    return config_path
def compile_reference(
    args: argparse.Namespace,
    model_path: Path,
    work_dir: Path,
    steps: list[StepRecord],
) -> tuple[Path, Path, Path]:
    """Build the reference artifacts for *model_path* under *work_dir*.

    Emits the ONNX IR, compiles the model natively, generates the C runner
    source, then configures and builds it with CMake.

    Returns:
        ``(network_mlir, network_so, runner_executable)``.
    """
    raptor_dir = work_dir / "reference"
    runner_dir = work_dir / "runner"
    build_dir = runner_dir / "build"
    for directory in (raptor_dir, build_dir):
        directory.mkdir(parents=True, exist_ok=True)

    stem = model_path.stem
    onnx_ir_base = raptor_dir / stem
    runner_base = runner_dir / stem
    raptor_bin = str(args.raptor_path)
    model_arg = str(model_path)

    def logged(step_name: str, command: list[str], cwd: Path) -> None:
        # Every step shares the same timeout and step list.
        run_logged(step_name, command, cwd=cwd, timeout_sec=args.timeout_seconds, steps=steps)

    logged(
        "Reference Emit ONNX IR",
        [raptor_bin, model_arg, "-o", str(onnx_ir_base), "--EmitONNXIR"],
        REPO,
    )
    logged(
        "Reference Native Compile",
        [raptor_bin, "-O3", model_arg, "-o", str(runner_base)],
        REPO,
    )

    network_so = runner_base.with_suffix(".so")
    network_mlir = onnx_ir_base.with_suffix(".onnx.mlir")

    print_step("Generate Runner Source")
    gen_network_runner(model_path, network_so, args.onnx_include_dir, out=runner_dir / "runner.c", verbose=False)

    logged(
        "Configure Runner",
        ["cmake", str(runner_dir), "-DCMAKE_BUILD_TYPE=Release", "-DCMAKE_C_FLAGS_RELEASE=-O3"],
        build_dir,
    )
    logged("Build Runner", ["cmake", "--build", ".", "-j"], build_dir)
    return network_mlir, network_so, build_dir / "runner"
def generate_reference_outputs(
    runner_path: Path,
    runner_build_dir: Path,
    model_path: Path,
    arrays_in_order: list[np.ndarray],
    steps: list[StepRecord],
    args: argparse.Namespace,
    out_dir: Path,
) -> Path:
    """Run the reference runner and have it dump its outputs as CSV files.

    Inputs are saved under ``<out_dir>/inputs``; the runner writes into
    ``<out_dir>/reference_outputs``, which is returned.
    """
    inputs_dir = out_dir / "inputs"
    reference_dir = out_dir / "reference_outputs"
    for directory in (inputs_dir, reference_dir):
        directory.mkdir(parents=True, exist_ok=True)

    flags, _ = save_inputs_to_files(model_path, arrays_in_order, inputs_dir)
    command = [str(runner_path), *flags, "--save-csv-dir", str(reference_dir)]
    run_logged(
        "Run Reference",
        command,
        cwd=runner_build_dir,
        timeout_sec=args.timeout_seconds,
        steps=steps,
    )
    return reference_dir
def compile_raptor_target(
    model_mlir: Path,
    out_dir: Path,
    hardware: dict[str, int],
    args: argparse.Namespace,
    steps: list[StepRecord],
) -> tuple[Path, dict[str, float]]:
    """Compile *model_mlir* for the PIM target and record the step.

    The compilation itself is delegated to ``compile_with_raptor``; the
    command line built here is only used for logging and for the step record.

    Returns:
        ``(out_dir / "pim", timings)`` where ``timings`` is whatever
        ``compile_with_raptor`` returned.

    Raises:
        Exception: anything raised by ``compile_with_raptor`` is re-raised
            after a failed ``StepRecord`` has been appended.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    step_name = "Compile Raptor PIM"
    # Built once so the logged command and the real invocation cannot drift.
    raptor_extra_args = ["--pim-emit-json", *args.raptor_extra_arg]
    cmd = [
        str(args.raptor_path),
        str(model_mlir),
        "-o",
        str(out_dir / "model"),
        "--maccel=PIM",
        "--EmitPimCodegen",
        f"--crossbar-size={hardware['crossbar_size']}",
        f"--crossbar-count={hardware['crossbar_count']}",
        f"--core-count={hardware['core_count']}",
        *raptor_extra_args,
    ]
    # Log the working directory that is actually passed below (out_dir),
    # not the repository root.
    print_step(step_name, cmd, out_dir)
    start = time.perf_counter()
    command = shell_join(cmd)
    try:
        timings = compile_with_raptor(
            model_mlir,
            args.raptor_path,
            out_dir / "model",
            hardware["crossbar_size"],
            hardware["crossbar_count"],
            core_count=hardware["core_count"],
            raptor_extra_args=raptor_extra_args,
            cwd=out_dir,
            verbose=args.verbose_raptor_compile,
            timeout_sec=args.timeout_seconds,
        )
    except Exception as exc:
        steps.append(
            StepRecord(
                name=step_name,
                duration_sec=time.perf_counter() - start,
                command=command,
                status="failed",
                error=exception_message(exc),
            )
        )
        raise
    steps.append(
        StepRecord(
            name=step_name,
            duration_sec=time.perf_counter() - start,
            command=command,
        )
    )
    return out_dir / "pim", timings
def run_rust_validation(
    label: str,
    pim_dir: Path,
    config_path: Path,
    outputs_desc: list[tuple[int, str, int, list[int]]],
    reference_dir: Path,
    steps: list[StepRecord],
    args: argparse.Namespace,
) -> CompareResult:
    """Run ``pim-simulator`` on *pim_dir* via cargo and compare its outputs.

    The simulator writes ``semantic_validation/out.bin`` next to *pim_dir*;
    that file is then checked against the CSVs in *reference_dir*.
    """
    simulation_dir = pim_dir.parent / "semantic_validation"
    output_bin = simulation_dir / "out.bin"
    dump_ranges = build_dump_ranges(config_path, outputs_desc)

    cargo_prefix = [
        "cargo",
        "run",
        "--no-default-features",
        "--release",
        "--package",
        "pim-simulator",
        "--bin",
        "pim-simulator",
        "--",
    ]
    simulator_args = ["-f", str(pim_dir), "-o", str(output_bin), "-d", dump_ranges]
    cmd = cargo_prefix + simulator_args

    simulation_dir.mkdir(parents=True, exist_ok=True)
    run_logged(
        label,
        cmd,
        cwd=args.pim_simulator_dir,
        timeout_sec=args.timeout_seconds,
        steps=steps,
    )
    return compare_simulator_outputs(
        output_bin,
        outputs_desc,
        reference_dir,
        threshold=args.threshold,
        rtol=args.rtol,
    )
def copy_pimcomp_outputs(args: argparse.Namespace, out_dir: Path):
    """Copy the three PIMCOMP result files from ``<pimcomp_dir>/output`` to *out_dir*."""
    out_dir.mkdir(parents=True, exist_ok=True)
    source_dir = args.pimcomp_dir / "output"
    artifact_names = ("SimulationInfo.gz", "VerificationInfo.json", "MappingResult.txt")
    for artifact in artifact_names:
        shutil.copy2(source_dir / artifact, out_dir / artifact)
def compile_pimcomp(
    args: argparse.Namespace,
    model_path: Path,
    out_dir: Path,
    steps: list[StepRecord],
) -> tuple[Path, Path]:
    """Run the PIMCOMP frontend and backend for *model_path*.

    The produced files are copied into *out_dir*.

    Returns:
        ``(VerificationInfo.json, SimulationInfo.gz)`` paths inside *out_dir*.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    pimcomp_dir = args.pimcomp_dir
    model_name = f"compare_{model_path.stem}"
    frontend_json = pimcomp_dir / "models/JSON" / f"{model_name}.json"

    run_logged(
        "PIMCOMP Frontend",
        ["python3", "frontend.py", "--model_path", str(model_path), "--save_path", str(frontend_json)],
        cwd=pimcomp_dir / "frontend",
        timeout_sec=args.timeout_seconds,
        steps=steps,
    )

    backend_dir = args.pimcomp_dir / "build"
    run_logged(
        "PIMCOMP Backend",
        [str(backend_dir / "PIMCOMP-NN"), f"-m={model_name}", "-p=batch", "-v=YES", "-s=YES"],
        cwd=backend_dir,
        timeout_sec=args.timeout_seconds,
        steps=steps,
    )

    copy_pimcomp_outputs(args, out_dir)
    verification_info = out_dir / "VerificationInfo.json"
    simulation_info = out_dir / "SimulationInfo.gz"
    return verification_info, simulation_info
def export_pimcomp_for_pimsim_nn(simulation_info: Path, output_dir: Path) -> Path:
    """Split PIMCOMP's ``SimulationInfo.gz`` into per-core files for pimsim-nn.

    Writes ``config.json`` plus one ``core_<n>.json`` per core that has a
    non-empty instruction list. An existing *output_dir* is removed first.

    Raises:
        ValueError: if no core has instructions, or if the cores that do are
            not numbered contiguously from 0.
    """
    if output_dir.exists():
        remove_tree(output_dir)
    with gzip.open(simulation_info, "rt", encoding="utf-8") as f:
        sim_info = json.load(f)
    output_dir.mkdir(parents=True, exist_ok=True)
    sim_config = sim_info["config"]

    present_core_indices = []
    for key, value in sim_info.items():
        if key.startswith("core") and isinstance(value, list) and value:
            present_core_indices.append(int(key[4:]))
    present_core_indices.sort()

    if not present_core_indices:
        raise ValueError("PIMCOMP SimulationInfo.gz does not contain any non-empty core instruction streams")
    highest = present_core_indices[-1]
    if present_core_indices != list(range(highest + 1)):
        raise ValueError(f"PIMCOMP core numbering is not contiguous: {present_core_indices}")

    config = {
        "core_cnt": len(present_core_indices),
        "xbar_size": sim_config["xbar_size"],
        "xbar_array_count": sim_config["xbar_array_count"],
        "cell_precision": sim_config["cell_precision"],
        "adc_count": sim_config["adc_count"],
        "array_group_map": {},
    }
    group_map = config["array_group_map"]
    for core_idx in present_core_indices:
        core_name = f"core{core_idx}"
        group_map[core_name] = sim_config["array_group_map"].get(core_name, [])

    def dump_compact(payload, target: Path) -> None:
        # Compact JSON followed by a trailing newline.
        with open(target, "w", encoding="utf-8") as f:
            json.dump(payload, f, separators=(",", ":"))
            f.write("\n")

    dump_compact(config, output_dir / "config.json")
    for core_idx in present_core_indices:
        core_key = f"core{core_idx}"
        dump_compact(sim_info[core_key], output_dir / f"core_{core_idx}.json")
    return output_dir
def flatten_pimcomp_input(array: np.ndarray) -> np.ndarray:
    """Return *array* as a flat float32 vector.

    4-D arrays have their axes permuted to ``(0, 2, 3, 1)`` before
    flattening; arrays of any other rank are flattened as they are.
    """
    as_float32 = array.astype(np.float32, copy=False)
    if as_float32.ndim != 4:
        return as_float32.reshape(-1)
    return as_float32.transpose((0, 2, 3, 1)).reshape(-1)
def export_pimcomp_for_rust(
    model_path: Path,
    verification_info: Path,
    simulation_info: Path,
    runtime_inputs: list[np.ndarray],
    output_dir: Path,
) -> Path:
    """Export a PIMCOMP compilation into the directory layout fed to ``pim-simulator``.

    Writes into *output_dir*: ``memory.bin`` (input, biases, constant blobs,
    then a zeroed output area), ``config.json``, one ``core_<n>.json``
    instruction list per core, crossbar weight tiles under ``weights/`` and
    per-core ``core_<n>/crossbar_<k>.bin`` symlinks to those tiles.
    An existing *output_dir* is removed first.

    Args:
        model_path: ONNX model, handed to the PIMCOMP exporter helpers.
        verification_info: Path to PIMCOMP's ``VerificationInfo.json``.
        simulation_info: Path to PIMCOMP's gzip-compressed ``SimulationInfo``.
        runtime_inputs: Runtime input tensors; exactly one is required.
        output_dir: Destination directory.

    Returns:
        *output_dir*.

    Raises:
        ValueError: if *runtime_inputs* does not hold exactly one tensor.
        RuntimeError: if the simulation and verification instruction streams
            cannot be matched, or an unsupported op / LD stage is met.
    """
    if len(runtime_inputs) != 1:
        raise ValueError("PIMCOMP export currently requires exactly one runtime input tensor")
    if output_dir.exists():
        remove_tree(output_dir)
    exporter = load_pimcomp_exporter()
    with open(verification_info, "r", encoding="utf-8") as f:
        final_info = json.load(f)
    with gzip.open(simulation_info, "rt", encoding="utf-8") as f:
        sim_info = json.load(f)
    onnx_model, weights, gemm_weights, output_to_weight, output_to_bias = exporter.load_model_info(
        model_path, final_info
    )
    input_tensor = flatten_pimcomp_input(runtime_inputs[0])
    node_list = final_info["node_list"]
    max_output = exporter.max_output_element_num(node_list)
    local_group_map = exporter.map_local_groups(final_info, sim_info)
    output_dir.mkdir(parents=True, exist_ok=True)
    weights_dir = output_dir / "weights"
    weights_dir.mkdir(parents=True, exist_ok=True)

    # --- Memory layout pass: assign addresses only, nothing is written yet. ---
    # The input tensor sits at address 0.
    # NOTE(review): exporter.byte_offset presumably converts an element count
    # into a byte count -- confirm against the exporter.
    input_addr = 0
    cursor = exporter.byte_offset(len(input_tensor))
    # Biases follow the input, one region per node that has a bias.
    bias_addrs: dict[str, int] = {}
    for node_name, bias_name in output_to_bias.items():
        bias = weights[bias_name].astype(np.float32).flatten()
        bias_addrs[node_name] = cursor
        cursor += exporter.byte_offset(len(bias))
    # Each distinct (immediate value, length) pair used by an lldi gets one
    # constant blob, shared by every lldi with the same pair.
    lldi_addrs: dict[tuple[bytes, int], int] = {}
    for core_idx in range(sim_info["config"]["core_cnt"]):
        for inst in sim_info.get(f"core{core_idx}", []) or []:
            if inst["op"] != "lldi":
                continue
            key = (exporter.float32_bytes(inst["imm"]), inst["len"])
            if key not in lldi_addrs:
                lldi_addrs[key] = cursor
                cursor += exporter.byte_offset(inst["len"])
    # The output area starts at the next multiple of 256 and reserves
    # max_output elements for every node in node_list.
    output_base = (cursor + 255) & ~255
    memory_size = output_base + exporter.byte_offset(max_output * len(node_list))

    # --- Fill the memory image using the addresses assigned above. ---
    memory = bytearray(memory_size)
    memory[input_addr : input_addr + input_tensor.nbytes] = input_tensor.tobytes()
    for node_name, bias_name in output_to_bias.items():
        bias = weights[bias_name].astype(np.float32).flatten()
        start = bias_addrs[node_name]
        memory[start : start + bias.nbytes] = bias.tobytes()
    for (value_bytes, element_num), start in lldi_addrs.items():
        value = np.frombuffer(value_bytes, dtype=np.float32)[0]
        blob = np.full(element_num, value, dtype=np.float32)
        memory[start : start + blob.nbytes] = blob.tobytes()
    config = {
        "core_cnt": sim_info["config"]["core_cnt"],
        "xbar_size": sim_info["config"]["xbar_size"],
        "xbar_array_count": sim_info["config"]["xbar_array_count"],
        "cell_precision": sim_info["config"]["cell_precision"],
        "adc_count": sim_info["config"]["adc_count"],
        "array_group_map": {},
        "inputs_addresses": [input_addr],
        "outputs_addresses": [],
    }
    output_name_to_node = {node["name"]: node for node in node_list}
    for graph_output in onnx_model.graph.output:
        node = output_name_to_node[graph_output.name]
        # NOTE(review): output addresses use new_node_index while the "st"
        # translation below uses the verification op's node_index -- confirm
        # that both index the same slot.
        config["outputs_addresses"].append(output_base + exporter.byte_offset(node["new_node_index"] * max_output))
    ag_info = final_info["AG_info"]
    weight_counter = 0
    xbar_size = int(sim_info["config"]["xbar_size"][0])
    for core_idx in range(config["core_cnt"]):
        core_name = f"core{core_idx}"
        core_dir = output_dir / f"core_{core_idx}"
        core_dir.mkdir(parents=True, exist_ok=True)
        local_to_global = local_group_map.get(core_idx, {})
        ag_counts = sim_info["config"]["array_group_map"].get(core_name, [])
        # group_prefix[g] is the index of the first physical crossbar of
        # local group g (running sum of the per-group crossbar counts).
        group_prefix = []
        total_crossbars = 0
        for count in ag_counts:
            group_prefix.append(total_crossbars)
            total_crossbars += count
        config["array_group_map"][core_name] = list(range(total_crossbars))

        # --- Weight tiles: one file per crossbar, symlinked into the core dir. ---
        for local_group, global_ag in sorted(local_to_global.items()):
            info = ag_info[global_ag]
            weight_name = output_to_weight[info["node_name"]]
            matrix = gemm_weights[weight_name]
            # height_end / width_end are treated as inclusive bounds.
            row_slice = slice(info["height_start"], info["height_end"] + 1)
            first_physical = group_prefix[local_group]
            for crossbar_idx, crossbar in enumerate(info["crossbar"]):
                col_slice = slice(crossbar["width_start"], crossbar["width_end"] + 1)
                # Rows beyond the slice stay zero so the tile always has
                # xbar_size rows.
                tile = np.zeros((xbar_size, col_slice.stop - col_slice.start), dtype=np.float32)
                tile_rows = matrix[row_slice, col_slice].astype(np.float32)
                tile[: tile_rows.shape[0], :] = tile_rows
                weight_path = weights_dir / f"crossbar_{weight_counter}.bin"
                weight_path.write_bytes(tile.tobytes(order="C"))
                os.symlink(weight_path.resolve(), core_dir / f"crossbar_{first_physical + crossbar_idx}.bin")
                weight_counter += 1

        # --- Instruction translation for this core. ---
        instructions = []
        # Position in `instructions` of the most recent translated sldi per
        # destination register, so later ld/st/mvmul can patch or read it.
        last_sldi_by_rd: dict[int, int] = {}
        ver_ops = exporter.filtered_verification_ops(final_info, core_idx)
        ver_index = 0
        for sim_inst in sim_info.get(core_name, []) or []:
            op = sim_inst["op"]
            # setbw and sldi have no verification counterpart; they do not
            # advance ver_index.
            if op == "setbw":
                instructions.append(sim_inst)
                continue
            if op == "sldi":
                translated = {"op": "sldi", "rd": sim_inst["rd"], "imm": exporter.byte_offset(sim_inst["imm"])}
                instructions.append(translated)
                last_sldi_by_rd[sim_inst["rd"]] = len(instructions) - 1
                continue
            # Every other op consumes the next verification op, which must
            # carry the same operation name.
            if ver_index >= len(ver_ops):
                raise RuntimeError(f"core{core_idx}: simulation op {op} has no matching verification op")
            ver_inst = ver_ops[ver_index]
            ver_index += 1
            ver_op = ver_inst["operation"].lower()
            if ver_op != op:
                raise RuntimeError(
                    f"core{core_idx}: simulation/verification op mismatch {op} vs {ver_op} at {ver_index - 1}"
                )
            if op == "ld":
                if ver_inst["stage"] == "INPUT":
                    src = input_addr + exporter.byte_offset(ver_inst["source_offset"])
                elif ver_inst["stage"] == "BIAS":
                    src = bias_addrs[node_list[ver_inst["node_index"]]["name"]] + exporter.byte_offset(ver_inst["source_offset"])
                else:
                    raise RuntimeError(f"Unsupported LD stage {ver_inst['stage']}")
                # Rewrite the already-emitted sldi that set the source
                # register so it points at the address in memory.bin.
                instructions[last_sldi_by_rd[sim_inst["rs1"]]]["imm"] = src
                translated = dict(sim_inst)
                translated["size"] = exporter.byte_offset(sim_inst["size"])
                instructions.append(translated)
            elif op == "st":
                dst = output_base + exporter.byte_offset(
                    ver_inst["node_index"] * max_output + ver_inst["destination_offset"]
                )
                # Same patching as for ld, applied to the destination register.
                instructions[last_sldi_by_rd[sim_inst["rd"]]]["imm"] = dst
                translated = dict(sim_inst)
                translated["size"] = exporter.byte_offset(sim_inst["size"])
                instructions.append(translated)
            elif op == "lldi":
                # lldi is replaced by sldi + ld reading the pre-filled
                # constant blob.
                key = (exporter.float32_bytes(sim_inst["imm"]), sim_inst["len"])
                src = lldi_addrs[key]
                # Use register 1 as scratch unless rd is 0... i.e. pick
                # whichever of registers 0/1 is not the destination.
                # NOTE(review): this sldi overwrites the scratch register and
                # is not tracked in last_sldi_by_rd -- confirm that is safe.
                temp_rd = 1 if sim_inst["rd"] == 0 else 0
                instructions.append({"op": "sldi", "rd": temp_rd, "imm": src})
                instructions.append(
                    {
                        "op": "ld",
                        "rd": sim_inst["rd"],
                        "rs1": temp_rd,
                        "size": exporter.byte_offset(sim_inst["len"]),
                        "offset": sim_inst["offset"],
                    }
                )
            elif op in ("lmv", "vvadd", "vvmul", "vvmax", "vrelu"):
                translated = dict(sim_inst)
                translated["len"] = exporter.byte_offset(sim_inst["len"])
                instructions.append(translated)
            elif op in ("send", "recv"):
                translated = dict(sim_inst)
                translated["size"] = exporter.byte_offset(sim_inst["size"])
                instructions.append(translated)
            elif op == "mvmul":
                # One mvmul on a local group becomes one mvmul per physical
                # crossbar of that group.
                local_group = sim_inst["group"]
                global_ag = local_to_global[local_group]
                first_physical = group_prefix[local_group]
                widths = [
                    crossbar["width_end"] - crossbar["width_start"] + 1
                    for crossbar in ag_info[global_ag]["crossbar"]
                ]
                # Base addresses come from the last tracked sldi of rd / rs1.
                dst = instructions[last_sldi_by_rd[sim_inst["rd"]]]["imm"]
                src = instructions[last_sldi_by_rd[sim_inst["rs1"]]]["imm"]
                out_offset = 0
                for idx, width in enumerate(widths):
                    # Destination advances by the width of each crossbar; the
                    # source is reset to the same address every time.
                    instructions.append({"op": "sldi", "rd": sim_inst["rd"], "imm": dst + exporter.byte_offset(out_offset)})
                    instructions.append({"op": "sldi", "rd": sim_inst["rs1"], "imm": src})
                    translated = dict(sim_inst)
                    translated["group"] = first_physical + idx
                    instructions.append(translated)
                    out_offset += width
            else:
                raise RuntimeError(f"Unsupported PIMCOMP op {op}")
        with open(output_dir / f"core_{core_idx}.json", "w", encoding="utf-8") as f:
            json.dump(instructions, f, separators=(",", ":"))
            f.write("\n")
    with open(output_dir / "config.json", "w", encoding="utf-8") as f:
        json.dump(config, f, separators=(",", ":"))
        f.write("\n")
    (output_dir / "memory.bin").write_bytes(memory)
    return output_dir
def parse_pimsim_nn_report(output: str) -> dict[str, float | int | str]:
    """Extract the metrics printed by pimsim-nn from its textual *output*.

    Args:
        output: Captured simulator output.

    Returns:
        A dict that always contains ``raw_output`` plus one entry per metric
        that was found: ``output_count`` (int) and ``throughput``,
        ``average_latency_ms``, ``latency_ms``, ``average_power_mw``,
        ``average_energy_pj`` (floats). Metrics that are absent, or whose
        numeric token cannot be parsed, are left out.
    """
    patterns = {
        "output_count": r"output count:\s+([0-9]+)\s+samples",
        # Accept exponent notation like every other floating-point field.
        "throughput": r"throughput:\s+([0-9.eE+-]+)\s+samples/s",
        "average_latency_ms": r"average latency:\s+([0-9.eE+-]+)\s+ms",
        # The lookbehind keeps this from matching the tail of an
        # "average latency:" line, which would report the average as latency.
        "latency_ms": r"(?<!average\s)latency:\s+([0-9.eE+-]+)\s+ms",
        "average_power_mw": r"average power:\s+([0-9.eE+-]+)\s+mW",
        "average_energy_pj": r"average energy:\s+([0-9.eE+-]+)\s+pJ/it",
    }
    result: dict[str, float | int | str] = {"raw_output": output}
    for key, pattern in patterns.items():
        match = re.search(pattern, output)
        if match is None:
            continue
        value = match.group(1)
        try:
            result[key] = int(value) if key == "output_count" else float(value)
        except ValueError:
            # The character class is permissive (e.g. "1.2.3"); treat an
            # unparseable token as "metric not reported" instead of crashing.
            continue
    return result
def run_pimsim_nn(
    label: str,
    inst_path: Path,
    config_path: Path,
    single_file: bool,
    steps: list[StepRecord],
    args: argparse.Namespace,
) -> dict[str, Any]:
    """Run pimsim-nn's ``ChipTest`` binary and parse the report it prints.

    The step is given ten times the regular timeout.
    """
    build_dir = args.pimsim_nn_build_dir
    single_file_flag = "true" if single_file else "false"
    cmd = [str(build_dir / "ChipTest"), str(inst_path), str(config_path), single_file_flag]
    report_text = run_logged(
        label,
        cmd,
        cwd=args.pimsim_nn_build_dir,
        timeout_sec=args.timeout_seconds * 10.0,
        steps=steps,
    )
    return parse_pimsim_nn_report(report_text)
def parse_raptor_instructions(pim_dir: Path) -> dict[str, Any]:
    """Summarize the instruction mix of the ``core_*.json`` files in *pim_dir*.

    The files are scanned as raw bytes for ``"op":"<name>"`` occurrences
    instead of being parsed as JSON, so only compactly serialized files
    (no space after the colon) are counted.

    Args:
        pim_dir: Directory holding the per-core instruction files.

    Returns:
        A dict with ``active_cores``, ``total_instructions``, ``op_counts``
        and the top-10 cores ranked by total, send and recv counts.
    """
    op_re = re.compile(br'"op":"([^"]+)"')
    counts = Counter()
    per_core = []
    for path in sorted(pim_dir.glob("core_*.json"), key=lambda p: int(p.stem.split("_")[1])):
        core_counts = Counter()
        with path.open("rb") as f:
            # mmap refuses to map an empty file, so treat it as a core
            # without instructions.
            if os.fstat(f.fileno()).st_size > 0:
                # The context manager unmaps the file even if scanning fails.
                with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                    core_counts.update(m.group(1).decode() for m in op_re.finditer(mm))
        total = sum(core_counts.values())
        counts.update(core_counts)
        per_core.append(
            {
                "core": path.stem,
                "total": total,
                "send": core_counts.get("send", 0),
                "recv": core_counts.get("recv", 0),
                "mvmul": core_counts.get("mvmul", 0),
            }
        )
    return {
        "active_cores": sum(1 for entry in per_core if entry["total"]),
        "total_instructions": int(sum(counts.values())),
        "op_counts": dict(counts),
        "top_cores_by_total": sorted(per_core, key=lambda entry: entry["total"], reverse=True)[:10],
        "top_cores_by_send": sorted(per_core, key=lambda entry: entry["send"], reverse=True)[:10],
        "top_cores_by_recv": sorted(per_core, key=lambda entry: entry["recv"], reverse=True)[:10],
    }
def parse_pimcomp_instructions(simulation_info: Path) -> dict[str, Any]:
    """Summarize the instruction mix stored in PIMCOMP's ``SimulationInfo.gz``.

    Top-level keys of the form ``core<N>`` are visited in numeric order.
    Each instruction is counted under its lower-cased ``operation`` (or
    ``op``) field, falling back to ``"unknown"``.

    Args:
        simulation_info: Path to the gzip-compressed JSON file.

    Returns:
        A dict with ``active_cores``, ``total_instructions``, ``op_counts``
        and the top-10 cores ranked by total, send and recv counts.
    """
    with gzip.open(simulation_info, "rt", encoding="utf-8") as f:
        data = json.load(f)
    per_core = []
    counts = Counter()
    # Only "core<digits>" keys are instruction streams; anything else that
    # merely starts with "core" would make int() fail.
    core_keys = sorted(
        (name for name in data if name.startswith("core") and name[4:].isdecimal()),
        key=lambda name: int(name[4:]),
    )
    for key in core_keys:
        # Other readers of this file tolerate null / non-list core entries;
        # count such a core as having no instructions instead of crashing.
        insts = data[key] if isinstance(data[key], list) else []
        core_counts = Counter((inst.get("operation") or inst.get("op") or "unknown").lower() for inst in insts)
        counts.update(core_counts)
        per_core.append(
            {
                "core": key,
                "total": int(sum(core_counts.values())),
                "send": core_counts.get("send", 0),
                "recv": core_counts.get("recv", 0),
                "mvmul": core_counts.get("mvmul", 0),
            }
        )
    return {
        "active_cores": sum(1 for entry in per_core if entry["total"]),
        "total_instructions": int(sum(counts.values())),
        "op_counts": dict(counts),
        "top_cores_by_total": sorted(per_core, key=lambda entry: entry["total"], reverse=True)[:10],
        "top_cores_by_send": sorted(per_core, key=lambda entry: entry["send"], reverse=True)[:10],
        "top_cores_by_recv": sorted(per_core, key=lambda entry: entry["recv"], reverse=True)[:10],
    }
def format_op_table(counts: dict[str, int], total: int) -> list[str]:
    """Format op counts as Markdown table rows, most frequent op first.

    A single placeholder row is returned when *total* is not positive.
    """
    if total <= 0:
        return ["| n/a | 0 | n/a |"]
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return [f"| `{op}` | {count} | {100.0 * count / total:.2f}% |" for op, count in ranked]
def validation_status(result: CompareResult) -> str:
    """Return the report label for *result*.

    Completed comparisons yield ``PASS``/``FAIL``; any other status is
    reported upper-cased.
    """
    if result.status != "done":
        return result.status.upper()
    if result.passed:
        return "PASS"
    return "FAIL"
def skipped_validation(reason: str) -> CompareResult:
    """Build the placeholder result for a validation that was not run."""
    placeholder = CompareResult(passed=False, max_diffs={}, status="skipped", error=reason)
    return placeholder
def failed_validation(error: BaseException | str) -> CompareResult:
    """Build the placeholder result for a validation that raised or failed."""
    if isinstance(error, str):
        message = error
    else:
        message = exception_message(error)
    return CompareResult(passed=False, max_diffs={}, status="failed", error=message)
def skipped_perf(reason: str) -> dict[str, Any]:
    """Build the performance record for a simulation that was skipped."""
    record: dict[str, Any] = {"skipped": True}
    record["reason"] = reason
    return record
def failed_perf(error: BaseException | str) -> dict[str, Any]:
    """Build the performance record for a simulation that failed."""
    if isinstance(error, str):
        return {"error": error}
    return {"error": exception_message(error)}
def perf_status(perf: dict[str, Any]) -> str:
    """Return ``SKIPPED``, ``FAILED`` or ``DONE`` for a performance record.

    A truthy ``skipped`` entry wins over a truthy ``error`` entry.
    """
    for marker, label in (("skipped", "SKIPPED"), ("error", "FAILED")):
        if perf.get(marker):
            return label
    return "DONE"
def perf_value(perf: dict[str, Any], key: str) -> Any:
    """Return ``perf[key]``, or the string ``"n/a"`` when the key is absent."""
    return perf.get(key, "n/a")
def empty_instruction_summary(reason: str | None = None, error: str | None = None) -> dict[str, Any]:
    """Build an all-zero instruction summary.

    A *reason* marks the summary as skipped; an *error* is attached as-is.
    """
    summary: dict[str, Any] = {
        "active_cores": 0,
        "total_instructions": 0,
        "op_counts": {},
    }
    for ranking in ("top_cores_by_total", "top_cores_by_send", "top_cores_by_recv"):
        summary[ranking] = []
    if reason is not None:
        summary.update({"skipped": True, "reason": reason})
    if error is not None:
        summary["error"] = error
    return summary
def optional_path(path: Path | None) -> str | None:
    """Stringify *path*, passing ``None`` through unchanged."""
    if path is None:
        return None
    return str(path)
def record_failure(failures: list[dict[str, str]], stage: str, exc: BaseException | str) -> None:
    """Append a ``{stage, error}`` entry to *failures* and print the failure."""
    message = exception_message(exc) if not isinstance(exc, str) else exc
    entry = {"stage": stage, "error": message}
    failures.append(entry)
    print_failure(stage, message)
def try_stage(
    failures: list[dict[str, str]],
    stage: str,
    func,
    *args,
    **kwargs,
):
    """Call ``func(*args, **kwargs)`` and return its result.

    If the call raises an ``Exception`` the failure is recorded under
    *stage* and ``None`` is returned instead.
    """
    try:
        outcome = func(*args, **kwargs)
    except Exception as exc:
        record_failure(failures, stage, exc)
        outcome = None
    return outcome
def try_stage_success(
    failures: list[dict[str, str]],
    stage: str,
    func,
    *args,
    **kwargs,
) -> bool:
    """Call ``func(*args, **kwargs)`` and report whether it completed.

    An ``Exception`` is recorded under *stage* and turns the result into
    ``False``; the callee's return value is discarded either way.
    """
    try:
        func(*args, **kwargs)
    except Exception as exc:
        record_failure(failures, stage, exc)
        return False
    else:
        return True
def write_report(
    report_path: Path,
    *,
    model_path: Path,
    hardware: dict[str, int],
    steps: list[StepRecord],
    failures: list[dict[str, str]],
    raptor_validation: CompareResult,
    pimcomp_validation: CompareResult,
    raptor_perf: dict[str, Any],
    pimcomp_perf: dict[str, Any],
    raptor_instr: dict[str, Any],
    pimcomp_instr: dict[str, Any],
    raptor_pass_timings: dict[str, float],
    pimsim_mode: str,
):
    """Write the Markdown comparison report to *report_path*.

    Sections, in order: header, failures (only when something failed),
    semantic validation with max output differences, pimsim-nn performance,
    instruction summary with per-compiler op distribution, step timings with
    failed-step details, and Raptor pass timings (only when provided).

    Args:
        report_path: Destination file, written as UTF-8.
        model_path: Model shown in the header.
        hardware: Effective hardware settings; missing keys render as ``n/a``.
        steps: Recorded steps for the timings table.
        failures: ``{"stage", "error"}`` entries for the failures table.
        raptor_validation / pimcomp_validation: Validation results.
        raptor_perf / pimcomp_perf: Performance records (metrics, or
            ``skipped``/``reason``, or ``error``).
        raptor_instr / pimcomp_instr: Instruction summaries.
        raptor_pass_timings: Pass name to duration in seconds.
        pimsim_mode: ``"throughput"`` selects the throughput table; any other
            value selects the latency table.
    """
    lines = [
        "# Raptor vs PIMCOMP Comparison Report",
        "",
        f"- Model: `{model_path}`",
        f"- Hardware: `{hardware.get('core_count', 'n/a')} cores`, `{hardware.get('crossbar_count', 'n/a')} xbars/core`, `{hardware.get('crossbar_size', 'n/a')}x{hardware.get('crossbar_size', 'n/a')}` crossbars, mesh `{hardware.get('mesh_rows', 'n/a')}x{hardware.get('mesh_cols', 'n/a')}`",
        "",
    ]
    # The failure section appears when a stage failure was recorded or any
    # step did not pass.
    if failures or any(step.status != "passed" for step in steps):
        lines.extend(
            [
                "## Failures / Skipped Work",
                "",
                "The script did not abort. The failed stage was recorded and any dependent stage was skipped when its inputs were not available.",
                "",
            ]
        )
        if failures:
            lines.extend(["| Stage | Error |", "|---|---|"])
            for failure in failures:
                # Newlines would end the table row, so they become <br>.
                # NOTE(review): a "|" inside the error text is not escaped and
                # would split the cell -- confirm whether that can occur.
                error = failure["error"].replace("\n", "<br>")
                lines.append(f"| {failure['stage']} | {error} |")
            lines.append("")
    lines.extend(
        [
            "## Semantic Validation",
            "",
            f"- Raptor via `pim-simulator`: `{validation_status(raptor_validation)}`",
            f"- PIMCOMP via exported `pim-simulator`: `{validation_status(pimcomp_validation)}`",
        ]
    )
    # Only the first line of a validation error is shown inline.
    if raptor_validation.error:
        lines.append(f"- Raptor validation note: `{raptor_validation.error.splitlines()[0]}`")
    if pimcomp_validation.error:
        lines.append(f"- PIMCOMP validation note: `{pimcomp_validation.error.splitlines()[0]}`")
    lines.extend(["", "### Max Output Differences", ""])
    # Union of output names so an output missing on one side renders as nan.
    diff_names = sorted(set(raptor_validation.max_diffs) | set(pimcomp_validation.max_diffs))
    if diff_names:
        lines.extend(["| Output | Raptor max diff | PIMCOMP max diff |", "|---|---:|---:|"])
        for name in diff_names:
            lines.append(
                f"| `{name}` | {raptor_validation.max_diffs.get(name, float('nan')):.6e} | "
                f"{pimcomp_validation.max_diffs.get(name, float('nan')):.6e} |"
            )
    else:
        lines.append("No output differences are available because validation did not run or failed before comparison.")
    lines.extend(
        [
            "",
            "## pimsim-nn Performance",
            "",
            f"- Mode: `{pimsim_mode}`",
            "",
        ]
    )
    if pimsim_mode == "throughput":
        lines.extend(
            [
                "| Compiler | Status | Throughput (samples/s) | Avg latency (ms) | Avg power (mW) | Avg energy (pJ/it) | Output count |",
                "|---|---|---:|---:|---:|---:|---:|",
                f"| Raptor | {perf_status(raptor_perf)} | {perf_value(raptor_perf, 'throughput')} | {perf_value(raptor_perf, 'average_latency_ms')} | "
                f"{perf_value(raptor_perf, 'average_power_mw')} | {perf_value(raptor_perf, 'average_energy_pj')} | {perf_value(raptor_perf, 'output_count')} |",
                f"| PIMCOMP | {perf_status(pimcomp_perf)} | {perf_value(pimcomp_perf, 'throughput')} | {perf_value(pimcomp_perf, 'average_latency_ms')} | "
                f"{perf_value(pimcomp_perf, 'average_power_mw')} | {perf_value(pimcomp_perf, 'average_energy_pj')} | {perf_value(pimcomp_perf, 'output_count')} |",
                "",
            ]
        )
    else:
        lines.extend(
            [
                "| Compiler | Status | Latency (ms) | Avg power (mW) | Avg energy (pJ) |",
                "|---|---|---:|---:|---:|",
                f"| Raptor | {perf_status(raptor_perf)} | {perf_value(raptor_perf, 'latency_ms')} | "
                f"{perf_value(raptor_perf, 'average_power_mw')} | {perf_value(raptor_perf, 'average_energy_pj')} |",
                f"| PIMCOMP | {perf_status(pimcomp_perf)} | {perf_value(pimcomp_perf, 'latency_ms')} | "
                f"{perf_value(pimcomp_perf, 'average_power_mw')} | {perf_value(pimcomp_perf, 'average_energy_pj')} |",
                "",
            ]
        )
    # A skip reason takes precedence over an error; only its first line is shown.
    if raptor_perf.get("reason") or raptor_perf.get("error"):
        lines.append(f"- Raptor pimsim-nn note: `{(raptor_perf.get('reason') or raptor_perf.get('error')).splitlines()[0]}`")
    if pimcomp_perf.get("reason") or pimcomp_perf.get("error"):
        lines.append(f"- PIMCOMP pimsim-nn note: `{(pimcomp_perf.get('reason') or pimcomp_perf.get('error')).splitlines()[0]}`")
    # Keep exactly one blank line before the next heading whether or not a
    # note was appended.
    if lines[-1] != "":
        lines.append("")
    lines.extend(
        [
            "## Instruction Summary",
            "",
            "| Compiler | Status | Active cores | Total instructions | Sends | Receives | MVMUL |",
            "|---|---|---:|---:|---:|---:|---:|",
            f"| Raptor | {'FAILED' if raptor_instr.get('error') else 'SKIPPED' if raptor_instr.get('skipped') else 'DONE'} | {raptor_instr.get('active_cores', 0)} | {raptor_instr.get('total_instructions', 0)} | {raptor_instr.get('op_counts', {}).get('send', 0)} | {raptor_instr.get('op_counts', {}).get('recv', 0)} | {raptor_instr.get('op_counts', {}).get('mvmul', 0)} |",
            f"| PIMCOMP | {'FAILED' if pimcomp_instr.get('error') else 'SKIPPED' if pimcomp_instr.get('skipped') else 'DONE'} | {pimcomp_instr.get('active_cores', 0)} | {pimcomp_instr.get('total_instructions', 0)} | {pimcomp_instr.get('op_counts', {}).get('send', 0)} | {pimcomp_instr.get('op_counts', {}).get('recv', 0)} | {pimcomp_instr.get('op_counts', {}).get('mvmul', 0)} |",
            "",
            "### Raptor Op Distribution",
            "",
            "| Op | Count | Share |",
            "|---|---:|---:|",
            *format_op_table(raptor_instr.get("op_counts", {}), raptor_instr.get("total_instructions", 0)),
            "",
            "### PIMCOMP Op Distribution",
            "",
            "| Op | Count | Share |",
            "|---|---:|---:|",
            *format_op_table(pimcomp_instr.get("op_counts", {}), pimcomp_instr.get("total_instructions", 0)),
            "",
            "## Step Timings",
            "",
            "| Step | Status | Duration (s) | Return code |",
            "|---|---|---:|---:|",
        ]
    )
    for step in steps:
        lines.append(
            f"| {step.name} | {step.status.upper()} | {step.duration_sec:.3f} | "
            f"{step.returncode if step.returncode is not None else ''} |"
        )
    failed_steps = [step for step in steps if step.status != "passed"]
    if failed_steps:
        lines.extend(["", "### Failed Step Details", ""])
        for step in failed_steps:
            lines.extend(
                [
                    f"#### {step.name}",
                    "",
                    f"- Command: `{step.command}`",
                    f"- Error: `{step.error or 'n/a'}`",
                ]
            )
            # The captured output tail goes into a fenced text block.
            if step.output_tail:
                lines.extend(["", "```text", step.output_tail, "```"])
            lines.append("")
    if raptor_pass_timings:
        lines.extend(["", "## Raptor Pass Timings", "", "| Pass | Duration (s) |", "|---|---:|"])
        for name, duration in raptor_pass_timings.items():
            lines.append(f"| {name} | {duration:.4f} |")
    report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
def main() -> None:
    """Run the Raptor vs. PIMCOMP comparison pipeline and write the reports.

    Parses the command line, then runs each stage (hardware/config loading,
    reference compile and run, Raptor compile and validation, PIMCOMP compile,
    export and validation, optional pimsim-nn performance runs, instruction
    parsing). Stages are invoked through ``try_stage`` / ``try_stage_success``
    and a ``None`` (or falsy) result is treated as "stage failed", so later
    stages are skipped or degraded instead of aborting the whole run.

    Two artefacts are always written into ``--out-dir``:
    ``comparison_report.md`` and ``comparison_report.json``.

    Raises:
        SystemExit: with status 1 when ``--fail-on-error`` is given and at
            least one failure was recorded or one step did not pass.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True, type=Path)
    parser.add_argument("--out-dir", required=True, type=Path)
    parser.add_argument("--raptor-path", default=REPO / "build_release/Release/bin/onnx-mlir", type=Path)
    parser.add_argument("--onnx-include-dir", default=REPO / "onnx-mlir/include", type=Path)
    parser.add_argument("--pimcomp-dir", default=REPO / "third_party/PIMCOMP-NN", type=Path)
    parser.add_argument("--pim-simulator-dir", default=REPO / "backend-simulators/pim/pim-simulator", type=Path)
    parser.add_argument("--pimsim-nn-build-dir", default=REPO / "backend-simulators/pim/pimsim-nn/build", type=Path)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--threshold", type=float, default=1e-3)
    parser.add_argument("--rtol", type=float, default=1e-5)
    parser.add_argument("--timeout-seconds", type=float, default=3600.0)
    parser.add_argument("--core-count", type=int)
    parser.add_argument("--crossbar-count", type=int)
    parser.add_argument("--crossbar-size", type=int)
    parser.add_argument("--mesh-rows", type=int)
    parser.add_argument("--mesh-cols", type=int)
    parser.add_argument("--pimsim-time-ms", type=int, default=1000)
    parser.add_argument("--pimsim-mode", choices=["latency", "throughput"], default="latency")
    parser.add_argument("--skip-pimsim-nn", action="store_true")
    parser.add_argument("--verbose-raptor-compile", action="store_true")
    parser.add_argument("--raptor-extra-arg", action="append", default=[])
    parser.add_argument(
        "--fail-on-error",
        action="store_true",
        help="Return a non-zero process status after writing the reports if any compilation/run stage failed.",
    )
    args = parser.parse_args()

    model_path = args.model.resolve()
    out_dir = args.out_dir.resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Accumulators shared by every stage: `failures` and `steps` are handed to
    # the stage helpers, which are expected to append to them.
    failures: list[dict[str, str]] = []
    steps: list[StepRecord] = []

    # Pessimistic defaults so that the reports can always be written, even when
    # an early stage fails. A zero core_count doubles as "hardware unknown".
    hardware: dict[str, int] = {
        "mesh_rows": 0,
        "mesh_cols": 0,
        "crossbar_count": 0,
        "crossbar_size": 0,
        "core_count": 0,
    }
    outputs_desc: list[tuple[int, str, int, list[int]]] = []
    arrays_in_order: list[np.ndarray] = []
    runtime_inputs: list[np.ndarray] = []
    network_mlir: Path | None = None
    runner_path: Path | None = None
    reference_dir: Path | None = None
    raptor_pim_dir: Path | None = None
    raptor_pass_timings: dict[str, float] = {}
    verification_info: Path | None = None
    simulation_info: Path | None = None
    pimcomp_export_dir: Path | None = None
    pimsim_config: Path | None = None
    raptor_validation = skipped_validation("Raptor validation did not run")
    pimcomp_validation = skipped_validation("PIMCOMP validation did not run")
    raptor_perf: dict[str, Any] = skipped_perf("pimsim-nn Raptor did not run")
    pimcomp_perf: dict[str, Any] = skipped_perf("pimsim-nn PIMCOMP did not run")
    raptor_instr: dict[str, Any] = empty_instruction_summary("Raptor instruction parsing did not run")
    pimcomp_instr: dict[str, Any] = empty_instruction_summary("PIMCOMP instruction parsing did not run")

    # --- Hardware configuration and model inputs ---------------------------
    loaded_hardware = try_stage(failures, "Load hardware configuration", load_effective_hardware, args)
    if loaded_hardware is not None:
        hardware = loaded_hardware

    model_io = try_stage(failures, "Load model inputs", load_model_inputs, model_path, args.seed)
    if model_io is not None:
        # The first element (input descriptors) is not needed by any later stage.
        _, outputs_desc, arrays_in_order, runtime_inputs = model_io

    # --- Reference build and run -------------------------------------------
    expected_network_mlir = out_dir / "reference" / f"{model_path.stem}.onnx.mlir"
    expected_runner_path = out_dir / "runner" / "build" / "runner"
    reference_compile = try_stage(
        failures,
        "Compile reference",
        compile_reference,
        args,
        model_path,
        out_dir,
        steps,
    )
    if reference_compile is not None:
        network_mlir, _, runner_path = reference_compile
    else:
        # The compile stage failed as a whole, but artefacts already on disk
        # at the expected locations can still feed the later stages.
        if expected_network_mlir.exists():
            network_mlir = expected_network_mlir
            print(f"\n[Continue] Reusing partial ONNX MLIR: {network_mlir}")
        if expected_runner_path.exists():
            runner_path = expected_runner_path
            print(f"\n[Continue] Reusing partial runner: {runner_path}")

    if runner_path is not None and runner_path.exists() and model_io is not None:
        generated_reference = try_stage(
            failures,
            "Run reference",
            generate_reference_outputs,
            runner_path,
            runner_path.parent,
            model_path,
            arrays_in_order,
            steps,
            args,
            out_dir,
        )
        if generated_reference is not None:
            reference_dir = generated_reference
    else:
        record_failure(
            failures,
            "Skip reference outputs",
            "Reference outputs were skipped because the native runner or model inputs are not available.",
        )

    # --- Raptor: compile, materialize inputs, validate ----------------------
    if network_mlir is not None and network_mlir.exists() and hardware["core_count"] > 0:
        compiled_raptor = try_stage(
            failures,
            "Compile Raptor PIM",
            compile_raptor_target,
            network_mlir,
            out_dir / "raptor",
            hardware,
            args,
            steps,
        )
        if compiled_raptor is not None:
            raptor_pim_dir, raptor_pass_timings = compiled_raptor
    else:
        record_failure(
            failures,
            "Skip Raptor PIM compile",
            "Raptor PIM compile was skipped because the ONNX MLIR or hardware configuration is not available.",
        )

    if raptor_pim_dir is not None:
        wrote_inputs = try_stage_success(
            failures,
            "Write Raptor inputs",
            write_inputs_to_memory_bin,
            raptor_pim_dir / "memory.bin",
            raptor_pim_dir / "config.json",
            runtime_inputs,
        )
        if wrote_inputs and reference_dir is not None and outputs_desc:
            validation = try_stage(
                failures,
                "Rust Validation Raptor",
                run_rust_validation,
                "Rust Validation Raptor",
                raptor_pim_dir,
                raptor_pim_dir / "config.json",
                outputs_desc,
                reference_dir,
                steps,
                args,
            )
            raptor_validation = validation if validation is not None else failed_validation("Raptor validation failed")
        elif reference_dir is None:
            raptor_validation = skipped_validation("Reference outputs are not available")
        elif not outputs_desc:
            raptor_validation = skipped_validation("Output descriptors are not available")
        else:
            # Only remaining cause: writing the inputs did not succeed.
            raptor_validation = skipped_validation("Raptor input materialization failed")
    else:
        raptor_validation = skipped_validation("Raptor PIM compilation did not produce a PIM directory")

    # --- PIMCOMP: compile, export for the Rust simulator, validate ----------
    compiled_pimcomp = try_stage(
        failures,
        "Compile PIMCOMP",
        compile_pimcomp,
        args,
        model_path,
        out_dir / "pimcomp",
        steps,
    )
    if compiled_pimcomp is not None:
        verification_info, simulation_info = compiled_pimcomp

    if verification_info is not None and simulation_info is not None and model_io is not None:
        exported = try_stage(
            failures,
            "Export PIMCOMP for Rust",
            export_pimcomp_for_rust,
            model_path,
            verification_info,
            simulation_info,
            runtime_inputs,
            out_dir / "pimcomp_exported",
        )
        if exported is not None:
            pimcomp_export_dir = exported
    elif verification_info is None or simulation_info is None:
        record_failure(
            failures,
            "Skip PIMCOMP Rust export",
            "PIMCOMP Rust export was skipped because PIMCOMP did not produce VerificationInfo.json and SimulationInfo.gz.",
        )
    else:
        record_failure(
            failures,
            "Skip PIMCOMP Rust export",
            "PIMCOMP Rust export was skipped because model inputs are not available.",
        )

    if pimcomp_export_dir is not None and reference_dir is not None and outputs_desc:
        validation = try_stage(
            failures,
            "Rust Validation PIMCOMP",
            run_rust_validation,
            "Rust Validation PIMCOMP",
            pimcomp_export_dir,
            pimcomp_export_dir / "config.json",
            outputs_desc,
            reference_dir,
            steps,
            args,
        )
        pimcomp_validation = validation if validation is not None else failed_validation("PIMCOMP validation failed")
    elif pimcomp_export_dir is None:
        pimcomp_validation = skipped_validation("PIMCOMP Rust export is not available")
    elif reference_dir is None:
        pimcomp_validation = skipped_validation("Reference outputs are not available")
    else:
        pimcomp_validation = skipped_validation("Output descriptors are not available")

    # --- pimsim-nn performance runs -----------------------------------------
    # NOTE(review): the config is written even when --skip-pimsim-nn is set;
    # preserved as-is because its path is also exported in the JSON report.
    if hardware["core_count"] > 0:
        written_config = try_stage(
            failures,
            "Write pimsim-nn config",
            write_pimsim_config,
            args,
            out_dir / "pimsim_config",
            hardware,
        )
        if written_config is not None:
            pimsim_config = written_config
    else:
        record_failure(
            failures,
            "Skip pimsim-nn config",
            "pimsim-nn config was skipped because the hardware configuration is not available.",
        )

    if args.skip_pimsim_nn:
        raptor_perf = skipped_perf("Skipped by --skip-pimsim-nn")
        pimcomp_perf = skipped_perf("Skipped by --skip-pimsim-nn")
    elif pimsim_config is None:
        raptor_perf = skipped_perf("pimsim-nn config is not available")
        pimcomp_perf = skipped_perf("pimsim-nn config is not available")
    else:
        if raptor_pim_dir is not None:
            # NOTE(review): the positional `False` is interpreted by
            # run_pimsim_nn, whose signature is not visible from here.
            perf = try_stage(
                failures,
                "pimsim-nn Raptor",
                run_pimsim_nn,
                "pimsim-nn Raptor",
                raptor_pim_dir,
                pimsim_config,
                False,
                steps,
                args,
            )
            raptor_perf = perf if perf is not None else failed_perf("pimsim-nn Raptor failed")
        else:
            raptor_perf = skipped_perf("Raptor PIM directory is not available")

        if simulation_info is not None:
            pimcomp_pimsim_dir = try_stage(
                failures,
                "Export PIMCOMP for pimsim-nn",
                export_pimcomp_for_pimsim_nn,
                simulation_info,
                out_dir / "pimcomp_pimsim_nn",
            )
            if pimcomp_pimsim_dir is not None:
                perf = try_stage(
                    failures,
                    "pimsim-nn PIMCOMP",
                    run_pimsim_nn,
                    "pimsim-nn PIMCOMP",
                    pimcomp_pimsim_dir,
                    pimsim_config,
                    False,
                    steps,
                    args,
                )
                pimcomp_perf = perf if perf is not None else failed_perf("pimsim-nn PIMCOMP failed")
            else:
                pimcomp_perf = failed_perf("PIMCOMP pimsim-nn export failed")
        else:
            pimcomp_perf = skipped_perf("PIMCOMP SimulationInfo.gz is not available")

    # --- Instruction statistics ---------------------------------------------
    if raptor_pim_dir is not None and raptor_pim_dir.exists():
        parsed = try_stage(failures, "Parse Raptor instructions", parse_raptor_instructions, raptor_pim_dir)
        raptor_instr = parsed if parsed is not None else empty_instruction_summary(error="Failed to parse Raptor instructions")
    else:
        raptor_instr = empty_instruction_summary("Raptor PIM directory is not available")

    if simulation_info is not None and simulation_info.exists():
        parsed = try_stage(failures, "Parse PIMCOMP instructions", parse_pimcomp_instructions, simulation_info)
        pimcomp_instr = parsed if parsed is not None else empty_instruction_summary(error="Failed to parse PIMCOMP instructions")
    else:
        pimcomp_instr = empty_instruction_summary("PIMCOMP SimulationInfo.gz is not available")

    # --- Reports (always written, regardless of failures) -------------------
    report_path = out_dir / "comparison_report.md"
    write_report(
        report_path,
        model_path=model_path,
        hardware=hardware,
        steps=steps,
        failures=failures,
        raptor_validation=raptor_validation,
        pimcomp_validation=pimcomp_validation,
        raptor_perf=raptor_perf,
        pimcomp_perf=pimcomp_perf,
        raptor_instr=raptor_instr,
        pimcomp_instr=pimcomp_instr,
        raptor_pass_timings=raptor_pass_timings,
        pimsim_mode=args.pimsim_mode,
    )

    json_report = {
        "model": str(model_path),
        "hardware": hardware,
        "pimsim_mode": args.pimsim_mode,
        "failures": failures,
        "steps": [asdict(step) for step in steps],
        "raptor_validation": asdict(raptor_validation),
        "pimcomp_validation": asdict(pimcomp_validation),
        "raptor_performance": raptor_perf,
        "pimcomp_performance": pimcomp_perf,
        "raptor_instruction_summary": raptor_instr,
        "pimcomp_instruction_summary": pimcomp_instr,
        "raptor_pass_timings": raptor_pass_timings,
        "paths": {
            "reference_outputs": optional_path(reference_dir),
            "raptor_pim": optional_path(raptor_pim_dir),
            "pimcomp_simulation_info": optional_path(simulation_info),
            "pimcomp_exported_pim": optional_path(pimcomp_export_dir),
            "pimsim_config": optional_path(pimsim_config),
            "report_markdown": str(report_path),
        },
    }
    json_path = out_dir / "comparison_report.json"
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(json_report, f, indent=2)
        f.write("\n")

    print("\n[Done]")
    print(f"  Report: {report_path}")
    print(f"  JSON:   {json_path}")

    # Single source of truth for "something went wrong": either a recorded
    # failure/skip, or a step whose status is anything other than "passed".
    had_problems = bool(failures) or any(step.status != "passed" for step in steps)
    if had_problems:
        print(f"  Completed with {len(failures)} recorded failure/skipped stage(s).")
    if args.fail_on_error and had_problems:
        raise SystemExit(1)
# Run the comparison only when this file is executed as a script, so that
# importing the module does not trigger argument parsing or any stage.
if __name__ == "__main__":
    main()