1498 lines
56 KiB
Python
1498 lines
56 KiB
Python
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import gzip
|
|
import importlib.util
|
|
import json
|
|
import mmap
|
|
import os
|
|
import re
|
|
import shlex
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import types
|
|
from collections import Counter
|
|
from dataclasses import asdict, dataclass
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import numpy as np
|
|
import onnx
|
|
|
|
|
|
# Repository root: two directory levels above the directory that contains this script.
REPO = Path(__file__).resolve().parents[2]
# Directory of the validation helpers; presumably where the helper modules
# imported right after this block live -- verify against the repository layout.
VALIDATION_DIR = REPO / "validation"
# Prepend (not append) so these helpers win over same-named modules on sys.path.
sys.path.insert(0, str(VALIDATION_DIR))
|
|
|
|
from gen_network_runner import gen_network_runner # noqa: E402
|
|
from onnx_utils import _ONNX_TO_NP, gen_random_inputs, onnx_io, save_inputs_to_files, write_inputs_to_memory_bin # noqa: E402
|
|
from validate_one import build_dump_ranges, parse_pim_simulator_outputs # noqa: E402
|
|
from raptor import compile_with_raptor # noqa: E402
|
|
|
|
|
|
@dataclass
class StepRecord:
    """Outcome of a single pipeline step (one external command or compile call)."""

    # Human-readable label of the step.
    name: str
    # Elapsed wall-clock time in seconds (measured with time.perf_counter()).
    duration_sec: float
    # Shell-quoted command line associated with the step.
    command: str
    # "passed" unless overridden; this file also records "failed" and "timeout".
    status: str = "passed"
    # Exit status of the process when it finished with a non-zero return code.
    returncode: int | None = None
    # Short description of what went wrong, if anything.
    error: str | None = None
    # Trailing part of the captured process output, if any was kept.
    output_tail: str | None = None
|
|
|
|
|
|
@dataclass
class CompareResult:
    """Result of comparing simulator outputs against reference outputs."""

    # True when every compared output stayed within tolerance.
    passed: bool
    # Maximum absolute difference per output name.
    max_diffs: dict[str, float]
    # "done" for a completed comparison; this file also uses "skipped" and "failed".
    status: str = "done"
    # Reason for a skipped comparison or message of the failure, if any.
    error: str | None = None
|
|
|
|
|
|
def load_pimcomp_exporter():
    """Import PIMCOMP's ``export_to_pim_simulator.py`` script as a module.

    The script is loaded directly from its path under ``REPO`` (it is not
    importable as a package module) under the name ``pimcomp_exporter``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for the script.
    """
    path = REPO / "third_party/PIMCOMP-NN/verification/export_to_pim_simulator.py"
    spec = importlib.util.spec_from_file_location("pimcomp_exporter", path)
    # Validate the spec *before* handing it to module_from_spec(); an explicit
    # raise also survives ``python -O``, which strips assert statements.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {path}")
    module = importlib.util.module_from_spec(spec)
    # Register an empty stand-in for cv2 (only when it is not already loaded)
    # so an ``import cv2`` inside the exporter does not require OpenCV.
    sys.modules.setdefault("cv2", types.ModuleType("cv2"))
    spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
def load_mesh_builder():
    """Import ``validation/pimsim-configs/generate_mesh_config.py`` as a module.

    The script is loaded directly from its path under ``REPO`` under the name
    ``mesh_builder``.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for the script.
    """
    path = REPO / "validation/pimsim-configs/generate_mesh_config.py"
    spec = importlib.util.spec_from_file_location("mesh_builder", path)
    # Validate the spec *before* handing it to module_from_spec(); an explicit
    # raise also survives ``python -O``, which strips assert statements.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
|
|
|
|
|
def shell_join(cmd: list[str]) -> str:
    """Return *cmd* as one shell-quoted command line.

    Every argument is converted with ``str()`` first, so path-like or numeric
    arguments are accepted as well.
    """
    quoted_args = (shlex.quote(str(arg)) for arg in cmd)
    return " ".join(quoted_args)
|
|
|
|
|
|
def print_step(name: str, cmd: list[str] | None = None, cwd: Path | None = None):
    """Print a banner for a pipeline step.

    When *cmd* is given, the working directory (``REPO`` if *cwd* is falsy)
    and the shell-quoted command line are printed below the banner.
    """
    print(f"\n[{name}]")
    if cmd is None:
        return
    working_dir = cwd or REPO
    print(f" cwd: {working_dir}")
    print(f" $ {shell_join(cmd)}")
|
|
|
|
|
|
def output_tail(output: str | bytes | None, limit: int = 4000) -> str:
    """Return at most the last *limit* characters of *output*.

    Args:
        output: Captured process output; bytes are decoded with undecodable
            bytes replaced, ``None`` is treated as empty.
        limit: Maximum number of trailing characters to keep.

    Returns:
        The tail of the text, or ``""`` when there is no output or *limit* is
        not positive.
    """
    if output is None:
        return ""
    if isinstance(output, bytes):
        output = output.decode(errors="replace")
    # ``output[-0:]`` is the *whole* string and a negative limit would slice
    # from the front, so non-positive limits are handled explicitly.
    if limit <= 0:
        return ""
    return output[-limit:]
|
|
|
|
|
|
def exception_message(exc: BaseException) -> str:
    """Render *exc* as a human-readable message.

    Failed or timed-out subprocess calls are described by their command line
    and, when available, the tail of their captured output. Every other
    exception is rendered as ``"<TypeName>: <message>"``.
    """
    subprocess_errors = (subprocess.CalledProcessError, subprocess.TimeoutExpired)
    if not isinstance(exc, subprocess_errors):
        return f"{type(exc).__name__}: {exc}"

    if isinstance(exc.cmd, list):
        command = shell_join([str(arg) for arg in exc.cmd])
    else:
        command = str(exc.cmd)
    tail = output_tail(exc.output)

    if isinstance(exc, subprocess.CalledProcessError):
        headline = f"command failed with exit code {exc.returncode}: {command}"
    else:
        headline = f"command timed out after {exc.timeout} seconds: {command}"

    if not tail:
        return headline
    return headline + f"\n--- output tail ---\n{tail}"
|
|
|
|
|
|
def print_failure(name: str, exc: BaseException | str) -> None:
    """Print a failure banner for stage *name* followed by the error text.

    *exc* may be a ready-made message or an exception; only the first 20
    lines of the message are printed.
    """
    if isinstance(exc, str):
        message = exc
    else:
        message = exception_message(exc)
    print(f"\n[{name} FAILED]")
    shown_lines = message.splitlines()[:20]
    for text in shown_lines:
        print(f" {text}")
|
|
|
|
|
|
def run_logged(
    name: str,
    cmd: list[str],
    *,
    cwd: Path,
    timeout_sec: float,
    steps: list[StepRecord],
) -> str:
    """Run *cmd*, append a ``StepRecord`` to *steps*, and return its output.

    stdout and stderr are captured together as text. A record is appended for
    every outcome (passed, failed, timeout) before the function returns or
    raises.

    Raises:
        subprocess.TimeoutExpired: If the command exceeds *timeout_sec*.
        subprocess.CalledProcessError: If the command exits non-zero; its
            ``output`` carries only the tail of the captured output.
    """
    print_step(name, cmd, cwd)
    started = time.perf_counter()
    command = shell_join(cmd)

    def record(elapsed: float, **details) -> None:
        # All records share name/duration/command; callers add the rest.
        steps.append(StepRecord(name=name, duration_sec=elapsed, command=command, **details))

    try:
        completed = subprocess.run(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            timeout=timeout_sec,
        )
    except subprocess.TimeoutExpired as exc:
        elapsed = time.perf_counter() - started
        partial = output_tail(exc.output)
        record(
            elapsed,
            status="timeout",
            error=f"Timed out after {timeout_sec} seconds",
            output_tail=partial or None,
        )
        raise

    elapsed = time.perf_counter() - started
    if completed.returncode == 0:
        record(elapsed)
        return completed.stdout

    tail = output_tail(completed.stdout)
    record(
        elapsed,
        status="failed",
        returncode=completed.returncode,
        error=f"Exited with status {completed.returncode}",
        output_tail=tail or None,
    )
    raise subprocess.CalledProcessError(completed.returncode, cmd, output=tail)
|
|
|
|
|
|
def remove_tree(path: Path) -> None:
    """Delete *path*, whatever it is; do nothing if it does not exist.

    Symlinks (including dangling ones) and regular files are unlinked.
    Directories are emptied recursively, re-listing their contents until
    nothing is left, and then removed.
    """
    is_link = path.is_symlink()
    if not is_link and not path.exists():
        return
    if is_link or path.is_file():
        path.unlink()
        return
    # Keep re-listing until the directory reports no entries at all.
    while entries := list(path.iterdir()):
        for entry in entries:
            remove_tree(entry)
    path.rmdir()
|
|
|
|
|
|
def load_model_inputs(model_path: Path, seed: int):
    """Build one array per model input, generating random data where needed.

    Graph inputs backed by an initializer take the initializer's values (cast
    to the dtype ``_ONNX_TO_NP`` maps their element type to); all other inputs
    receive arrays from ``gen_random_inputs`` seeded with *seed*.

    Returns:
        ``(inputs_desc, outputs_desc, arrays_in_order, runtime_only)`` where
        ``arrays_in_order`` follows ``inputs_desc`` and ``runtime_only`` holds
        just the generated arrays, in the same order.
    """
    graph_initializers = onnx.load(model_path).graph.initializer
    constants = {init.name: onnx.numpy_helper.to_array(init) for init in graph_initializers}

    inputs_desc, outputs_desc = onnx_io(model_path)
    runtime_desc = [desc for desc in inputs_desc if desc[1] not in constants]
    generated, _ = gen_random_inputs(runtime_desc, seed=seed)
    generated_by_name = dict(zip((desc[1] for desc in runtime_desc), generated))

    arrays_in_order = []
    runtime_only = []
    for _, name, elem_type, _ in inputs_desc:
        if name in constants:
            arrays_in_order.append(constants[name].astype(_ONNX_TO_NP[elem_type], copy=False))
            continue
        runtime_array = generated_by_name[name]
        arrays_in_order.append(runtime_array)
        runtime_only.append(runtime_array)
    return inputs_desc, outputs_desc, arrays_in_order, runtime_only
|
|
|
|
|
|
def compare_simulator_outputs(
    output_bin: Path,
    outputs_desc: list[tuple[int, str, int, list[int]]],
    reference_dir: Path,
    *,
    threshold: float,
    rtol: float,
) -> CompareResult:
    """Compare simulator outputs in *output_bin* against reference CSV files.

    Each output is read from ``output<idx>_<sanitized name>.csv`` in
    *reference_dir* and passes when every element satisfies
    ``|sim - ref| <= threshold + rtol * |ref|`` (computed in float64).

    Returns:
        A ``CompareResult`` with the overall verdict and the maximum absolute
        difference per output name.
    """
    sim_arrays = parse_pim_simulator_outputs(output_bin, outputs_desc)
    max_diffs: dict[str, float] = {}
    within_tolerance: list[bool] = []
    for sim_array, desc in zip(sim_arrays, outputs_desc):
        idx, name, _, shape = desc
        reference_csv = reference_dir / f"output{idx}_{sanitize_output_name(name)}.csv"
        loaded = np.loadtxt(reference_csv, delimiter=",", dtype=np.float32).reshape(shape)
        expected = loaded.astype(np.float64)
        abs_error = np.abs(sim_array.astype(np.float64) - expected)
        tolerance = threshold + rtol * np.abs(expected)
        max_diffs[name] = float(np.max(abs_error))
        # "<=" (not a negated ">") so NaN differences count as failures.
        within_tolerance.append(bool(np.all(abs_error <= tolerance)))
    return CompareResult(passed=all(within_tolerance), max_diffs=max_diffs)
|
|
|
|
|
|
def sanitize_output_name(name: str) -> str:
    """Return a file-name-safe version of the first 255 characters of *name*.

    Alphanumeric characters and ``_``, ``.``, ``-`` are kept; every other
    character becomes ``_``.
    """
    allowed_punctuation = "_.-"
    pieces = []
    for ch in name[:255]:
        keep = ch.isalnum() or ch in allowed_punctuation
        pieces.append(ch if keep else "_")
    return "".join(pieces)
|
|
|
|
|
|
def load_effective_hardware(args: argparse.Namespace) -> dict[str, int]:
    """Resolve the hardware parameters to use for this run.

    Values come from ``<args.pimcomp_dir>/config.json``; any truthy
    command-line override on *args* (``mesh_rows``, ``mesh_cols``,
    ``crossbar_count``, ``crossbar_size``, ``core_count``) takes precedence.
    The core count falls back to ``mesh_rows * mesh_cols``.

    Raises:
        ValueError: If the configured crossbar is not square.
    """
    config_path = args.pimcomp_dir / "config.json"
    with open(config_path, "r", encoding="utf-8") as f:
        chip = json.load(f)["chip_config"]
    rows, cols = chip["network_config"]["layout"]
    matrix = chip["core_config"]["matrix_config"]
    xbar_h, xbar_w = matrix["xbar_size"]

    hardware: dict[str, int] = {}
    hardware["mesh_rows"] = args.mesh_rows or rows
    hardware["mesh_cols"] = args.mesh_cols or cols
    hardware["crossbar_count"] = args.crossbar_count or matrix["xbar_array_count"]
    hardware["crossbar_size"] = args.crossbar_size or xbar_h
    if xbar_h != xbar_w:
        raise ValueError(f"Only square crossbars are supported, got {xbar_h}x{xbar_w}")

    mesh_cores = hardware["mesh_rows"] * hardware["mesh_cols"]
    hardware["core_count"] = args.core_count or mesh_cores
    return hardware
|
|
|
|
|
|
def write_pimsim_config(args: argparse.Namespace, out_dir: Path, hardware: dict[str, int]) -> Path:
    """Write a pimsim-nn chip config and its mesh network file into *out_dir*.

    The bundled example latency config is used as a template and patched with
    the values from *hardware* and the simulation mode/time taken from *args*.

    Returns:
        Path of the written ``<pimsim_mode>_config.json``.
    """
    mesh_builder = load_mesh_builder()
    template_path = REPO / "backend-simulators/pim/pimsim-nn/example/config/latency_config.json"
    with open(template_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    chip = config["chip_config"]
    matrix = chip["core_config"]["matrix_config"]
    matrix["xbar_array_count"] = hardware["crossbar_count"]
    matrix["xbar_size"] = [hardware["crossbar_size"], hardware["crossbar_size"]]

    mesh_shape = (hardware["mesh_rows"], hardware["mesh_cols"])
    network_file_name = f"network_mesh_{hardware['core_count']}.json"
    network = chip["network_config"]
    network["layout"] = list(mesh_shape)
    network["net_config_file_path"] = network_file_name
    chip["core_cnt"] = hardware["core_count"]

    sim = config["sim_config"]
    sim["sim_mode"] = 1 if args.pimsim_mode == "latency" else 0
    sim["sim_time"] = args.pimsim_time_ms

    out_dir.mkdir(parents=True, exist_ok=True)
    config_path = out_dir / f"{args.pimsim_mode}_config.json"
    network_path = out_dir / network_file_name

    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2)
        f.write("\n")
    with open(network_path, "w", encoding="utf-8") as f:
        # The network description is built only after the file was opened.
        network_description = mesh_builder.build_network(hardware["core_count"], mesh_shape)
        json.dump(network_description, f, separators=(",", ":"))
        f.write("\n")
    return config_path
|
|
|
|
|
|
def compile_reference(
    args: argparse.Namespace,
    model_path: Path,
    work_dir: Path,
    steps: list[StepRecord],
) -> tuple[Path, Path, Path]:
    """Compile the reference artifacts for *model_path* and build the runner.

    Runs the Raptor binary twice (ONNX IR emission into ``reference/`` and a
    native ``-O3`` compile into ``runner/``), generates ``runner.c`` and builds
    it with CMake in ``runner/build``.

    Returns:
        ``(network_mlir, network_so, runner_executable)`` paths.
    """
    raptor_dir = work_dir / "reference"
    runner_dir = work_dir / "runner"
    build_dir = runner_dir / "build"
    for directory in (raptor_dir, build_dir):
        directory.mkdir(parents=True, exist_ok=True)

    stem = model_path.stem
    onnx_ir_base = raptor_dir / stem
    runner_base = runner_dir / stem
    raptor = str(args.raptor_path)
    model = str(model_path)

    def run(step_name: str, cmd: list[str], cwd: Path) -> None:
        # Every external command shares the timeout and the step list.
        run_logged(step_name, cmd, cwd=cwd, timeout_sec=args.timeout_seconds, steps=steps)

    run("Reference Emit ONNX IR", [raptor, model, "-o", str(onnx_ir_base), "--EmitONNXIR"], REPO)
    run("Reference Native Compile", [raptor, "-O3", model, "-o", str(runner_base)], REPO)
    network_so = runner_base.with_suffix(".so")
    network_mlir = onnx_ir_base.with_suffix(".onnx.mlir")

    print_step("Generate Runner Source")
    gen_network_runner(model_path, network_so, args.onnx_include_dir, out=runner_dir / "runner.c", verbose=False)

    configure_cmd = ["cmake", str(runner_dir), "-DCMAKE_BUILD_TYPE=Release", "-DCMAKE_C_FLAGS_RELEASE=-O3"]
    run("Configure Runner", configure_cmd, build_dir)
    run("Build Runner", ["cmake", "--build", ".", "-j"], build_dir)
    return network_mlir, network_so, build_dir / "runner"
|
|
|
|
|
|
def generate_reference_outputs(
    runner_path: Path,
    runner_build_dir: Path,
    model_path: Path,
    arrays_in_order: list[np.ndarray],
    steps: list[StepRecord],
    args: argparse.Namespace,
    out_dir: Path,
) -> Path:
    """Run the reference runner on the given inputs and save its outputs.

    Inputs are written to ``<out_dir>/inputs``; the runner is asked to save
    its outputs as CSV files into ``<out_dir>/reference_outputs``.

    Returns:
        The directory containing the reference outputs.
    """
    inputs_dir = out_dir / "inputs"
    reference_dir = out_dir / "reference_outputs"
    for directory in (inputs_dir, reference_dir):
        directory.mkdir(parents=True, exist_ok=True)

    flags, _ = save_inputs_to_files(model_path, arrays_in_order, inputs_dir)
    runner_cmd = [str(runner_path)]
    runner_cmd.extend(flags)
    runner_cmd.extend(["--save-csv-dir", str(reference_dir)])
    run_logged(
        "Run Reference",
        runner_cmd,
        cwd=runner_build_dir,
        timeout_sec=args.timeout_seconds,
        steps=steps,
    )
    return reference_dir
|
|
|
|
|
|
def compile_raptor_target(
    model_mlir: Path,
    out_dir: Path,
    hardware: dict[str, int],
    args: argparse.Namespace,
    steps: list[StepRecord],
) -> tuple[Path, dict[str, float]]:
    """Compile *model_mlir* for the PIM target through ``compile_with_raptor``.

    A ``StepRecord`` named "Compile Raptor PIM" is appended to *steps* whether
    the compile succeeds or fails; failures are re-raised unchanged.

    Returns:
        ``(out_dir / "pim", timings)`` where *timings* is whatever
        ``compile_with_raptor`` returned.
    """
    step_name = "Compile Raptor PIM"
    out_dir.mkdir(parents=True, exist_ok=True)
    model_base = out_dir / "model"
    raptor_extra_args = ["--pim-emit-json", *args.raptor_extra_arg]

    # This command line is only displayed and recorded; the actual invocation
    # is assembled inside compile_with_raptor().
    cmd = [
        str(args.raptor_path),
        str(model_mlir),
        "-o",
        str(model_base),
        "--maccel=PIM",
        "--EmitPimCodegen",
        f"--crossbar-size={hardware['crossbar_size']}",
        f"--crossbar-count={hardware['crossbar_count']}",
        f"--core-count={hardware['core_count']}",
        *raptor_extra_args,
    ]
    # NOTE(review): the banner reports REPO as cwd while cwd=out_dir is passed
    # to compile_with_raptor() below -- confirm which one is intended.
    print_step(step_name, cmd, REPO)
    started = time.perf_counter()
    command = shell_join(cmd)

    def record(**details) -> None:
        steps.append(
            StepRecord(
                name=step_name,
                duration_sec=time.perf_counter() - started,
                command=command,
                **details,
            )
        )

    try:
        timings = compile_with_raptor(
            model_mlir,
            args.raptor_path,
            model_base,
            hardware["crossbar_size"],
            hardware["crossbar_count"],
            core_count=hardware["core_count"],
            raptor_extra_args=raptor_extra_args,
            cwd=out_dir,
            verbose=args.verbose_raptor_compile,
            timeout_sec=args.timeout_seconds,
        )
    except Exception as exc:
        record(status="failed", error=exception_message(exc))
        raise

    record()
    return out_dir / "pim", timings
|
|
|
|
|
|
def run_rust_validation(
    label: str,
    pim_dir: Path,
    config_path: Path,
    outputs_desc: list[tuple[int, str, int, list[int]]],
    reference_dir: Path,
    steps: list[StepRecord],
    args: argparse.Namespace,
) -> CompareResult:
    """Run the ``pim-simulator`` cargo binary on *pim_dir* and check its outputs.

    The simulator dumps the memory ranges computed by ``build_dump_ranges``
    into ``<pim_dir parent>/semantic_validation/out.bin``, which is then
    compared against the reference outputs in *reference_dir*.
    """
    simulation_dir = pim_dir.parent / "semantic_validation"
    output_bin = simulation_dir / "out.bin"
    dump_ranges = build_dump_ranges(config_path, outputs_desc)

    cargo_args = [
        "cargo",
        "run",
        "--no-default-features",
        "--release",
        "--package",
        "pim-simulator",
        "--bin",
        "pim-simulator",
        "--",
    ]
    simulator_args = ["-f", str(pim_dir), "-o", str(output_bin), "-d", dump_ranges]
    cmd = cargo_args + simulator_args

    simulation_dir.mkdir(parents=True, exist_ok=True)
    run_logged(
        label,
        cmd,
        cwd=args.pim_simulator_dir,
        timeout_sec=args.timeout_seconds,
        steps=steps,
    )
    return compare_simulator_outputs(
        output_bin,
        outputs_desc,
        reference_dir,
        threshold=args.threshold,
        rtol=args.rtol,
    )
|
|
|
|
|
|
def copy_pimcomp_outputs(args: argparse.Namespace, out_dir: Path):
    """Copy the three PIMCOMP result files from ``<pimcomp_dir>/output`` to *out_dir*.

    *out_dir* is created if needed; file metadata is preserved by the copy.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    source_dir = args.pimcomp_dir / "output"
    artifact_names = ("SimulationInfo.gz", "VerificationInfo.json", "MappingResult.txt")
    for name in artifact_names:
        shutil.copy2(source_dir / name, out_dir / name)
|
|
|
|
|
|
def compile_pimcomp(
    args: argparse.Namespace,
    model_path: Path,
    out_dir: Path,
    steps: list[StepRecord],
) -> tuple[Path, Path]:
    """Run the PIMCOMP frontend and backend on *model_path*.

    The frontend writes ``models/JSON/compare_<stem>.json`` inside the PIMCOMP
    checkout, the backend is run from its ``build`` directory, and the result
    files are then copied to *out_dir*.

    Returns:
        ``(VerificationInfo.json, SimulationInfo.gz)`` paths inside *out_dir*.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    pimcomp_dir = args.pimcomp_dir
    model_name = f"compare_{model_path.stem}"
    frontend_json = pimcomp_dir / "models/JSON" / f"{model_name}.json"

    def run(step_name: str, cmd: list[str], cwd: Path) -> None:
        # Both stages share the timeout and the step list.
        run_logged(step_name, cmd, cwd=cwd, timeout_sec=args.timeout_seconds, steps=steps)

    frontend_cmd = ["python3", "frontend.py"]
    frontend_cmd += ["--model_path", str(model_path)]
    frontend_cmd += ["--save_path", str(frontend_json)]
    run("PIMCOMP Frontend", frontend_cmd, pimcomp_dir / "frontend")

    backend_binary = pimcomp_dir / "build" / "PIMCOMP-NN"
    backend_cmd = [str(backend_binary), f"-m={model_name}", "-p=batch", "-v=YES", "-s=YES"]
    run("PIMCOMP Backend", backend_cmd, pimcomp_dir / "build")

    copy_pimcomp_outputs(args, out_dir)
    return out_dir / "VerificationInfo.json", out_dir / "SimulationInfo.gz"
|
|
|
|
|
|
def export_pimcomp_for_pimsim_nn(simulation_info: Path, output_dir: Path) -> Path:
    """Split a gzipped PIMCOMP ``SimulationInfo`` file into per-core JSON files.

    *output_dir* is recreated from scratch and receives ``config.json`` plus
    one ``core_<n>.json`` per core that has a non-empty instruction list.

    Returns:
        *output_dir*.

    Raises:
        ValueError: If no core has instructions, or if the cores that do are
            not numbered contiguously from 0.
    """
    if output_dir.exists():
        remove_tree(output_dir)
    with gzip.open(simulation_info, "rt", encoding="utf-8") as f:
        sim_info = json.load(f)

    output_dir.mkdir(parents=True, exist_ok=True)
    sim_config = sim_info["config"]

    present_core_indices = []
    for key, value in sim_info.items():
        if key.startswith("core") and isinstance(value, list) and value:
            present_core_indices.append(int(key[4:]))
    present_core_indices.sort()

    if not present_core_indices:
        raise ValueError("PIMCOMP SimulationInfo.gz does not contain any non-empty core instruction streams")
    highest_index = present_core_indices[-1]
    if present_core_indices != list(range(highest_index + 1)):
        raise ValueError(f"PIMCOMP core numbering is not contiguous: {present_core_indices}")

    def dump_compact(target: Path, payload) -> None:
        # Compact separators, newline-terminated.
        with open(target, "w", encoding="utf-8") as f:
            json.dump(payload, f, separators=(",", ":"))
            f.write("\n")

    config = {"core_cnt": len(present_core_indices)}
    for field_name in ("xbar_size", "xbar_array_count", "cell_precision", "adc_count"):
        config[field_name] = sim_config[field_name]
    config["array_group_map"] = {}
    for core_idx in present_core_indices:
        core_name = f"core{core_idx}"
        config["array_group_map"][core_name] = sim_config["array_group_map"].get(core_name, [])
    dump_compact(output_dir / "config.json", config)

    for core_idx in present_core_indices:
        dump_compact(output_dir / f"core_{core_idx}.json", sim_info[f"core{core_idx}"])
    return output_dir
|
|
|
|
|
|
def flatten_pimcomp_input(array: np.ndarray) -> np.ndarray:
    """Return *array* as a flat float32 vector.

    A 4-D array has its axis 1 moved to the end (axis order 0, 2, 3, 1)
    before flattening; arrays of any other rank are flattened as they are.
    """
    as_float32 = array.astype(np.float32, copy=False)
    if as_float32.ndim != 4:
        return as_float32.reshape(-1)
    return np.moveaxis(as_float32, 1, -1).reshape(-1)
|
|
|
|
|
|
def export_pimcomp_for_rust(
    model_path: Path,
    verification_info: Path,
    simulation_info: Path,
    runtime_inputs: list[np.ndarray],
    output_dir: Path,
) -> Path:
    """Export a PIMCOMP compilation into a self-contained simulator directory.

    *output_dir* is recreated from scratch and receives:

    * ``memory.bin``: a flat memory image holding the input tensor, the bias
      vectors, the constant vectors needed by ``lldi`` instructions, and a
      zero-filled output region;
    * ``weights/crossbar_<n>.bin``: float32 weight tiles, plus per-core
      ``core_<i>/crossbar_<k>.bin`` symlinks pointing at them;
    * ``core_<i>.json``: the translated instruction stream of every core;
    * ``config.json``: hardware parameters and input/output addresses.

    Judging by the function name, this layout targets the Rust simulator;
    the format itself is not defined in this function.

    Args:
        model_path: ONNX model handed to the PIMCOMP exporter helper.
        verification_info: Path of PIMCOMP's ``VerificationInfo`` JSON file.
        simulation_info: Path of PIMCOMP's gzipped ``SimulationInfo`` JSON file.
        runtime_inputs: Runtime input tensors; exactly one is supported.
        output_dir: Destination directory (removed first if it exists).

    Returns:
        *output_dir*.

    Raises:
        ValueError: If *runtime_inputs* does not contain exactly one tensor.
        RuntimeError: If the simulation and verification instruction streams
            disagree, or an unsupported op or LD stage is encountered.
    """
    if len(runtime_inputs) != 1:
        raise ValueError("PIMCOMP export currently requires exactly one runtime input tensor")
    if output_dir.exists():
        remove_tree(output_dir)
    exporter = load_pimcomp_exporter()
    with open(verification_info, "r", encoding="utf-8") as f:
        final_info = json.load(f)
    with gzip.open(simulation_info, "rt", encoding="utf-8") as f:
        sim_info = json.load(f)

    onnx_model, weights, gemm_weights, output_to_weight, output_to_bias = exporter.load_model_info(
        model_path, final_info
    )
    input_tensor = flatten_pimcomp_input(runtime_inputs[0])
    node_list = final_info["node_list"]
    max_output = exporter.max_output_element_num(node_list)
    local_group_map = exporter.map_local_groups(final_info, sim_info)

    output_dir.mkdir(parents=True, exist_ok=True)
    weights_dir = output_dir / "weights"
    weights_dir.mkdir(parents=True, exist_ok=True)

    # --- Memory layout pass: assign addresses only, nothing is written yet. ---
    # exporter.byte_offset() presumably converts an element count into a byte
    # count -- TODO confirm against the exporter script.
    input_addr = 0
    cursor = exporter.byte_offset(len(input_tensor))
    bias_addrs: dict[str, int] = {}
    for node_name, bias_name in output_to_bias.items():
        bias = weights[bias_name].astype(np.float32).flatten()
        bias_addrs[node_name] = cursor
        cursor += exporter.byte_offset(len(bias))

    # One constant vector per distinct (immediate value, length) pair used by
    # any lldi instruction; identical pairs share the same address.
    lldi_addrs: dict[tuple[bytes, int], int] = {}
    for core_idx in range(sim_info["config"]["core_cnt"]):
        # "or []" tolerates a core entry that is present but null/empty.
        for inst in sim_info.get(f"core{core_idx}", []) or []:
            if inst["op"] != "lldi":
                continue
            key = (exporter.float32_bytes(inst["imm"]), inst["len"])
            if key not in lldi_addrs:
                lldi_addrs[key] = cursor
                cursor += exporter.byte_offset(inst["len"])

    # The output region starts at the next multiple of 256 and reserves one
    # max_output-sized slot per entry of node_list.
    output_base = (cursor + 255) & ~255
    memory_size = output_base + exporter.byte_offset(max_output * len(node_list))
    memory = bytearray(memory_size)
    # --- Fill pass: write input, biases and lldi constants at their addresses. ---
    memory[input_addr : input_addr + input_tensor.nbytes] = input_tensor.tobytes()
    for node_name, bias_name in output_to_bias.items():
        bias = weights[bias_name].astype(np.float32).flatten()
        start = bias_addrs[node_name]
        memory[start : start + bias.nbytes] = bias.tobytes()
    for (value_bytes, element_num), start in lldi_addrs.items():
        value = np.frombuffer(value_bytes, dtype=np.float32)[0]
        blob = np.full(element_num, value, dtype=np.float32)
        memory[start : start + blob.nbytes] = blob.tobytes()

    config = {
        "core_cnt": sim_info["config"]["core_cnt"],
        "xbar_size": sim_info["config"]["xbar_size"],
        "xbar_array_count": sim_info["config"]["xbar_array_count"],
        "cell_precision": sim_info["config"]["cell_precision"],
        "adc_count": sim_info["config"]["adc_count"],
        "array_group_map": {},
        "inputs_addresses": [input_addr],
        "outputs_addresses": [],
    }
    # Each graph output lives in the slot selected by its node's "new_node_index".
    output_name_to_node = {node["name"]: node for node in node_list}
    for graph_output in onnx_model.graph.output:
        node = output_name_to_node[graph_output.name]
        config["outputs_addresses"].append(output_base + exporter.byte_offset(node["new_node_index"] * max_output))

    ag_info = final_info["AG_info"]
    # Global counter so weight file names are unique across all cores.
    weight_counter = 0
    xbar_size = int(sim_info["config"]["xbar_size"][0])
    for core_idx in range(config["core_cnt"]):
        core_name = f"core{core_idx}"
        core_dir = output_dir / f"core_{core_idx}"
        core_dir.mkdir(parents=True, exist_ok=True)
        local_to_global = local_group_map.get(core_idx, {})
        ag_counts = sim_info["config"]["array_group_map"].get(core_name, [])
        # group_prefix[g] is the index of the first physical crossbar of local
        # group g (running sum of the per-group crossbar counts).
        group_prefix = []
        total_crossbars = 0
        for count in ag_counts:
            group_prefix.append(total_crossbars)
            total_crossbars += count
        config["array_group_map"][core_name] = list(range(total_crossbars))

        # Write one weight tile per crossbar of every array group on this core.
        for local_group, global_ag in sorted(local_to_global.items()):
            info = ag_info[global_ag]
            weight_name = output_to_weight[info["node_name"]]
            matrix = gemm_weights[weight_name]
            # The "*_end" bounds are inclusive, hence the + 1.
            row_slice = slice(info["height_start"], info["height_end"] + 1)
            first_physical = group_prefix[local_group]
            for crossbar_idx, crossbar in enumerate(info["crossbar"]):
                col_slice = slice(crossbar["width_start"], crossbar["width_end"] + 1)
                # Rows are zero-padded up to the crossbar height; the column
                # count is left at the slice width.
                tile = np.zeros((xbar_size, col_slice.stop - col_slice.start), dtype=np.float32)
                tile_rows = matrix[row_slice, col_slice].astype(np.float32)
                tile[: tile_rows.shape[0], :] = tile_rows
                weight_path = weights_dir / f"crossbar_{weight_counter}.bin"
                weight_path.write_bytes(tile.tobytes(order="C"))
                os.symlink(weight_path.resolve(), core_dir / f"crossbar_{first_physical + crossbar_idx}.bin")
                weight_counter += 1

        # Translate this core's simulation instructions. Every op other than
        # setbw/sldi is paired, in order, with the next verification op.
        instructions = []
        # Index (into `instructions`) of the most recent sldi per destination
        # register, so its immediate can be patched once the real address is known.
        last_sldi_by_rd: dict[int, int] = {}
        ver_ops = exporter.filtered_verification_ops(final_info, core_idx)
        ver_index = 0
        for sim_inst in sim_info.get(core_name, []) or []:
            op = sim_inst["op"]
            if op == "setbw":
                # Passed through unchanged; consumes no verification op.
                instructions.append(sim_inst)
                continue
            if op == "sldi":
                translated = {"op": "sldi", "rd": sim_inst["rd"], "imm": exporter.byte_offset(sim_inst["imm"])}
                instructions.append(translated)
                last_sldi_by_rd[sim_inst["rd"]] = len(instructions) - 1
                continue
            if ver_index >= len(ver_ops):
                raise RuntimeError(f"core{core_idx}: simulation op {op} has no matching verification op")
            ver_inst = ver_ops[ver_index]
            ver_index += 1
            ver_op = ver_inst["operation"].lower()
            if ver_op != op:
                raise RuntimeError(
                    f"core{core_idx}: simulation/verification op mismatch {op} vs {ver_op} at {ver_index - 1}"
                )
            if op == "ld":
                if ver_inst["stage"] == "INPUT":
                    src = input_addr + exporter.byte_offset(ver_inst["source_offset"])
                elif ver_inst["stage"] == "BIAS":
                    src = bias_addrs[node_list[ver_inst["node_index"]]["name"]] + exporter.byte_offset(ver_inst["source_offset"])
                else:
                    raise RuntimeError(f"Unsupported LD stage {ver_inst['stage']}")
                # Patch the sldi that set up rs1 so it holds the source address.
                instructions[last_sldi_by_rd[sim_inst["rs1"]]]["imm"] = src
                translated = dict(sim_inst)
                translated["size"] = exporter.byte_offset(sim_inst["size"])
                instructions.append(translated)
            elif op == "st":
                dst = output_base + exporter.byte_offset(
                    ver_inst["node_index"] * max_output + ver_inst["destination_offset"]
                )
                # Patch the sldi that set up rd so it holds the destination address.
                instructions[last_sldi_by_rd[sim_inst["rd"]]]["imm"] = dst
                translated = dict(sim_inst)
                translated["size"] = exporter.byte_offset(sim_inst["size"])
                instructions.append(translated)
            elif op == "lldi":
                # Replaced by sldi + ld from the pre-filled constant vector.
                key = (exporter.float32_bytes(sim_inst["imm"]), sim_inst["len"])
                src = lldi_addrs[key]
                # Use register 0 or 1 for the address, whichever is not the destination.
                # NOTE(review): this sldi is not recorded in last_sldi_by_rd -- confirm that is intended.
                temp_rd = 1 if sim_inst["rd"] == 0 else 0
                instructions.append({"op": "sldi", "rd": temp_rd, "imm": src})
                instructions.append(
                    {
                        "op": "ld",
                        "rd": sim_inst["rd"],
                        "rs1": temp_rd,
                        "size": exporter.byte_offset(sim_inst["len"]),
                        "offset": sim_inst["offset"],
                    }
                )
            elif op in ("lmv", "vvadd", "vvmul", "vvmax", "vrelu"):
                translated = dict(sim_inst)
                translated["len"] = exporter.byte_offset(sim_inst["len"])
                instructions.append(translated)
            elif op in ("send", "recv"):
                translated = dict(sim_inst)
                translated["size"] = exporter.byte_offset(sim_inst["size"])
                instructions.append(translated)
            elif op == "mvmul":
                # One mvmul on an array group becomes one mvmul per crossbar of
                # that group, each writing to its own slice of the destination.
                local_group = sim_inst["group"]
                global_ag = local_to_global[local_group]
                first_physical = group_prefix[local_group]
                widths = [
                    crossbar["width_end"] - crossbar["width_start"] + 1
                    for crossbar in ag_info[global_ag]["crossbar"]
                ]
                dst = instructions[last_sldi_by_rd[sim_inst["rd"]]]["imm"]
                src = instructions[last_sldi_by_rd[sim_inst["rs1"]]]["imm"]
                out_offset = 0
                for idx, width in enumerate(widths):
                    # Re-point rd at this crossbar's output slice and reload rs1
                    # with the shared source address before each mvmul.
                    instructions.append({"op": "sldi", "rd": sim_inst["rd"], "imm": dst + exporter.byte_offset(out_offset)})
                    instructions.append({"op": "sldi", "rd": sim_inst["rs1"], "imm": src})
                    translated = dict(sim_inst)
                    translated["group"] = first_physical + idx
                    instructions.append(translated)
                    out_offset += width
            else:
                raise RuntimeError(f"Unsupported PIMCOMP op {op}")

        with open(output_dir / f"core_{core_idx}.json", "w", encoding="utf-8") as f:
            json.dump(instructions, f, separators=(",", ":"))
            f.write("\n")

    with open(output_dir / "config.json", "w", encoding="utf-8") as f:
        json.dump(config, f, separators=(",", ":"))
        f.write("\n")
    (output_dir / "memory.bin").write_bytes(memory)
    return output_dir
|
|
|
|
|
|
def parse_pimsim_nn_report(output: str) -> dict[str, float | int | str]:
    """Extract the numeric metrics from a pimsim-nn report.

    Args:
        output: Captured text output of the simulator.

    Returns:
        A dict that always contains ``"raw_output"`` and, for every metric
        whose line was found and could be parsed, one of ``output_count``
        (int), ``throughput``, ``average_latency_ms``, ``latency_ms``,
        ``average_power_mw`` and ``average_energy_pj`` (floats).
    """
    # Accept plain decimals as well as scientific notation for every float
    # metric (throughput previously rejected values such as "1.5e+03").
    number = r"([0-9.eE+-]+)"
    patterns = {
        "output_count": r"output count:\s+([0-9]+)\s+samples",
        "throughput": rf"throughput:\s+{number}\s+samples/s",
        "average_latency_ms": rf"average latency:\s+{number}\s+ms",
        # NOTE(review): this also matches inside an "average latency:" line,
        # so latency_ms may mirror average_latency_ms -- confirm whether
        # downstream code relies on that before tightening the pattern.
        "latency_ms": rf"latency:\s+{number}\s+ms",
        "average_power_mw": rf"average power:\s+{number}\s+mW",
        "average_energy_pj": rf"average energy:\s+{number}\s+pJ/it",
    }
    result: dict[str, float | int | str] = {"raw_output": output}
    for key, pattern in patterns.items():
        match = re.search(pattern, output)
        if match is None:
            continue
        value = match.group(1)
        try:
            result[key] = int(value) if key == "output_count" else float(value)
        except ValueError:
            # The character class also admits non-numbers such as "..." or
            # "e"; treat those as "metric not reported" instead of crashing.
            continue
    return result
|
|
|
|
|
|
def run_pimsim_nn(
    label: str,
    inst_path: Path,
    config_path: Path,
    single_file: bool,
    steps: list[StepRecord],
    args: argparse.Namespace,
) -> dict[str, Any]:
    """Run the pimsim-nn ``ChipTest`` binary and parse its report.

    The binary is started from ``args.pimsim_nn_build_dir`` with ten times the
    regular step timeout; *single_file* is passed as ``"true"``/``"false"``.
    """
    build_dir = args.pimsim_nn_build_dir
    single_file_flag = "true" if single_file else "false"
    cmd = [str(build_dir / "ChipTest"), str(inst_path), str(config_path), single_file_flag]
    report_text = run_logged(
        label,
        cmd,
        cwd=args.pimsim_nn_build_dir,
        timeout_sec=args.timeout_seconds * 10.0,
        steps=steps,
    )
    return parse_pimsim_nn_report(report_text)
|
|
|
|
|
|
def parse_raptor_instructions(pim_dir: Path) -> dict[str, Any]:
    """Summarize the instruction mix of the ``core_<n>.json`` files in *pim_dir*.

    The files are scanned for ``"op":"<name>"`` pairs with a regular
    expression over a memory map instead of being parsed as JSON.

    Returns:
        A dict with ``active_cores``, ``total_instructions``, ``op_counts``
        and three top-10 lists of per-core entries (by total, send and recv
        counts). Each per-core entry has ``core``, ``total``, ``send``,
        ``recv`` and ``mvmul``.
    """
    # Tolerate optional whitespace around the colon so pretty-printed JSON is
    # counted as well as compact JSON.
    op_re = re.compile(br'"op"\s*:\s*"([^"]+)"')
    counts = Counter()
    per_core = []
    for path in sorted(pim_dir.glob("core_*.json"), key=lambda p: int(p.stem.split("_")[1])):
        core_counts = Counter()
        # mmap refuses to map a zero-length file, so an empty core file is
        # simply a core with no instructions.
        if path.stat().st_size > 0:
            with path.open("rb") as f, mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                core_counts.update(m.group(1).decode() for m in op_re.finditer(mm))
        counts.update(core_counts)
        per_core.append(
            {
                "core": path.stem,
                "total": sum(core_counts.values()),
                "send": core_counts.get("send", 0),
                "recv": core_counts.get("recv", 0),
                "mvmul": core_counts.get("mvmul", 0),
            }
        )

    def top_ten(field: str) -> list:
        # Stable sort: ties keep the core-number order established above.
        return sorted(per_core, key=lambda entry: entry[field], reverse=True)[:10]

    return {
        "active_cores": sum(1 for entry in per_core if entry["total"]),
        "total_instructions": int(sum(counts.values())),
        "op_counts": dict(counts),
        "top_cores_by_total": top_ten("total"),
        "top_cores_by_send": top_ten("send"),
        "top_cores_by_recv": top_ten("recv"),
    }
|
|
|
|
|
|
def parse_pimcomp_instructions(simulation_info: Path) -> dict[str, Any]:
    """Summarize the instruction mix of a gzipped PIMCOMP ``SimulationInfo`` file.

    Every top-level ``core<n>`` entry is treated as one core's instruction
    list; the op name is taken from ``"operation"`` or ``"op"`` (lower-cased,
    ``"unknown"`` when neither is set).

    Returns:
        A dict with ``active_cores``, ``total_instructions``, ``op_counts``
        and three top-10 lists of per-core entries (by total, send and recv
        counts). Each per-core entry has ``core``, ``total``, ``send``,
        ``recv`` and ``mvmul``.
    """
    with gzip.open(simulation_info, "rt", encoding="utf-8") as f:
        data = json.load(f)
    per_core = []
    counts = Counter()
    for key in sorted((name for name in data if name.startswith("core")), key=lambda name: int(name[4:])):
        insts = data[key]
        # A core entry may be null (or otherwise not a list) when the core is
        # unused; count it as an idle core instead of failing on iteration.
        if not isinstance(insts, list):
            insts = []
        core_counts = Counter((inst.get("operation") or inst.get("op") or "unknown").lower() for inst in insts)
        counts.update(core_counts)
        per_core.append(
            {
                "core": key,
                "total": int(sum(core_counts.values())),
                "send": core_counts.get("send", 0),
                "recv": core_counts.get("recv", 0),
                "mvmul": core_counts.get("mvmul", 0),
            }
        )

    def top_ten(field: str) -> list:
        # Stable sort: ties keep the core-number order established above.
        return sorted(per_core, key=lambda entry: entry[field], reverse=True)[:10]

    return {
        "active_cores": sum(1 for entry in per_core if entry["total"]),
        "total_instructions": int(sum(counts.values())),
        "op_counts": dict(counts),
        "top_cores_by_total": top_ten("total"),
        "top_cores_by_send": top_ten("send"),
        "top_cores_by_recv": top_ten("recv"),
    }
|
|
|
|
|
|
def format_op_table(counts: dict[str, int], total: int) -> list[str]:
    """Render *counts* as Markdown table rows, most frequent op first.

    Each row holds the op name, its count and its share of *total* as a
    percentage. A single placeholder row is returned when *total* is not
    positive.
    """
    if total <= 0:
        return ["| n/a | 0 | n/a |"]
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return [f"| `{op}` | {count} | {100.0 * count / total:.2f}% |" for op, count in ranked]
|
|
|
|
|
|
def validation_status(result: CompareResult) -> str:
    """Return the report label for a comparison result.

    Completed comparisons yield ``"PASS"`` or ``"FAIL"``; any other status is
    returned upper-cased.
    """
    if result.status != "done":
        return result.status.upper()
    if result.passed:
        return "PASS"
    return "FAIL"
|
|
|
|
|
|
def skipped_validation(reason: str) -> CompareResult:
    """Build the ``CompareResult`` for a validation that was not run."""
    no_diffs: dict[str, float] = {}
    return CompareResult(
        passed=False,
        max_diffs=no_diffs,
        status="skipped",
        error=reason,
    )
|
|
|
|
|
|
def failed_validation(error: BaseException | str) -> CompareResult:
    """Build the ``CompareResult`` for a validation that raised an error.

    *error* may be a ready-made message or the exception itself.
    """
    if isinstance(error, str):
        message = error
    else:
        message = exception_message(error)
    return CompareResult(
        passed=False,
        max_diffs={},
        status="failed",
        error=message,
    )
|
|
|
|
|
|
def skipped_perf(reason: str) -> dict[str, Any]:
    """Build the performance entry for a simulation that was not run."""
    entry: dict[str, Any] = {"skipped": True}
    entry["reason"] = reason
    return entry
|
|
|
|
|
|
def failed_perf(error: BaseException | str) -> dict[str, Any]:
    """Build the performance entry for a simulation that raised an error.

    *error* may be a ready-made message or the exception itself.
    """
    if isinstance(error, str):
        return {"error": error}
    return {"error": exception_message(error)}
|
|
|
|
|
|
def perf_status(perf: dict[str, Any]) -> str:
    """Return ``"SKIPPED"``, ``"FAILED"`` or ``"DONE"`` for a performance entry.

    A truthy ``"skipped"`` value wins over a truthy ``"error"`` value.
    """
    for marker, label in (("skipped", "SKIPPED"), ("error", "FAILED")):
        if perf.get(marker):
            return label
    return "DONE"
|
|
|
|
|
|
def perf_value(perf: dict[str, Any], key: str) -> Any:
    """Return ``perf[key]``, or the string ``"n/a"`` when the key is absent."""
    return perf.get(key, "n/a")
|
|
|
|
|
|
def empty_instruction_summary(reason: str | None = None, error: str | None = None) -> dict[str, Any]:
    """Return an all-zero instruction summary.

    The result has the same keys as a real summary. A *reason* additionally
    marks it as skipped; an *error* is stored under ``"error"``.
    """
    summary: dict[str, Any] = {"active_cores": 0, "total_instructions": 0, "op_counts": {}}
    for ranking in ("top_cores_by_total", "top_cores_by_send", "top_cores_by_recv"):
        summary[ranking] = []
    if reason is not None:
        summary.update(skipped=True, reason=reason)
    if error is not None:
        summary["error"] = error
    return summary
|
|
|
|
|
|
def optional_path(path: Path | None) -> str | None:
    """Convert a path to its string form, passing ``None`` through unchanged."""
    if path is None:
        return None
    return str(path)
|
|
|
|
|
|
def record_failure(failures: list[dict[str, str]], stage: str, exc: BaseException | str) -> None:
    """Append a failure entry for ``stage`` and echo it via ``print_failure``.

    Args:
        failures: Accumulator that receives a ``{"stage", "error"}`` entry.
        stage: Name of the stage that failed or was skipped.
        exc: Either a ready-made message, or an exception that is converted
            to text with ``exception_message``.
    """
    if isinstance(exc, str):
        message = exc
    else:
        message = exception_message(exc)
    entry = {"stage": stage, "error": message}
    failures.append(entry)
    print_failure(stage, message)
|
|
|
|
|
|
def try_stage(
    failures: list[dict[str, str]],
    stage: str,
    func,
    *args,
    **kwargs,
):
    """Run ``func(*args, **kwargs)`` and return its result, recording any failure.

    Any ``Exception`` raised by ``func`` is passed to ``record_failure`` under
    ``stage`` and ``None`` is returned instead of propagating. Callers therefore
    cannot distinguish a failure from a ``func`` that legitimately returns
    ``None``.
    """
    try:
        outcome = func(*args, **kwargs)
    except Exception as exc:
        record_failure(failures, stage, exc)
        return None
    return outcome
|
|
|
|
|
|
def try_stage_success(
    failures: list[dict[str, str]],
    stage: str,
    func,
    *args,
    **kwargs,
) -> bool:
    """Run ``func(*args, **kwargs)`` for its side effects and report success.

    The return value of ``func`` is discarded. Any ``Exception`` it raises is
    passed to ``record_failure`` under ``stage`` and ``False`` is returned;
    otherwise the result is ``True``.
    """
    try:
        func(*args, **kwargs)
    except Exception as exc:
        record_failure(failures, stage, exc)
        return False
    return True
|
|
|
|
|
|
def _escape_table_cell(text: str) -> str:
    """Make ``text`` safe to embed in a single Markdown table cell."""
    # A raw "|" would end the cell early and a raw newline would end the row.
    return text.replace("|", "\\|").replace("\n", "<br>")


def _perf_row(label: str, perf: dict[str, Any], keys: tuple[str, ...]) -> str:
    """Format one compiler's row of the pimsim-nn performance table."""
    cells = " | ".join(str(perf_value(perf, key)) for key in keys)
    return f"| {label} | {perf_status(perf)} | {cells} |"


def _instruction_row(label: str, instr: dict[str, Any]) -> str:
    """Format one compiler's row of the instruction summary table."""
    if instr.get("error"):
        status = "FAILED"
    elif instr.get("skipped"):
        status = "SKIPPED"
    else:
        status = "DONE"
    op_counts = instr.get("op_counts", {})
    return (
        f"| {label} | {status} | {instr.get('active_cores', 0)} | "
        f"{instr.get('total_instructions', 0)} | {op_counts.get('send', 0)} | "
        f"{op_counts.get('recv', 0)} | {op_counts.get('mvmul', 0)} |"
    )


def write_report(
    report_path: Path,
    *,
    model_path: Path,
    hardware: dict[str, int],
    steps: list[StepRecord],
    failures: list[dict[str, str]],
    raptor_validation: CompareResult,
    pimcomp_validation: CompareResult,
    raptor_perf: dict[str, Any],
    pimcomp_perf: dict[str, Any],
    raptor_instr: dict[str, Any],
    pimcomp_instr: dict[str, Any],
    raptor_pass_timings: dict[str, float],
    pimsim_mode: str,
) -> None:
    """Write the Raptor-vs-PIMCOMP comparison report as Markdown.

    Sections are emitted in this order: header, failures (only if something
    failed or was skipped), semantic validation, pimsim-nn performance,
    instruction summary, step timings, and Raptor pass timings (only if any).

    Args:
        report_path: Destination file; overwritten with UTF-8 text.
        model_path: Model path shown in the header.
        hardware: Hardware parameters; missing keys are rendered as ``n/a``.
        steps: Executed step records, listed in the timing table.
        failures: ``{"stage", "error"}`` entries listed in the failure table.
        raptor_validation: Validation outcome for the Raptor flow.
        pimcomp_validation: Validation outcome for the PIMCOMP flow.
        raptor_perf: pimsim-nn performance record for Raptor.
        pimcomp_perf: pimsim-nn performance record for PIMCOMP.
        raptor_instr: Instruction summary for Raptor.
        pimcomp_instr: Instruction summary for PIMCOMP.
        raptor_pass_timings: Per-pass compile durations in seconds.
        pimsim_mode: ``"throughput"`` selects the throughput table layout; any
            other value selects the latency layout.
    """

    def hw(key: str) -> Any:
        return hardware.get(key, "n/a")

    lines = [
        "# Raptor vs PIMCOMP Comparison Report",
        "",
        f"- Model: `{model_path}`",
        f"- Hardware: `{hw('core_count')} cores`, `{hw('crossbar_count')} xbars/core`, "
        f"`{hw('crossbar_size')}x{hw('crossbar_size')}` crossbars, "
        f"mesh `{hw('mesh_rows')}x{hw('mesh_cols')}`",
        "",
    ]

    failed_steps = [step for step in steps if step.status != "passed"]

    # --- Failures / skipped work -------------------------------------------
    if failures or failed_steps:
        lines.extend(
            [
                "## Failures / Skipped Work",
                "",
                "The script did not abort. The failed stage was recorded and any dependent stage was skipped when its inputs were not available.",
                "",
            ]
        )
        if failures:
            lines.extend(["| Stage | Error |", "|---|---|"])
            for failure in failures:
                # Escape both cells so arbitrary error text cannot break the table.
                stage = _escape_table_cell(failure["stage"])
                error = _escape_table_cell(failure["error"])
                lines.append(f"| {stage} | {error} |")
            lines.append("")

    # --- Semantic validation -----------------------------------------------
    lines.extend(
        [
            "## Semantic Validation",
            "",
            f"- Raptor via `pim-simulator`: `{validation_status(raptor_validation)}`",
            f"- PIMCOMP via exported `pim-simulator`: `{validation_status(pimcomp_validation)}`",
        ]
    )
    for label, validation in (("Raptor", raptor_validation), ("PIMCOMP", pimcomp_validation)):
        if validation.error:
            # Only the first line is shown here to keep the bullet on one line.
            lines.append(f"- {label} validation note: `{validation.error.splitlines()[0]}`")

    lines.extend(["", "### Max Output Differences", ""])
    diff_names = sorted(set(raptor_validation.max_diffs) | set(pimcomp_validation.max_diffs))
    if diff_names:
        nan = float("nan")
        lines.extend(["| Output | Raptor max diff | PIMCOMP max diff |", "|---|---:|---:|"])
        for name in diff_names:
            lines.append(
                f"| `{name}` | {raptor_validation.max_diffs.get(name, nan):.6e} | "
                f"{pimcomp_validation.max_diffs.get(name, nan):.6e} |"
            )
    else:
        lines.append("No output differences are available because validation did not run or failed before comparison.")

    # --- pimsim-nn performance ---------------------------------------------
    lines.extend(
        [
            "",
            "## pimsim-nn Performance",
            "",
            f"- Mode: `{pimsim_mode}`",
            "",
        ]
    )
    if pimsim_mode == "throughput":
        perf_header = "| Compiler | Status | Throughput (samples/s) | Avg latency (ms) | Avg power (mW) | Avg energy (pJ/it) | Output count |"
        perf_divider = "|---|---|---:|---:|---:|---:|---:|"
        perf_keys: tuple[str, ...] = (
            "throughput",
            "average_latency_ms",
            "average_power_mw",
            "average_energy_pj",
            "output_count",
        )
    else:
        perf_header = "| Compiler | Status | Latency (ms) | Avg power (mW) | Avg energy (pJ) |"
        perf_divider = "|---|---|---:|---:|---:|"
        perf_keys = ("latency_ms", "average_power_mw", "average_energy_pj")
    lines.extend(
        [
            perf_header,
            perf_divider,
            _perf_row("Raptor", raptor_perf, perf_keys),
            _perf_row("PIMCOMP", pimcomp_perf, perf_keys),
            "",
        ]
    )
    for label, perf in (("Raptor", raptor_perf), ("PIMCOMP", pimcomp_perf)):
        note = perf.get("reason") or perf.get("error")
        if note:
            lines.append(f"- {label} pimsim-nn note: `{note.splitlines()[0]}`")
    if lines[-1] != "":
        lines.append("")

    # --- Instruction summary and step timings ------------------------------
    lines.extend(
        [
            "## Instruction Summary",
            "",
            "| Compiler | Status | Active cores | Total instructions | Sends | Receives | MVMUL |",
            "|---|---|---:|---:|---:|---:|---:|",
            _instruction_row("Raptor", raptor_instr),
            _instruction_row("PIMCOMP", pimcomp_instr),
            "",
            "### Raptor Op Distribution",
            "",
            "| Op | Count | Share |",
            "|---|---:|---:|",
            *format_op_table(raptor_instr.get("op_counts", {}), raptor_instr.get("total_instructions", 0)),
            "",
            "### PIMCOMP Op Distribution",
            "",
            "| Op | Count | Share |",
            "|---|---:|---:|",
            *format_op_table(pimcomp_instr.get("op_counts", {}), pimcomp_instr.get("total_instructions", 0)),
            "",
            "## Step Timings",
            "",
            "| Step | Status | Duration (s) | Return code |",
            "|---|---|---:|---:|",
        ]
    )
    for step in steps:
        lines.append(
            f"| {step.name} | {step.status.upper()} | {step.duration_sec:.3f} | "
            f"{step.returncode if step.returncode is not None else ''} |"
        )

    if failed_steps:
        lines.extend(["", "### Failed Step Details", ""])
        for step in failed_steps:
            lines.extend(
                [
                    f"#### {step.name}",
                    "",
                    f"- Command: `{step.command}`",
                    f"- Error: `{step.error or 'n/a'}`",
                ]
            )
            if step.output_tail:
                lines.extend(["", "```text", step.output_tail, "```"])
            lines.append("")

    # --- Raptor pass timings -----------------------------------------------
    if raptor_pass_timings:
        lines.extend(["", "## Raptor Pass Timings", "", "| Pass | Duration (s) |", "|---|---:|"])
        for name, duration in raptor_pass_timings.items():
            lines.append(f"| {name} | {duration:.4f} |")

    report_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
|
|
|
|
def main() -> None:
    """Run the Raptor-vs-PIMCOMP comparison pipeline and write both reports.

    Each stage runs through ``try_stage`` / ``try_stage_success`` so that a
    failing stage is recorded instead of aborting; stages whose inputs are
    missing are marked as skipped. A Markdown report and a JSON report are
    always written to ``--out-dir``.

    Raises:
        SystemExit: With status 1 when ``--fail-on-error`` is given and any
            failure was recorded or any step did not pass.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=True, type=Path)
    parser.add_argument("--out-dir", required=True, type=Path)
    parser.add_argument("--raptor-path", default=REPO / "build_release/Release/bin/onnx-mlir", type=Path)
    parser.add_argument("--onnx-include-dir", default=REPO / "onnx-mlir/include", type=Path)
    parser.add_argument("--pimcomp-dir", default=REPO / "third_party/PIMCOMP-NN", type=Path)
    parser.add_argument("--pim-simulator-dir", default=REPO / "backend-simulators/pim/pim-simulator", type=Path)
    parser.add_argument("--pimsim-nn-build-dir", default=REPO / "backend-simulators/pim/pimsim-nn/build", type=Path)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--threshold", type=float, default=1e-3)
    parser.add_argument("--rtol", type=float, default=1e-5)
    parser.add_argument("--timeout-seconds", type=float, default=3600.0)
    parser.add_argument("--core-count", type=int)
    parser.add_argument("--crossbar-count", type=int)
    parser.add_argument("--crossbar-size", type=int)
    parser.add_argument("--mesh-rows", type=int)
    parser.add_argument("--mesh-cols", type=int)
    parser.add_argument("--pimsim-time-ms", type=int, default=1000)
    parser.add_argument("--pimsim-mode", choices=["latency", "throughput"], default="latency")
    parser.add_argument("--skip-pimsim-nn", action="store_true")
    parser.add_argument("--verbose-raptor-compile", action="store_true")
    parser.add_argument("--raptor-extra-arg", action="append", default=[])
    parser.add_argument(
        "--fail-on-error",
        action="store_true",
        help="Return a non-zero process status after writing the reports if any compilation/run stage failed.",
    )
    args = parser.parse_args()

    model_path = args.model.resolve()
    out_dir = args.out_dir.resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    # Pipeline state. Everything starts as "not available" so later stages and
    # the report can tell which inputs were never produced.
    failures: list[dict[str, str]] = []
    steps: list[StepRecord] = []
    hardware: dict[str, int] = {
        "mesh_rows": 0,
        "mesh_cols": 0,
        "crossbar_count": 0,
        "crossbar_size": 0,
        "core_count": 0,
    }
    inputs_desc: list[tuple[int, str, int, list[int]]] = []
    outputs_desc: list[tuple[int, str, int, list[int]]] = []
    arrays_in_order: list[np.ndarray] = []
    runtime_inputs: list[np.ndarray] = []

    network_mlir: Path | None = None
    runner_path: Path | None = None
    reference_dir: Path | None = None
    raptor_pim_dir: Path | None = None
    raptor_pass_timings: dict[str, float] = {}
    verification_info: Path | None = None
    simulation_info: Path | None = None
    pimcomp_export_dir: Path | None = None
    pimsim_config: Path | None = None

    raptor_validation = skipped_validation("Raptor validation did not run")
    pimcomp_validation = skipped_validation("PIMCOMP validation did not run")
    raptor_perf: dict[str, Any] = skipped_perf("pimsim-nn Raptor did not run")
    pimcomp_perf: dict[str, Any] = skipped_perf("pimsim-nn PIMCOMP did not run")
    raptor_instr: dict[str, Any] = empty_instruction_summary("Raptor instruction parsing did not run")
    pimcomp_instr: dict[str, Any] = empty_instruction_summary("PIMCOMP instruction parsing did not run")

    # --- Hardware configuration and model inputs ---------------------------
    loaded_hardware = try_stage(failures, "Load hardware configuration", load_effective_hardware, args)
    if loaded_hardware is not None:
        hardware = loaded_hardware

    model_io = try_stage(failures, "Load model inputs", load_model_inputs, model_path, args.seed)
    if model_io is not None:
        inputs_desc, outputs_desc, arrays_in_order, runtime_inputs = model_io

    # --- Reference compile and run -----------------------------------------
    expected_network_mlir = out_dir / "reference" / f"{model_path.stem}.onnx.mlir"
    expected_runner_path = out_dir / "runner" / "build" / "runner"

    reference_compile = try_stage(
        failures,
        "Compile reference",
        compile_reference,
        args,
        model_path,
        out_dir,
        steps,
    )
    if reference_compile is not None:
        network_mlir, _, runner_path = reference_compile
    else:
        # The compile failed as a whole, but artifacts already on disk at the
        # expected locations are still usable by later stages.
        if expected_network_mlir.exists():
            network_mlir = expected_network_mlir
            print(f"\n[Continue] Reusing partial ONNX MLIR: {network_mlir}")
        if expected_runner_path.exists():
            runner_path = expected_runner_path
            print(f"\n[Continue] Reusing partial runner: {runner_path}")

    if runner_path is not None and runner_path.exists() and model_io is not None:
        generated_reference = try_stage(
            failures,
            "Run reference",
            generate_reference_outputs,
            runner_path,
            runner_path.parent,
            model_path,
            arrays_in_order,
            steps,
            args,
            out_dir,
        )
        if generated_reference is not None:
            reference_dir = generated_reference
    else:
        record_failure(
            failures,
            "Skip reference outputs",
            "Reference outputs were skipped because the native runner or model inputs are not available.",
        )

    # --- Raptor compile and validation -------------------------------------
    # .get() keeps a hardware dict without "core_count" on the skip path
    # instead of raising KeyError outside any try_stage.
    if network_mlir is not None and network_mlir.exists() and hardware.get("core_count", 0) > 0:
        compiled_raptor = try_stage(
            failures,
            "Compile Raptor PIM",
            compile_raptor_target,
            network_mlir,
            out_dir / "raptor",
            hardware,
            args,
            steps,
        )
        if compiled_raptor is not None:
            raptor_pim_dir, raptor_pass_timings = compiled_raptor
    else:
        record_failure(
            failures,
            "Skip Raptor PIM compile",
            "Raptor PIM compile was skipped because the ONNX MLIR or hardware configuration is not available.",
        )

    if raptor_pim_dir is not None:
        wrote_inputs = try_stage_success(
            failures,
            "Write Raptor inputs",
            write_inputs_to_memory_bin,
            raptor_pim_dir / "memory.bin",
            raptor_pim_dir / "config.json",
            runtime_inputs,
        )
        if wrote_inputs and reference_dir is not None and outputs_desc:
            validation = try_stage(
                failures,
                "Rust Validation Raptor",
                run_rust_validation,
                "Rust Validation Raptor",
                raptor_pim_dir,
                raptor_pim_dir / "config.json",
                outputs_desc,
                reference_dir,
                steps,
                args,
            )
            raptor_validation = validation if validation is not None else failed_validation("Raptor validation failed")
        elif reference_dir is None:
            raptor_validation = skipped_validation("Reference outputs are not available")
        elif not outputs_desc:
            raptor_validation = skipped_validation("Output descriptors are not available")
        else:
            raptor_validation = skipped_validation("Raptor input materialization failed")
    else:
        raptor_validation = skipped_validation("Raptor PIM compilation did not produce a PIM directory")

    # --- PIMCOMP compile, export and validation ----------------------------
    compiled_pimcomp = try_stage(
        failures,
        "Compile PIMCOMP",
        compile_pimcomp,
        args,
        model_path,
        out_dir / "pimcomp",
        steps,
    )
    if compiled_pimcomp is not None:
        verification_info, simulation_info = compiled_pimcomp

    if verification_info is not None and simulation_info is not None and model_io is not None:
        exported = try_stage(
            failures,
            "Export PIMCOMP for Rust",
            export_pimcomp_for_rust,
            model_path,
            verification_info,
            simulation_info,
            runtime_inputs,
            out_dir / "pimcomp_exported",
        )
        if exported is not None:
            pimcomp_export_dir = exported
    elif verification_info is None or simulation_info is None:
        record_failure(
            failures,
            "Skip PIMCOMP Rust export",
            "PIMCOMP Rust export was skipped because PIMCOMP did not produce VerificationInfo.json and SimulationInfo.gz.",
        )
    else:
        record_failure(
            failures,
            "Skip PIMCOMP Rust export",
            "PIMCOMP Rust export was skipped because model inputs are not available.",
        )

    if pimcomp_export_dir is not None and reference_dir is not None and outputs_desc:
        validation = try_stage(
            failures,
            "Rust Validation PIMCOMP",
            run_rust_validation,
            "Rust Validation PIMCOMP",
            pimcomp_export_dir,
            pimcomp_export_dir / "config.json",
            outputs_desc,
            reference_dir,
            steps,
            args,
        )
        pimcomp_validation = validation if validation is not None else failed_validation("PIMCOMP validation failed")
    elif pimcomp_export_dir is None:
        pimcomp_validation = skipped_validation("PIMCOMP Rust export is not available")
    elif reference_dir is None:
        pimcomp_validation = skipped_validation("Reference outputs are not available")
    else:
        pimcomp_validation = skipped_validation("Output descriptors are not available")

    # --- pimsim-nn performance runs ----------------------------------------
    if hardware.get("core_count", 0) > 0:
        written_config = try_stage(
            failures,
            "Write pimsim-nn config",
            write_pimsim_config,
            args,
            out_dir / "pimsim_config",
            hardware,
        )
        if written_config is not None:
            pimsim_config = written_config
    else:
        record_failure(
            failures,
            "Skip pimsim-nn config",
            "pimsim-nn config was skipped because the hardware configuration is not available.",
        )

    if args.skip_pimsim_nn:
        raptor_perf = skipped_perf("Skipped by --skip-pimsim-nn")
        pimcomp_perf = skipped_perf("Skipped by --skip-pimsim-nn")
    elif pimsim_config is None:
        raptor_perf = skipped_perf("pimsim-nn config is not available")
        pimcomp_perf = skipped_perf("pimsim-nn config is not available")
    else:
        if raptor_pim_dir is not None:
            perf = try_stage(
                failures,
                "pimsim-nn Raptor",
                run_pimsim_nn,
                "pimsim-nn Raptor",
                raptor_pim_dir,
                pimsim_config,
                False,
                steps,
                args,
            )
            raptor_perf = perf if perf is not None else failed_perf("pimsim-nn Raptor failed")
        else:
            raptor_perf = skipped_perf("Raptor PIM directory is not available")

        if simulation_info is not None:
            pimcomp_pimsim_dir = try_stage(
                failures,
                "Export PIMCOMP for pimsim-nn",
                export_pimcomp_for_pimsim_nn,
                simulation_info,
                out_dir / "pimcomp_pimsim_nn",
            )
            if pimcomp_pimsim_dir is not None:
                perf = try_stage(
                    failures,
                    "pimsim-nn PIMCOMP",
                    run_pimsim_nn,
                    "pimsim-nn PIMCOMP",
                    pimcomp_pimsim_dir,
                    pimsim_config,
                    False,
                    steps,
                    args,
                )
                pimcomp_perf = perf if perf is not None else failed_perf("pimsim-nn PIMCOMP failed")
            else:
                pimcomp_perf = failed_perf("PIMCOMP pimsim-nn export failed")
        else:
            pimcomp_perf = skipped_perf("PIMCOMP SimulationInfo.gz is not available")

    # --- Instruction statistics --------------------------------------------
    if raptor_pim_dir is not None and raptor_pim_dir.exists():
        parsed = try_stage(failures, "Parse Raptor instructions", parse_raptor_instructions, raptor_pim_dir)
        raptor_instr = parsed if parsed is not None else empty_instruction_summary(error="Failed to parse Raptor instructions")
    else:
        raptor_instr = empty_instruction_summary("Raptor PIM directory is not available")

    if simulation_info is not None and simulation_info.exists():
        parsed = try_stage(failures, "Parse PIMCOMP instructions", parse_pimcomp_instructions, simulation_info)
        pimcomp_instr = parsed if parsed is not None else empty_instruction_summary(error="Failed to parse PIMCOMP instructions")
    else:
        pimcomp_instr = empty_instruction_summary("PIMCOMP SimulationInfo.gz is not available")

    # --- Reports -----------------------------------------------------------
    report_path = out_dir / "comparison_report.md"
    write_report(
        report_path,
        model_path=model_path,
        hardware=hardware,
        steps=steps,
        failures=failures,
        raptor_validation=raptor_validation,
        pimcomp_validation=pimcomp_validation,
        raptor_perf=raptor_perf,
        pimcomp_perf=pimcomp_perf,
        raptor_instr=raptor_instr,
        pimcomp_instr=pimcomp_instr,
        raptor_pass_timings=raptor_pass_timings,
        pimsim_mode=args.pimsim_mode,
    )

    json_report = {
        "model": str(model_path),
        "hardware": hardware,
        "pimsim_mode": args.pimsim_mode,
        "failures": failures,
        "steps": [asdict(step) for step in steps],
        "raptor_validation": asdict(raptor_validation),
        "pimcomp_validation": asdict(pimcomp_validation),
        "raptor_performance": raptor_perf,
        "pimcomp_performance": pimcomp_perf,
        "raptor_instruction_summary": raptor_instr,
        "pimcomp_instruction_summary": pimcomp_instr,
        "raptor_pass_timings": raptor_pass_timings,
        "paths": {
            "reference_outputs": optional_path(reference_dir),
            "raptor_pim": optional_path(raptor_pim_dir),
            "pimcomp_simulation_info": optional_path(simulation_info),
            "pimcomp_exported_pim": optional_path(pimcomp_export_dir),
            "pimsim_config": optional_path(pimsim_config),
            "report_markdown": str(report_path),
        },
    }
    json_path = out_dir / "comparison_report.json"
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(json_report, f, indent=2)
        f.write("\n")

    # Evaluated once; it drives both the summary line and the exit status.
    had_problems = bool(failures) or any(step.status != "passed" for step in steps)

    print("\n[Done]")
    print(f" Report: {report_path}")
    print(f" JSON: {json_path}")
    if had_problems:
        print(f" Completed with {len(failures)} recorded failure/skipped stage(s).")

    if args.fail_on_error and had_problems:
        raise SystemExit(1)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the pipeline only when executed as a script, never on import.
    main()
|