From 6d69600bc176dd282a8f33f98449f67a990b363b Mon Sep 17 00:00:00 2001 From: ilgeco Date: Wed, 10 Jun 2026 11:59:43 +0200 Subject: [PATCH] Yolo Image Validator + new accept rule --- src/PIM/Compiler/PimCompilerOptions.cpp | 6 + src/PIM/Compiler/PimCompilerOptions.hpp | 1 + src/PIM/Compiler/PimCompilerUtils.cpp | 3 +- validation/gen_network_runner.py | 5 +- validation/raptor.py | 5 +- .../tools/yolo_local_image_validation.py | 223 +++++++++ .../tools/yolo_real_image_validation.py | 425 ++++++++++++++++++ validation/validate.py | 9 +- validation/validate_one.py | 19 +- 9 files changed, 685 insertions(+), 11 deletions(-) create mode 100644 validation/tools/yolo_local_image_validation.py create mode 100644 validation/tools/yolo_real_image_validation.py diff --git a/src/PIM/Compiler/PimCompilerOptions.cpp b/src/PIM/Compiler/PimCompilerOptions.cpp index 0578e4e..7d1cb01 100644 --- a/src/PIM/Compiler/PimCompilerOptions.cpp +++ b/src/PIM/Compiler/PimCompilerOptions.cpp @@ -38,6 +38,12 @@ llvm::cl::opt llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); +llvm::cl::opt + pimDisableMemoryCoalescing("pim-disable-memory-coalescing", + llvm::cl::desc("Skip the PIM memory coalescing pass (developer diagnostic option)"), + llvm::cl::init(false), + llvm::cl::cat(OnnxMlirOptions)); + llvm::cl::opt useExperimentalConvImpl("use-experimental-conv-impl", llvm::cl::desc("Use experimental implementation for convolution"), llvm::cl::init(false), diff --git a/src/PIM/Compiler/PimCompilerOptions.hpp b/src/PIM/Compiler/PimCompilerOptions.hpp index b486070..3d90409 100644 --- a/src/PIM/Compiler/PimCompilerOptions.hpp +++ b/src/PIM/Compiler/PimCompilerOptions.hpp @@ -36,6 +36,7 @@ extern llvm::cl::opt pimMergeScheduler; extern llvm::cl::opt pimMemoryReport; extern llvm::cl::opt pimOnlyCodegen; +extern llvm::cl::opt pimDisableMemoryCoalescing; extern llvm::cl::opt useExperimentalConvImpl; extern llvm::cl::opt pimEmitJson; diff --git a/src/PIM/Compiler/PimCompilerUtils.cpp 
b/src/PIM/Compiler/PimCompilerUtils.cpp index 5035379..e9bc397 100644 --- a/src/PIM/Compiler/PimCompilerUtils.cpp +++ b/src/PIM/Compiler/PimCompilerUtils.cpp @@ -46,7 +46,8 @@ void addPassesPim(OwningOpRef& module, if (pimEmissionTarget >= EmitPimCodegen) { pm.addPass(createPimHostConstantFoldingPass()); pm.addPass(createMessagePass("Pim host constants folded")); - pm.addPass(createPimMemoryCoalescingPass()); + if (!pimDisableMemoryCoalescing) + pm.addPass(createPimMemoryCoalescingPass()); pm.addPass(createPimVerificationPass()); pm.addPass(createMessagePass("Pim verified")); pm.addPass(createEmitPimCodePass()); diff --git a/validation/gen_network_runner.py b/validation/gen_network_runner.py index 7966fc8..7f0b731 100644 --- a/validation/gen_network_runner.py +++ b/validation/gen_network_runner.py @@ -199,7 +199,10 @@ int main(int argc, char **argv) {{ // ---- Cleanup ---- omTensorListDestroy(in_list); - omTensorListDestroy(out_list); + // Some debug-heavy models return aliased outputs. This runner is a short- + // lived process, so destroy only the list wrapper and let process exit + // reclaim the output tensors safely. 
+ omTensorListDestroyShallow(out_list); return 0; }} """ diff --git a/validation/raptor.py b/validation/raptor.py index 371c30c..e9feac7 100644 --- a/validation/raptor.py +++ b/validation/raptor.py @@ -41,7 +41,8 @@ def _format_command(cmd): def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path, crossbar_size, crossbar_count, core_count=None, pim_merge_scheduler="peft", - pim_memory_report="none", cwd=None, verbose=False, reporter=None, timeout_sec=None): + pim_memory_report="none", raptor_extra_args=None, cwd=None, verbose=False, + reporter=None, timeout_sec=None): # Define the arguments, with the possibility to set crossbar size and count args = [ network_path, @@ -57,6 +58,8 @@ def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path, args.append(f"--core-count={core_count}") if pim_memory_report != "none": args.append(f"--pim-memory-report={pim_memory_report}") + if raptor_extra_args: + args.extend(str(arg) for arg in raptor_extra_args) if verbose: args.append("--enable-timing") diff --git a/validation/tools/yolo_local_image_validation.py b/validation/tools/yolo_local_image_validation.py new file mode 100644 index 0000000..f0cff5b --- /dev/null +++ b/validation/tools/yolo_local_image_validation.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 + +import argparse +import json +import subprocess +import tempfile +from pathlib import Path + +import numpy as np +from PIL import Image +from onnx_utils import _ONNX_TO_NP, onnx_io, write_inputs_to_memory_bin +from validate_one import MODE_COMPILE_ONLY, build_dump_ranges, run_pim_simulator, sanitize_output_name, validate_network +from yolo_real_image_validation import ( + IMAGE_CASES, + decode_yolo_output, + download_image, + draw_detections, + letterbox_rgb, + save_tensor_csv, + top_unique_labels, +) + + +def resolve_default_paths(): + validation_dir = Path(__file__).resolve().parent.parent + repo_root = validation_dir.parent + return { + "validation_dir": validation_dir, + 
"repo_root": repo_root,
+        "network_dir": validation_dir / "networks" / "yolo11n" / "depth_51",
+        "raptor_path": repo_root / "build_release" / "Release" / "bin" / "onnx-mlir",
+        "onnx_include_dir": repo_root / "onnx-mlir" / "include",
+        "simulator_dir": repo_root / "backend-simulators" / "pim" / "pim-simulator",
+    }
+
+
+def find_network_onnx(network_dir: Path) -> Path:
+    """Return the single ``.onnx`` file located directly inside ``network_dir``.
+
+    Raises:
+        FileNotFoundError: no ``.onnx`` file is present.
+        RuntimeError: more than one ``.onnx`` file is present.
+    """
+    onnx_files = sorted(network_dir.glob("*.onnx"))
+    if not onnx_files:
+        raise FileNotFoundError(f"No .onnx file found in {network_dir}")
+    if len(onnx_files) > 1:
+        names = ", ".join(path.name for path in onnx_files)
+        raise RuntimeError(f"Expected exactly one .onnx file in {network_dir}, found: {names}")
+    return onnx_files[0]
+
+
+def local_case_paths(network_dir: Path, case_name: str):
+    """Return the paths used for one image case, all rooted at ``network_dir``."""
+    return {
+        "root": network_dir,
+        "runner": network_dir / "runner" / "build" / "runner",
+        "runner_build": network_dir / "runner" / "build",
+        "raptor_pim": network_dir / "raptor" / "pim",
+        "real_root": network_dir / "real_image_validation",
+        "input_csv": network_dir / "real_image_validation" / "inputs" / f"{case_name}.csv",
+        "ref_dir": network_dir / "real_image_validation" / "reference" / case_name,
+        "sim_dir": network_dir / "real_image_validation" / "simulation" / case_name,
+        "sim_bin": network_dir / "real_image_validation" / "simulation" / case_name / "out.bin",
+    }
+
+
+def ensure_local_artifacts(args, network_onnx_path: Path):
+    """Run ``validate_network`` in compile-only mode with the CLI settings in ``args``."""
+    # NOTE(review): presumably this produces the runner and raptor/pim files that
+    # ensure_existing_artifacts checks for -- confirm against validate_one.py.
+    validate_network(
+        network_onnx_path=network_onnx_path,
+        raptor_path=args.raptor_path,
+        onnx_include_dir=args.onnx_include_dir,
+        simulator_dir=args.simulator_dir,
+        crossbar_size=args.crossbar_size,
+        crossbar_count=args.crossbar_count,
+        core_count=args.core_count,
+        command_timeout_seconds=args.command_timeout_seconds,
+        mode=MODE_COMPILE_ONLY,
+        verbose=args.verbose,
+    )
+
+
+def ensure_existing_artifacts(network_dir: Path):
+    """Raise ``FileNotFoundError`` unless the compiled runner and PIM files already exist."""
+    required_paths = [
+        network_dir / "runner" / "build" / "runner",
+        network_dir / "raptor" / "pim" / "config.json",
+        network_dir / "raptor" / "pim" / "memory.bin",
+    ]
+    missing = [str(path) for path in required_paths if not path.exists()]
+    if missing:
+        raise FileNotFoundError(
+            "Missing compiled local artifacts. Re-run without --skip-compile or restore these paths:\n "
+            + "\n ".join(missing)
+        )
+
+
+def run_local_reference_and_simulator(args, network_dir: Path, network_onnx_path: Path, case_name: str):
+    """Run the reference runner and the PIM simulator on one prepared input CSV.
+
+    Returns:
+        ``(paths, output_descriptor)``: the ``local_case_paths`` mapping and the
+        descriptor of the model's only output.
+
+    Raises:
+        RuntimeError: the model does not have exactly one output.
+        subprocess.CalledProcessError: the reference runner exits non-zero.
+        subprocess.TimeoutExpired: the reference runner exceeds
+            ``args.command_timeout_seconds``.
+    """
+    paths = local_case_paths(network_dir, case_name)
+    paths["ref_dir"].mkdir(parents=True, exist_ok=True)
+    paths["sim_dir"].mkdir(parents=True, exist_ok=True)
+
+    output_descriptors = onnx_io(network_onnx_path)[1]
+    if len(output_descriptors) != 1:
+        raise RuntimeError(f"Expected one YOLO output tensor, found {len(output_descriptors)}")
+
+    runner_cmd = [
+        str(paths["runner"]),
+        "--in0-csv-file",
+        str(paths["input_csv"]),
+        "--in0-shape",
+        "1x3x640x640",
+        "--save-csv-dir",
+        str(paths["ref_dir"]),
+    ]
+    # Apply the same per-command timeout as the simulator call below, so a hung
+    # runner cannot block the whole validation run.
+    subprocess.run(
+        runner_cmd,
+        cwd=paths["runner_build"],
+        check=True,
+        timeout=args.command_timeout_seconds,
+    )
+
+    tensor = np.loadtxt(paths["input_csv"], delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
+    write_inputs_to_memory_bin(paths["raptor_pim"] / "memory.bin", paths["raptor_pim"] / "config.json", [tensor])
+
+    dump_ranges = build_dump_ranges(paths["raptor_pim"] / "config.json", output_descriptors)
+    run_pim_simulator(
+        args.simulator_dir,
+        paths["raptor_pim"],
+        paths["sim_bin"],
+        dump_ranges,
+        timeout_sec=args.command_timeout_seconds,
+    )
+    return paths, output_descriptors[0]
+
+
+def analyze_case(args, network_dir: Path, network_onnx_path: Path, case, work_dir: Path):
+    """Download one test image, run it through reference and simulator, and report the differences."""
+    from urllib.parse import urlsplit
+
+    # Take the suffix from the URL *path* only: the raw URL may carry a query
+    # string whose dots and '&' characters would otherwise end up in the file name.
+    image_suffix = Path(urlsplit(case.url).path).suffix or '.img'
+    image_path = work_dir / f"{case.name}{image_suffix}"
+    csv_path = work_dir / f"{case.name}.csv"
+    annotated_dir = args.annotated_dir
+    annotated_dir.mkdir(parents=True, exist_ok=True)
+    download_image(case.url, image_path)
+    tensor = letterbox_rgb(Image.open(image_path))
+    save_tensor_csv(tensor, csv_path)
+
+    paths = local_case_paths(network_dir, case.name)
+    paths["input_csv"].parent.mkdir(parents=True, exist_ok=True)
+
paths["input_csv"].write_bytes(csv_path.read_bytes())
+    paths, output_descriptor = run_local_reference_and_simulator(args, network_dir, network_onnx_path, case.name)
+
+    output_index, output_name, output_dtype_code, output_shape = output_descriptor
+    output_dtype = np.dtype(_ONNX_TO_NP[output_dtype_code])
+    ref_csv_path = paths["ref_dir"] / f"output{output_index}_{sanitize_output_name(output_name)}.csv"
+    ref = np.loadtxt(ref_csv_path, delimiter=",", dtype=output_dtype).reshape(output_shape)
+    sim = np.frombuffer(
+        paths["sim_bin"].read_bytes(),
+        dtype=output_dtype,
+        count=int(np.prod(output_shape)),
+    ).reshape(output_shape)
+
+    # Compare in float64 so the difference itself adds no rounding error.
+    abs_diff = np.abs(sim.astype(np.float64) - ref.astype(np.float64))
+    # The 1e-12 floor avoids dividing by zero where the reference is exactly 0.
+    rel_diff = abs_diff / np.maximum(np.abs(ref.astype(np.float64)), 1e-12)
+
+    ref_detections = decode_yolo_output(ref)
+    sim_detections = decode_yolo_output(sim)
+    ref_labels = top_unique_labels(ref_detections)
+    sim_labels = top_unique_labels(sim_detections)
+    ref_image_path = annotated_dir / f"{case.name}_reference.png"
+    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
+    draw_detections(image_path, ref_detections, ref_image_path)
+    draw_detections(image_path, sim_detections, sim_image_path)
+
+    return {
+        "case": case.name,
+        "expected_label": case.expected_label,
+        "ref_top_labels": ref_labels,
+        "sim_top_labels": sim_labels,
+        "top1_match": bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0]),
+        "expected_in_ref": case.expected_label in ref_labels,
+        "expected_in_sim": case.expected_label in sim_labels,
+        "max_abs_diff": float(abs_diff.max()),
+        "mean_abs_diff": float(abs_diff.mean()),
+        "max_rel_diff": float(rel_diff.max()),
+        "mean_rel_diff": float(rel_diff.mean()),
+        "reference_annotated_image": str(ref_image_path),
+        "simulator_annotated_image": str(sim_image_path),
+        "ref_top_detections": ref_detections[:5],
+        "sim_top_detections": sim_detections[:5],
+    }
+
+
+def main():
+    """Parse the CLI, compile (or check) the local artifacts, and print a JSON report for every image case."""
+    defaults = resolve_default_paths()
+
+    parser = argparse.ArgumentParser(description="Validate YOLO detections on real images using local compilation and simulator execution.")
+    parser.add_argument("--network-dir", type=Path, default=defaults["network_dir"])
+    parser.add_argument("--network-onnx", type=Path, default=None)
+    parser.add_argument("--raptor-path", type=Path, default=defaults["raptor_path"])
+    parser.add_argument("--onnx-include-dir", type=Path, default=defaults["onnx_include_dir"])
+    parser.add_argument("--simulator-dir", type=Path, default=defaults["simulator_dir"])
+    parser.add_argument("--crossbar-size", type=int, default=2048)
+    parser.add_argument("--crossbar-count", type=int, default=256)
+    parser.add_argument("--core-count", type=int, default=1000)
+    parser.add_argument("--command-timeout-seconds", type=float, default=7200.0)
+    parser.add_argument("--skip-compile", action="store_true")
+    parser.add_argument("--verbose", action="store_true")
+    parser.add_argument(
+        "--annotated-dir",
+        type=Path,
+        default=None,
+        help="Directory for annotated images (default: <network-dir>/real_image_validation/annotated).",
+    )
+    args = parser.parse_args()
+
+    args.network_dir = args.network_dir.resolve()
+    args.network_onnx = args.network_onnx.resolve() if args.network_onnx else find_network_onnx(args.network_dir)
+    args.raptor_path = args.raptor_path.resolve()
+    args.onnx_include_dir = args.onnx_include_dir.resolve()
+    args.simulator_dir = args.simulator_dir.resolve()
+    if args.annotated_dir is None:
+        # Derive the default from the selected --network-dir; a default fixed at
+        # parser-construction time would send a custom network's annotations
+        # into the built-in default network's directory.
+        args.annotated_dir = args.network_dir / "real_image_validation" / "annotated"
+    args.annotated_dir = args.annotated_dir.resolve()
+
+    if not args.skip_compile:
+        ensure_local_artifacts(args, args.network_onnx)
+    else:
+        ensure_existing_artifacts(args.network_dir)
+
+    reports = []
+    with tempfile.TemporaryDirectory(prefix="yolo_local_images_") as tmp_dir:
+        work_dir = Path(tmp_dir)
+        for case in IMAGE_CASES:
+            reports.append(analyze_case(args, args.network_dir, args.network_onnx, case, work_dir))
+
+    print(json.dumps({"network_dir": str(args.network_dir), "network_onnx": str(args.network_onnx), "cases": reports}, indent=2))
+
+
+if __name__ == "__main__":
+
main() diff --git a/validation/tools/yolo_real_image_validation.py b/validation/tools/yolo_real_image_validation.py new file mode 100644 index 0000000..173989c --- /dev/null +++ b/validation/tools/yolo_real_image_validation.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python3 + +import argparse +import json +import shlex +import subprocess +import tempfile +import urllib.request +from dataclasses import dataclass +from pathlib import Path + +import numpy as np +from PIL import Image, ImageDraw + + +COCO80_CLASSES = [ + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", + "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", + "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", + "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", + "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", + "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", + "scissors", "teddy bear", "hair drier", "toothbrush", +] + + +@dataclass(frozen=True) +class ImageCase: + name: str + url: str + expected_label: str + + +IMAGE_CASES = [ + ImageCase( + name="cat_coco_39769", + url="http://images.cocodataset.org/val2017/000000039769.jpg", + expected_label="cat", + ), + ImageCase( + name="dog_pytorch_hub", + url="https://github.com/pytorch/hub/raw/master/images/dog.jpg", + expected_label="dog", + ), + ImageCase( + name="cute_kitty", + url="https://images.unsplash.com/photo-1529778873920-4da4926a72c2?q=80&w=872&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" , + 
expected_label="cat",
+    ),
+
+]
+
+
+def run(cmd, *, cwd=None, capture_output=False, input_bytes=None):
+    """Run ``cmd`` with ``check=True`` and return the ``CompletedProcess``."""
+    return subprocess.run(
+        cmd,
+        cwd=cwd,
+        check=True,
+        input=input_bytes,
+        capture_output=capture_output,
+    )
+
+
+def ssh_command(ssh_key: str, remote_host: str, command: str):
+    """Build the ``ssh`` argv that runs ``command`` on ``remote_host`` using key ``ssh_key``."""
+    return ["ssh", "-i", ssh_key, remote_host, command]
+
+
+def remote_bash(ssh_key: str, remote_host: str, command: str, *, capture_output=False, input_bytes=None):
+    """Run ``command`` on the remote host over SSH; ``input_bytes`` is fed to its stdin."""
+    return run(
+        ssh_command(ssh_key, remote_host, command),
+        capture_output=capture_output,
+        input_bytes=input_bytes,
+    )
+
+
+def download_image(url: str, path: Path, *, timeout: float = 60.0):
+    """Fetch ``url`` and write the response body to ``path``.
+
+    Args:
+        url: Location of the image.
+        path: Destination file; overwritten if it exists.
+        timeout: Seconds passed to ``urlopen`` for blocking network operations.
+    """
+    # Without a timeout a stalled connection would block the validation run forever.
+    with urllib.request.urlopen(url, timeout=timeout) as response:
+        path.write_bytes(response.read())
+
+
+def letterbox_rgb(image: Image.Image, size: int = 640) -> np.ndarray:
+    """Resize ``image`` to fit a ``size`` x ``size`` canvas and return it as a float32 tensor.
+
+    The aspect ratio is preserved, the resized image is centred on a
+    (114, 114, 114) background, pixel values are divided by 255, and the result
+    has layout ``(1, 3, size, size)``.
+    """
+    image = image.convert("RGB")
+    width, height = image.size
+    scale = min(size / width, size / height)
+    resized_width = max(1, int(round(width * scale)))
+    resized_height = max(1, int(round(height * scale)))
+    resized = image.resize((resized_width, resized_height), Image.Resampling.BILINEAR)
+
+    canvas = Image.new("RGB", (size, size), (114, 114, 114))
+    offset_x = (size - resized_width) // 2
+    offset_y = (size - resized_height) // 2
+    canvas.paste(resized, (offset_x, offset_y))
+
+    array = np.asarray(canvas, dtype=np.float32) / 255.0
+    chw = np.transpose(array, (2, 0, 1))
+    return np.expand_dims(chw, axis=0)
+
+
+def letterbox_params(width: int, height: int, size: int = 640):
+    """Return ``(scale, offset_x, offset_y)`` computed as in ``letterbox_rgb`` for a ``width`` x ``height`` image."""
+    scale = min(size / width, size / height)
+    resized_width = max(1, int(round(width * scale)))
+    resized_height = max(1, int(round(height * scale)))
+    offset_x = (size - resized_width) // 2
+    offset_y = (size - resized_height) // 2
+    return scale, offset_x, offset_y
+
+
+def save_tensor_csv(array: np.ndarray, path: Path):
+    """Write ``array`` flattened as a single comma-separated row to ``path``."""
+    flat = array.reshape(-1)
+    np.savetxt(path, flat[np.newaxis, :], delimiter=",", fmt="%.9g")
+
+
+def iou_xyxy(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
+    """Return the IoU of ``box`` against every row of ``boxes`` (all as x1, y1, x2, y2)."""
+    x1 = np.maximum(box[0], boxes[:, 0])
+    y1
= np.maximum(box[1], boxes[:, 1])
+    x2 = np.minimum(box[2], boxes[:, 2])
+    y2 = np.minimum(box[3], boxes[:, 3])
+
+    inter_w = np.maximum(0.0, x2 - x1)
+    inter_h = np.maximum(0.0, y2 - y1)
+    inter = inter_w * inter_h
+
+    area_box = np.maximum(0.0, box[2] - box[0]) * np.maximum(0.0, box[3] - box[1])
+    area_boxes = np.maximum(0.0, boxes[:, 2] - boxes[:, 0]) * np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
+    union = area_box + area_boxes - inter
+    # Entries with an empty union keep the 0 from ``out`` instead of dividing by zero.
+    return np.divide(inter, union, out=np.zeros_like(inter), where=union > 0)
+
+
+def decode_yolo_output(
+    output: np.ndarray,
+    *,
+    conf_threshold: float = 0.25,
+    iou_threshold: float = 0.45,
+    max_detections: int = 50,
+):
+    """Turn a raw YOLO output tensor into a confidence-sorted list of detections.
+
+    ``output[0]`` is transposed so that each row is one candidate: the first
+    four values are read as (cx, cy, w, h) and the remaining ones as per-class
+    scores. Candidates below ``conf_threshold`` are dropped, then a greedy
+    per-class non-maximum suppression removes boxes overlapping a better one by
+    more than ``iou_threshold``.
+
+    Returns:
+        At most ``max_detections`` dicts with keys ``label``, ``class_id``,
+        ``confidence`` and ``box_xyxy``, highest confidence first.
+    """
+    predictions = output[0].T
+    boxes_xywh = predictions[:, :4]
+    class_scores = predictions[:, 4:]
+
+    class_ids = np.argmax(class_scores, axis=1)
+    confidences = class_scores[np.arange(class_scores.shape[0]), class_ids]
+    keep = confidences >= conf_threshold
+
+    if not np.any(keep):
+        return []
+
+    boxes_xywh = boxes_xywh[keep]
+    class_ids = class_ids[keep]
+    confidences = confidences[keep]
+
+    # Centre/size -> corner coordinates.
+    boxes_xyxy = np.empty_like(boxes_xywh)
+    boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2.0
+    boxes_xyxy[:, 1] = boxes_xywh[:, 1] - boxes_xywh[:, 3] / 2.0
+    boxes_xyxy[:, 2] = boxes_xywh[:, 0] + boxes_xywh[:, 2] / 2.0
+    boxes_xyxy[:, 3] = boxes_xywh[:, 1] + boxes_xywh[:, 3] / 2.0
+
+    detections = []
+    for class_id in np.unique(class_ids):
+        class_mask = class_ids == class_id
+        class_boxes = boxes_xyxy[class_mask]
+        class_scores_masked = confidences[class_mask]
+        order = np.argsort(-class_scores_masked)
+
+        while order.size > 0:
+            best = order[0]
+            detections.append({
+                "label": COCO80_CLASSES[int(class_id)],
+                "class_id": int(class_id),
+                "confidence": float(class_scores_masked[best]),
+                "box_xyxy": class_boxes[best].tolist(),
+            })
+
+            if order.size == 1:
+                break
+
+            rest = order[1:]
+            overlaps = iou_xyxy(class_boxes[best], class_boxes[rest])
+            order = rest[overlaps <= iou_threshold]
+
+    detections.sort(key=lambda det: det["confidence"], reverse=True)
+    return detections[:max_detections]
+
+
+def top_unique_labels(detections, limit: int = 5):
+    """Return the first ``limit`` distinct labels, in the order they appear in ``detections``."""
+    labels = []
+    seen = set()
+    for det in detections:
+        label = det["label"]
+        if label in seen:
+            continue
+        seen.add(label)
+        labels.append(label)
+        if len(labels) == limit:
+            break
+    return labels
+
+
+def clamp_box_xyxy(box_xyxy, width: int, height: int):
+    """Clamp a box to the image and return it with ordered corners.
+
+    Each coordinate is limited to ``[0, width - 1]`` / ``[0, height - 1]``. The
+    corners are then ordered so that ``x1 <= x2`` and ``y1 <= y2``: a decoded box
+    with a negative width or height (possible when the simulator output is
+    wrong) would otherwise be inverted, and ``ImageDraw.rectangle`` rejects
+    such coordinates.
+    """
+    x1, y1, x2, y2 = box_xyxy
+    max_x = float(width - 1)
+    max_y = float(height - 1)
+    x1 = max(0.0, min(max_x, float(x1)))
+    y1 = max(0.0, min(max_y, float(y1)))
+    x2 = max(0.0, min(max_x, float(x2)))
+    y2 = max(0.0, min(max_y, float(y2)))
+    return [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)]
+
+
+def unletterbox_box_xyxy(box_xyxy, width: int, height: int, size: int = 640):
+    """Map a box from letterboxed ``size`` x ``size`` coordinates back to the original image."""
+    scale, offset_x, offset_y = letterbox_params(width, height, size=size)
+    x1, y1, x2, y2 = box_xyxy
+    return [
+        (float(x1) - offset_x) / scale,
+        (float(y1) - offset_y) / scale,
+        (float(x2) - offset_x) / scale,
+        (float(y2) - offset_y) / scale,
+    ]
+
+
+def draw_detections(image_path: Path, detections, output_path: Path, *, limit: int = 10):
+    """Draw the first ``limit`` detections on the image at ``image_path`` and save it to ``output_path``."""
+    image = Image.open(image_path).convert("RGB")
+    draw = ImageDraw.Draw(image)
+    width, height = image.size
+
+    for det in detections[:limit]:
+        box = unletterbox_box_xyxy(det["box_xyxy"], width, height)
+        box = clamp_box_xyxy(box, width, height)
+        label = f'{det["label"]} {det["confidence"]:.2f}'
+        draw.rectangle(box, outline=(255, 0, 0), width=3)
+        text_box = draw.textbbox((box[0], box[1]), label)
+        text_bg = [
+            text_box[0] - 2,
+            text_box[1] - 2,
+            text_box[2] + 2,
+            text_box[3] + 2,
+        ]
+        draw.rectangle(text_bg, fill=(255, 0, 0))
+        draw.text((box[0], box[1]), label, fill=(255, 255, 255))
+
+    image.save(output_path)
+
+
+def ensure_remote_artifacts(args):
+    """Run ``validation/validate.py --compile-only`` for ``args.network_dir`` on the remote host."""
+    remote_project = shlex.quote(args.remote_project)
+    remote_python = shlex.quote(args.remote_python)
+    validate_cmd = (
+        f"export PATH=$HOME/.cargo/bin:$PATH && "
+        f"cd {remote_project} && "
+        f"{remote_python} validation/validate.py "
+        f"--raptor-path build_release/Release/bin/onnx-mlir "
+        f"--onnx-include-dir onnx-mlir/include "
+        f"--operations-dir {shlex.quote(args.network_dir)} "
+        f"--crossbar-size {args.crossbar_size} "
+        f"--crossbar-count {args.crossbar_count} "
+        f"--core-count {args.core_count} "
+        f"--command-timeout-seconds {args.command_timeout_seconds} "
+        f"--compile-only"
+    )
+    remote_bash(args.ssh_key, args.remote_host, validate_cmd)
+
+
+def remote_case_paths(args, case_name: str):
+    """Return the remote paths used for one image case, rooted at ``remote_project / network_dir``."""
+    network_dir = Path(args.network_dir)
+    root = Path(args.remote_project) / network_dir
+    return {
+        "root": root,
+        "runner": root / "runner" / "build" / "runner",
+        "runner_build": root / "runner" / "build",
+        "raptor_pim": root / "raptor" / "pim",
+        "real_root": root / "real_image_validation",
+        "input_csv": root / "real_image_validation" / "inputs" / f"{case_name}.csv",
+        "ref_dir": root / "real_image_validation" / "reference" / case_name,
+        "sim_dir": root / "real_image_validation" / "simulation" / case_name,
+        "sim_bin": root / "real_image_validation" / "simulation" / case_name / "out.bin",
+    }
+
+
+def write_remote_file(args, remote_path: Path, data: bytes):
+    """Create ``remote_path`` (and its parent directory) on the remote host with content ``data``."""
+    command = (
+        f"mkdir -p {shlex.quote(str(remote_path.parent))} && "
+        f"cat > {shlex.quote(str(remote_path))}"
+    )
+    remote_bash(args.ssh_key, args.remote_host, command, input_bytes=data)
+
+
+def run_remote_reference_and_simulator(args, case_name: str):
+    """Run the reference runner and the PIM simulator for one case on the remote host; return the case paths."""
+    paths = remote_case_paths(args, case_name)
+    quoted_project = shlex.quote(args.remote_project)
+    quoted_python = shlex.quote(args.remote_python)
+    quoted_case_csv = shlex.quote(str(paths["input_csv"]))
+    quoted_ref_dir = shlex.quote(str(paths["ref_dir"]))
+    quoted_sim_dir = shlex.quote(str(paths["sim_dir"]))
+    quoted_sim_bin = shlex.quote(str(paths["sim_bin"]))
+    quoted_runner = shlex.quote(str(paths["runner"]))
+    quoted_runner_build = shlex.quote(str(paths["runner_build"]))
+    quoted_pim = shlex.quote(str(paths["raptor_pim"]))
+
+    command = f"""
+set -e
+export
PATH=$HOME/.cargo/bin:$PATH
+cd {quoted_project}
+mkdir -p {quoted_ref_dir} {quoted_sim_dir}
+cd {quoted_runner_build}
+{quoted_runner} --in0-csv-file {quoted_case_csv} --in0-shape 1x3x640x640 --save-csv-dir {quoted_ref_dir}
+cd {quoted_project}
+{quoted_python} - <<'PY'
+import json
+import numpy as np
+from pathlib import Path
+input_csv = Path({json.dumps(str(paths["input_csv"]))})
+pim_dir = Path({json.dumps(str(paths["raptor_pim"]))})
+config = json.loads((pim_dir / "config.json").read_text())
+tensor = np.loadtxt(input_csv, delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
+with open(pim_dir / "memory.bin", "r+b") as f:
+    f.seek(config["inputs_addresses"][0])
+    f.write(tensor.tobytes(order="C"))
+output_addr = config["outputs_addresses"][0]
+output_size = 1 * 84 * 8400 * 4
+print(f"{{output_addr}},{{output_size}}")
+PY
+"""
+    result = remote_bash(args.ssh_key, args.remote_host, command, capture_output=True)
+    # The embedded script prints "<address>,<size>" last; that line becomes the
+    # simulator's dump range.
+    dump_range = result.stdout.decode().strip().splitlines()[-1]
+
+    sim_command = (
+        f"export PATH=$HOME/.cargo/bin:$PATH && "
+        f"cd {quoted_project}/backend-simulators/pim/pim-simulator && "
+        f"cargo run --no-default-features --release --package pim-simulator --bin pim-simulator -- "
+        f"-f {quoted_pim} -o {quoted_sim_bin} -d {dump_range}"
+    )
+    remote_bash(args.ssh_key, args.remote_host, sim_command)
+    return paths
+
+
+def read_remote_file(args, remote_path: Path) -> bytes:
+    """Return the content of ``remote_path`` on the remote host."""
+    result = remote_bash(
+        args.ssh_key,
+        args.remote_host,
+        f"cat {shlex.quote(str(remote_path))}",
+        capture_output=True,
+    )
+    return result.stdout
+
+
+def analyze_case(args, case: ImageCase, work_dir: Path):
+    """Run one image through the remote reference and simulator and report how their outputs differ.
+
+    The image is downloaded and letterboxed locally, uploaded as CSV, and both
+    outputs are fetched back. Annotated copies of the image are written to
+    ``args.annotated_dir``.
+
+    Returns:
+        A JSON-serialisable dict with the top labels, the expected-label checks,
+        absolute/relative difference statistics and the annotated image paths.
+    """
+    from urllib.parse import urlsplit
+
+    # Take the suffix from the URL *path* only: the raw URL may carry a query
+    # string whose dots and '&' characters would otherwise end up in the file name.
+    image_suffix = Path(urlsplit(case.url).path).suffix or '.img'
+    image_path = work_dir / f"{case.name}{image_suffix}"
+    csv_path = work_dir / f"{case.name}.csv"
+    annotated_dir = Path(args.annotated_dir)
+    annotated_dir.mkdir(parents=True, exist_ok=True)
+    download_image(case.url, image_path)
+    tensor = letterbox_rgb(Image.open(image_path))
+    save_tensor_csv(tensor, csv_path)
+
+    remote_paths = remote_case_paths(args, case.name)
+    write_remote_file(args, remote_paths["input_csv"], csv_path.read_bytes())
+    remote_paths = run_remote_reference_and_simulator(args, case.name)
+
+    ref_csv = read_remote_file(args, remote_paths["ref_dir"] / "output0_output0.csv")
+    sim_bin = read_remote_file(args, remote_paths["sim_bin"])
+
+    ref = np.loadtxt(ref_csv.decode().splitlines(), delimiter=",", dtype=np.float32).reshape(1, 84, 8400)
+    sim = np.frombuffer(sim_bin, dtype=np.float32, count=1 * 84 * 8400).reshape(1, 84, 8400)
+    # Compare in float64 so the difference itself adds no rounding error.
+    abs_diff = np.abs(sim.astype(np.float64) - ref.astype(np.float64))
+    # The 1e-12 floor avoids dividing by zero where the reference is exactly 0.
+    rel_diff = abs_diff / np.maximum(np.abs(ref.astype(np.float64)), 1e-12)
+
+    ref_detections = decode_yolo_output(ref)
+    sim_detections = decode_yolo_output(sim)
+    ref_labels = top_unique_labels(ref_detections)
+    sim_labels = top_unique_labels(sim_detections)
+    ref_image_path = annotated_dir / f"{case.name}_reference.png"
+    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
+    draw_detections(image_path, ref_detections, ref_image_path)
+    draw_detections(image_path, sim_detections, sim_image_path)
+
+    return {
+        "case": case.name,
+        "expected_label": case.expected_label,
+        "ref_top_labels": ref_labels,
+        "sim_top_labels": sim_labels,
+        "top1_match": bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0]),
+        "expected_in_ref": case.expected_label in ref_labels,
+        "expected_in_sim": case.expected_label in sim_labels,
+        "max_abs_diff": float(abs_diff.max()),
+        "mean_abs_diff": float(abs_diff.mean()),
+        "max_rel_diff": float(rel_diff.max()),
+        "mean_rel_diff": float(rel_diff.mean()),
+        "reference_annotated_image": str(ref_image_path),
+        "simulator_annotated_image": str(sim_image_path),
+        "ref_top_detections": ref_detections[:5],
+        "sim_top_detections": sim_detections[:5],
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Validate YOLO detections on real animal images against the simulator.")
+    parser.add_argument("--remote-host",
default="gmagnani@monolith") + parser.add_argument("--ssh-key", default="~/.ssh/github") + parser.add_argument("--remote-project", default="/home/gmagnani/Project/Raptor") + parser.add_argument("--remote-python", default="/home/gmagnani/venv/bin/python") + parser.add_argument("--network-dir", default="validation/networks/yolo11n/depth_51") + parser.add_argument("--crossbar-size", type=int, default=2048) + parser.add_argument("--crossbar-count", type=int, default=256) + parser.add_argument("--core-count", type=int, default=1000) + parser.add_argument("--command-timeout-seconds", type=int, default=7200) + parser.add_argument("--skip-compile", action="store_true") + parser.add_argument("--annotated-dir", default="validation/networks/yolo11n/depth_51/real_image_validation/annotated") + args = parser.parse_args() + + args.ssh_key = str(Path(args.ssh_key).expanduser()) + + if not args.skip_compile: + ensure_remote_artifacts(args) + + reports = [] + with tempfile.TemporaryDirectory(prefix="yolo_real_images_") as tmp_dir: + work_dir = Path(tmp_dir) + for case in IMAGE_CASES: + reports.append(analyze_case(args, case, work_dir)) + + print(json.dumps({"network_dir": args.network_dir, "cases": reports}, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/validation/validate.py b/validation/validate.py index 8d234aa..054228b 100644 --- a/validation/validate.py +++ b/validation/validate.py @@ -67,7 +67,10 @@ def main(): ap.add_argument("--operations-dir", default=None, help="Root of the operations tree (default: operations).") ap.add_argument("--simulator-dir", default=None, help="Path to pim-simulator crate root (default: auto-detected relative to script).") - ap.add_argument("--threshold", type=float, default=1e-3, help="Max allowed diff per output element.") + ap.add_argument("--threshold", type=float, default=1e-3, + help="Absolute tolerance for per-element output comparison.") + ap.add_argument("--relative-threshold", type=float, default=1e-5, + help="Relative 
tolerance for per-element output comparison.") ap.add_argument("--seed", type=int, default=0, help="RNG seed for generated validation inputs.") ap.add_argument("--crossbar-size", type=int, default=64) ap.add_argument("--crossbar-count", type=int, default=8) @@ -77,6 +80,8 @@ def main(): help="Scheduler used by the Spatial merge-compute-nodes pass.") ap.add_argument("--pim-memory-report", choices=("none", "summary", "full"), default="none", help="Emit a human-readable PIM memory planning report during codegen.") + ap.add_argument("--raptor-extra-arg", action="append", default=[], + help="Additional argument to pass through to the Raptor compiler. Repeat as needed.") ap.add_argument("--command-timeout-seconds", type=float, default=1000000.0, help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.") ap.add_argument("--clean", action="store_true", @@ -145,8 +150,10 @@ def main(): onnx_path, a.raptor_path, a.onnx_include_dir, simulator_dir, crossbar_size=a.crossbar_size, crossbar_count=a.crossbar_count, core_count=a.core_count, pim_merge_scheduler=a.pim_merge_scheduler, pim_memory_report=a.pim_memory_report, + raptor_extra_args=a.raptor_extra_arg, command_timeout_seconds=a.command_timeout_seconds, threshold=a.threshold, + rtol=a.relative_threshold, seed=a.seed, reporter=reporter, model_index=index, diff --git a/validation/validate_one.py b/validation/validate_one.py index 44858c4..13df8f0 100644 --- a/validation/validate_one.py +++ b/validation/validate_one.py @@ -258,14 +258,18 @@ def parse_pim_simulator_outputs(output_bin_path, outputs_descriptor): return arrays -def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3, verbose=False): +def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3, rtol=1e-5, verbose=False): all_passed = True rows = [] for sim_array, (oi, name, _, shape) in zip(sim_arrays, outputs_descriptor): csv_name = f"output{oi}_{sanitize_output_name(name)}.csv" 
runner_array = np.loadtxt(runner_out_dir / csv_name, delimiter=',', dtype=np.float32).reshape(shape) - max_diff = float(np.max(np.abs(sim_array.astype(np.float64) - runner_array.astype(np.float64)))) - passed = max_diff <= threshold + sim_array64 = sim_array.astype(np.float64) + runner_array64 = runner_array.astype(np.float64) + abs_diff = np.abs(sim_array64 - runner_array64) + allowed_diff = threshold + rtol * np.abs(runner_array64) + max_diff = float(np.max(abs_diff)) + passed = bool(np.all(abs_diff <= allowed_diff)) rows.append((name, f"{max_diff:.6e}", passed)) if not passed: all_passed = False @@ -289,7 +293,8 @@ def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1 def validate_network(network_onnx_path, raptor_path, onnx_include_dir, simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None, - pim_merge_scheduler="peft", pim_memory_report="none", threshold=1e-3, + pim_merge_scheduler="peft", pim_memory_report="none", raptor_extra_args=None, + threshold=1e-3, rtol=1e-5, seed=0, reporter=None, model_index=1, model_total=1, verbose=False, command_timeout_seconds=60.0, mode=MODE_FULL): network_onnx_path = Path(network_onnx_path).resolve() @@ -343,7 +348,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, pim_pass_timings = compile_with_raptor( network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count, core_count=core_count, pim_merge_scheduler=pim_merge_scheduler, - pim_memory_report=pim_memory_report, + pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args, cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") reporter.advance() @@ -383,7 +388,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, pim_pass_timings = compile_with_raptor( network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count, core_count=core_count, 
pim_merge_scheduler=pim_merge_scheduler, - pim_memory_report=pim_memory_report, + pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args, cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds) print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}") reporter.advance() @@ -403,7 +408,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir, print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compare Outputs") sim_arrays = parse_pim_simulator_outputs(output_bin_path, outputs_descriptor) reporter.suspend() - passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold, verbose=verbose) + passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold, rtol=rtol, verbose=verbose) reporter.resume() reporter.advance() reporter.record_result(passed)