Files
Raptor/validation/tools/yolo_real_image_validation.py
T
ilgeco 6d69600bc1
Validate Operations / validate-operations (push) Waiting to run
Yolo Image Validator + new accept rule
2026-06-10 11:59:43 +02:00

426 lines
16 KiB
Python

#!/usr/bin/env python3
import argparse
import json
import shlex
import subprocess
import tempfile
import urllib.request
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from PIL import Image, ImageDraw
# The 80 COCO class names. A label's position in this list is the class id
# used to look the name up when detections are decoded.
COCO80_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
@dataclass(frozen=True)
class ImageCase:
    """Immutable description of one real-image validation case."""

    # Short identifier; used to name per-case files and directories.
    name: str
    # Location the test image is downloaded from.
    url: str
    # Class label the image is expected to produce.
    expected_label: str
# Images exercised by the validation run, as (name, url, expected_label).
IMAGE_CASES = [
    ImageCase(
        "cat_coco_39769",
        "http://images.cocodataset.org/val2017/000000039769.jpg",
        "cat",
    ),
    ImageCase(
        "dog_pytorch_hub",
        "https://github.com/pytorch/hub/raw/master/images/dog.jpg",
        "dog",
    ),
    ImageCase(
        "cute_kitty",
        "https://images.unsplash.com/photo-1529778873920-4da4926a72c2?q=80&w=872&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
        "cat",
    ),
]
def run(cmd, *, cwd=None, capture_output=False, input_bytes=None):
    """Execute ``cmd`` with ``subprocess.run`` and fail on a non-zero exit.

    Args:
        cmd: Command passed straight to ``subprocess.run``.
        cwd: Working directory for the child process, or ``None``.
        capture_output: Forwarded to ``subprocess.run``.
        input_bytes: Data fed to the child's stdin, or ``None``.

    Returns:
        The ``subprocess.CompletedProcess`` for the finished command.

    Raises:
        subprocess.CalledProcessError: The command exited with a non-zero
            status (``check=True`` is always set).
    """
    options = {
        "cwd": cwd,
        "check": True,
        "input": input_bytes,
        "capture_output": capture_output,
    }
    return subprocess.run(cmd, **options)
def ssh_command(ssh_key: str, remote_host: str, command: str):
    """Build the argv list that runs ``command`` on ``remote_host`` over ssh.

    The identity file is passed with ``-i``; ``command`` is kept as a single
    argument.
    """
    argv = ["ssh", "-i", ssh_key]
    argv.extend((remote_host, command))
    return argv
def remote_bash(ssh_key: str, remote_host: str, command: str, *, capture_output=False, input_bytes=None):
    """Run ``command`` on ``remote_host`` through ssh.

    Args:
        ssh_key: Identity file handed to ssh.
        remote_host: ssh destination.
        command: Command string sent to the remote side as one argument.
        capture_output: Forwarded to ``run``.
        input_bytes: Bytes piped to the remote command's stdin, or ``None``.

    Returns:
        Whatever ``run`` returns for the ssh invocation.
    """
    argv = ssh_command(ssh_key, remote_host, command)
    return run(argv, capture_output=capture_output, input_bytes=input_bytes)
def download_image(url: str, path: Path, *, timeout: float = 60.0):
    """Fetch ``url`` and store the raw response body at ``path``.

    Args:
        url: Location to fetch; handed to ``urllib.request.urlopen`` unchanged.
        path: Local file that receives the downloaded bytes.
        timeout: Seconds allowed for blocking network operations before
            ``urlopen`` gives up.

    Raises:
        urllib.error.URLError: The URL could not be opened.
        OSError: The transfer timed out or the file could not be written.
    """
    # Without an explicit timeout a stalled server would block the whole
    # validation run indefinitely.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        path.write_bytes(response.read())
def letterbox_rgb(image: Image.Image, size: int = 640) -> np.ndarray:
    """Letterbox ``image`` into a square tensor.

    The image is converted to RGB, resized with bilinear resampling so it
    fits inside ``size`` x ``size`` while keeping its aspect ratio, and pasted
    centred on a gray (114, 114, 114) canvas.

    Returns:
        A float32 array of shape ``(1, 3, size, size)`` with values divided
        by 255.
    """
    rgb = image.convert("RGB")
    src_w, src_h = rgb.size
    ratio = min(size / src_w, size / src_h)
    # Never let a side collapse to zero pixels for extreme aspect ratios.
    new_w = max(1, int(round(src_w * ratio)))
    new_h = max(1, int(round(src_h * ratio)))

    padded = Image.new("RGB", (size, size), (114, 114, 114))
    padded.paste(
        rgb.resize((new_w, new_h), Image.Resampling.BILINEAR),
        ((size - new_w) // 2, (size - new_h) // 2),
    )

    pixels = np.asarray(padded, dtype=np.float32) / 255.0
    # HWC -> CHW, then add the leading batch axis.
    return pixels.transpose(2, 0, 1)[np.newaxis, ...]
def letterbox_params(width: int, height: int, size: int = 640):
    """Return the scale and padding used to letterbox a ``width`` x ``height`` image.

    Returns:
        ``(scale, offset_x, offset_y)``: the resize factor and the left/top
        offsets of the resized image inside the ``size`` x ``size`` canvas.
    """
    scale = min(size / width, size / height)
    # Resized side lengths, each kept at one pixel or more.
    fitted = [max(1, int(round(side * scale))) for side in (width, height)]
    offset_x, offset_y = ((size - side) // 2 for side in fitted)
    return scale, offset_x, offset_y
def save_tensor_csv(array: np.ndarray, path: Path):
    """Write every element of ``array`` to ``path`` as one comma-separated row.

    Values are formatted with ``%.9g``.
    """
    single_row = array.reshape(1, -1)
    np.savetxt(path, single_row, delimiter=",", fmt="%.9g")
def iou_xyxy(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
    """Intersection-over-union of one box against a batch of boxes.

    Args:
        box: Indexable with at least four entries ``x1, y1, x2, y2``.
        boxes: 2-D array whose first four columns are ``x1, y1, x2, y2``.

    Returns:
        One IoU value per row of ``boxes``; rows whose union area is not
        positive yield 0.
    """

    def area(x_min, y_min, x_max, y_max):
        # Negative extents (inverted or disjoint boxes) count as zero.
        return np.maximum(0.0, x_max - x_min) * np.maximum(0.0, y_max - y_min)

    overlap = area(
        np.maximum(box[0], boxes[:, 0]),
        np.maximum(box[1], boxes[:, 1]),
        np.minimum(box[2], boxes[:, 2]),
        np.minimum(box[3], boxes[:, 3]),
    )
    own_area = area(box[0], box[1], box[2], box[3])
    other_areas = area(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    total = own_area + other_areas - overlap

    # Pre-filled zeros stay in place wherever the division is skipped.
    result = np.zeros_like(overlap)
    np.divide(overlap, total, out=result, where=total > 0)
    return result
def decode_yolo_output(
    output: np.ndarray,
    *,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    max_detections: int = 50,
):
    """Turn a raw model output into a ranked list of detections.

    ``output[0]`` is transposed so each row is one prediction: the first four
    columns are read as centre-x, centre-y, width, height and the remaining
    columns as per-class scores. Predictions whose best class score is below
    ``conf_threshold`` are dropped, then per-class non-maximum suppression
    removes boxes whose IoU with an already kept box exceeds
    ``iou_threshold``.

    Returns:
        At most ``max_detections`` dicts with keys ``label``, ``class_id``,
        ``confidence`` and ``box_xyxy``, sorted by descending confidence.
        An empty list when nothing passes the confidence threshold.
    """
    rows = output[0].T
    boxes_cxcywh = rows[:, :4]
    scores = rows[:, 4:]

    best_class = np.argmax(scores, axis=1)
    best_score = scores[np.arange(scores.shape[0]), best_class]

    confident = best_score >= conf_threshold
    if not np.any(confident):
        return []
    boxes_cxcywh = boxes_cxcywh[confident]
    best_class = best_class[confident]
    best_score = best_score[confident]

    # Centre/size -> corner coordinates.
    half_extent = boxes_cxcywh[:, 2:4] / 2.0
    corners = np.empty_like(boxes_cxcywh)
    corners[:, :2] = boxes_cxcywh[:, :2] - half_extent
    corners[:, 2:] = boxes_cxcywh[:, :2] + half_extent

    kept = []
    for class_id in np.unique(best_class):
        members = best_class == class_id
        member_boxes = corners[members]
        member_scores = best_score[members]
        class_index = int(class_id)

        # Candidate indices, highest score first.
        pending = np.argsort(-member_scores)
        while pending.size:
            winner, pending = pending[0], pending[1:]
            kept.append({
                "label": COCO80_CLASSES[class_index],
                "class_id": class_index,
                "confidence": float(member_scores[winner]),
                "box_xyxy": member_boxes[winner].tolist(),
            })
            if pending.size:
                # Discard remaining candidates that overlap the winner too much.
                ious = iou_xyxy(member_boxes[winner], member_boxes[pending])
                pending = pending[ious <= iou_threshold]

    ranked = sorted(kept, key=lambda item: item["confidence"], reverse=True)
    return ranked[:max_detections]
def top_unique_labels(detections, limit: int = 5):
    """Return the first ``limit`` distinct labels in ``detections`` order.

    Each detection is a mapping with a ``"label"`` key. Scanning stops as soon
    as the number of collected labels equals ``limit``; a ``limit`` that is
    never reached this way (for example ``0``) yields every distinct label.
    """
    # A dict doubles as an insertion-ordered set.
    ordered = {}
    for detection in detections:
        name = detection["label"]
        if name not in ordered:
            ordered[name] = None
            if len(ordered) == limit:
                break
    return list(ordered)
def clamp_box_xyxy(box_xyxy, width: int, height: int):
    """Clamp a box's corners to the pixel grid of a ``width`` x ``height`` image.

    x coordinates are limited to ``[0, width - 1]`` and y coordinates to
    ``[0, height - 1]``.

    Returns:
        ``[x1, y1, x2, y2]`` as floats.
    """
    left, top, right, bottom = box_xyxy
    max_x = float(width - 1)
    max_y = float(height - 1)

    def bound(value, upper):
        return max(0.0, min(upper, float(value)))

    return [
        bound(left, max_x),
        bound(top, max_y),
        bound(right, max_x),
        bound(bottom, max_y),
    ]
def unletterbox_box_xyxy(box_xyxy, width: int, height: int, size: int = 640):
    """Map a box from letterboxed coordinates back to the source image.

    The padding offset is subtracted and the result divided by the resize
    scale, both taken from ``letterbox_params(width, height, size=size)``.

    Returns:
        ``[x1, y1, x2, y2]`` as floats in source-image coordinates (not
        clamped to the image bounds).
    """
    scale, offset_x, offset_y = letterbox_params(width, height, size=size)
    left, top, right, bottom = box_xyxy
    shifted = (
        (left, offset_x),
        (top, offset_y),
        (right, offset_x),
        (bottom, offset_y),
    )
    return [(float(coord) - shift) / scale for coord, shift in shifted]
def draw_detections(image_path: Path, detections, output_path: Path, *, limit: int = 10):
    """Draw the first ``limit`` detections on the image and save the result.

    Each detection's ``box_xyxy`` is mapped back from letterboxed coordinates
    to the source image and clamped to its bounds. A red outline is drawn,
    together with a ``"<label> <confidence>"`` caption in white on a red
    background anchored at the box's top-left corner.
    """
    red = (255, 0, 0)
    white = (255, 255, 255)
    pad = 2  # margin around the caption text, in pixels

    canvas = Image.open(image_path).convert("RGB")
    pen = ImageDraw.Draw(canvas)
    img_w, img_h = canvas.size

    for det in detections[:limit]:
        rect = clamp_box_xyxy(
            unletterbox_box_xyxy(det["box_xyxy"], img_w, img_h),
            img_w,
            img_h,
        )
        caption = f'{det["label"]} {det["confidence"]:.2f}'
        anchor = (rect[0], rect[1])

        pen.rectangle(rect, outline=red, width=3)
        text_left, text_top, text_right, text_bottom = pen.textbbox(anchor, caption)
        pen.rectangle(
            [text_left - pad, text_top - pad, text_right + pad, text_bottom + pad],
            fill=red,
        )
        pen.text(anchor, caption, fill=white)

    canvas.save(output_path)
def ensure_remote_artifacts(args):
    """Run ``validation/validate.py --compile-only`` on the remote host.

    The command is executed from ``args.remote_project`` with
    ``$HOME/.cargo/bin`` prepended to ``PATH``, using ``args.remote_python``
    and the crossbar/core/timeout settings taken from ``args``.
    """
    project_dir = shlex.quote(args.remote_project)
    interpreter = shlex.quote(args.remote_python)
    operations_dir = shlex.quote(args.network_dir)

    validate_invocation = " ".join([
        interpreter,
        "validation/validate.py",
        "--raptor-path build_release/Release/bin/onnx-mlir",
        "--onnx-include-dir onnx-mlir/include",
        f"--operations-dir {operations_dir}",
        f"--crossbar-size {args.crossbar_size}",
        f"--crossbar-count {args.crossbar_count}",
        f"--core-count {args.core_count}",
        f"--command-timeout-seconds {args.command_timeout_seconds}",
        "--compile-only",
    ])
    # Each step only runs if the previous one succeeded.
    validate_cmd = " && ".join([
        "export PATH=$HOME/.cargo/bin:$PATH",
        f"cd {project_dir}",
        validate_invocation,
    ])
    remote_bash(args.ssh_key, args.remote_host, validate_cmd)
def remote_case_paths(args, case_name: str):
    """Compute the remote paths used for one validation case.

    All paths live under ``args.remote_project / args.network_dir``.

    Returns:
        A dict of ``Path`` objects keyed by ``root``, ``runner``,
        ``runner_build``, ``raptor_pim``, ``real_root``, ``input_csv``,
        ``ref_dir``, ``sim_dir`` and ``sim_bin``.
    """
    root = Path(args.remote_project) / Path(args.network_dir)
    build_dir = root / "runner" / "build"
    real_root = root / "real_image_validation"
    sim_dir = real_root / "simulation" / case_name
    return {
        "root": root,
        "runner": build_dir / "runner",
        "runner_build": build_dir,
        "raptor_pim": root / "raptor" / "pim",
        "real_root": real_root,
        "input_csv": real_root / "inputs" / f"{case_name}.csv",
        "ref_dir": real_root / "reference" / case_name,
        "sim_dir": sim_dir,
        "sim_bin": sim_dir / "out.bin",
    }
def write_remote_file(args, remote_path: Path, data: bytes):
    """Create ``remote_path`` on the remote host with ``data`` as its content.

    The parent directory is created first (``mkdir -p``); the bytes are
    streamed over ssh into ``cat``.
    """
    target = shlex.quote(str(remote_path))
    parent = shlex.quote(str(remote_path.parent))
    remote_bash(
        args.ssh_key,
        args.remote_host,
        f"mkdir -p {parent} && cat > {target}",
        input_bytes=data,
    )
def run_remote_reference_and_simulator(args, case_name: str):
    """Run the reference runner and the PIM simulator for one case on the remote host.

    Two ssh commands are issued:

    1. A shell script that creates the reference/simulation directories, runs
       the compiled runner on the case's input CSV (shape ``1x3x640x640``)
       saving its CSV outputs to the reference directory, then runs an inline
       Python snippet that writes the input tensor into ``memory.bin`` at
       ``config["inputs_addresses"][0]`` and prints
       ``<output address>,<output size in bytes>`` as its last stdout line.
    2. ``cargo run`` of ``pim-simulator`` with the PIM directory (``-f``), the
       output file (``-o``) and that printed range (``-d``).

    Args:
        args: Parsed command-line namespace (ssh, project and network settings).
        case_name: Case identifier used to derive the remote paths.

    Returns:
        The dict produced by ``remote_case_paths(args, case_name)``.

    Raises:
        RuntimeError: The first remote command printed nothing, so no dump
            range is available.
        subprocess.CalledProcessError: A remote command exited non-zero.
    """
    paths = remote_case_paths(args, case_name)
    quoted_project = shlex.quote(args.remote_project)
    quoted_python = shlex.quote(args.remote_python)
    quoted_case_csv = shlex.quote(str(paths["input_csv"]))
    quoted_ref_dir = shlex.quote(str(paths["ref_dir"]))
    quoted_sim_dir = shlex.quote(str(paths["sim_dir"]))
    quoted_sim_bin = shlex.quote(str(paths["sim_bin"]))
    quoted_runner = shlex.quote(str(paths["runner"]))
    quoted_runner_build = shlex.quote(str(paths["runner_build"]))
    quoted_pim = shlex.quote(str(paths["raptor_pim"]))

    # The script text must stay flush-left: the heredoc terminator ``PY`` and
    # the embedded Python code are both sensitive to leading whitespace.
    # Paths are embedded into the Python snippet as JSON string literals.
    command = f"""
set -e
export PATH=$HOME/.cargo/bin:$PATH
cd {quoted_project}
mkdir -p {quoted_ref_dir} {quoted_sim_dir}
cd {quoted_runner_build}
{quoted_runner} --in0-csv-file {quoted_case_csv} --in0-shape 1x3x640x640 --save-csv-dir {quoted_ref_dir}
cd {quoted_project}
{quoted_python} - <<'PY'
import json
import numpy as np
from pathlib import Path
input_csv = Path({json.dumps(str(paths["input_csv"]))})
pim_dir = Path({json.dumps(str(paths["raptor_pim"]))})
config = json.loads((pim_dir / "config.json").read_text())
tensor = np.loadtxt(input_csv, delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
with open(pim_dir / "memory.bin", "r+b") as f:
    f.seek(config["inputs_addresses"][0])
    f.write(tensor.tobytes(order="C"))
output_addr = config["outputs_addresses"][0]
output_size = 1 * 84 * 8400 * 4
print(f"{{output_addr}},{{output_size}}")
PY
"""
    result = remote_bash(args.ssh_key, args.remote_host, command, capture_output=True)

    # stdout also carries whatever the runner printed; the range is the last line.
    output_lines = result.stdout.decode().strip().splitlines()
    if not output_lines:
        raise RuntimeError(
            f"remote reference step for case {case_name!r} produced no output; "
            "cannot determine the simulator dump range"
        )
    # Quote the remote-provided text before splicing it into another shell
    # command; a well-formed "<addr>,<size>" value passes through unchanged.
    dump_range = shlex.quote(output_lines[-1].strip())

    sim_command = (
        f"export PATH=$HOME/.cargo/bin:$PATH && "
        f"cd {quoted_project}/backend-simulators/pim/pim-simulator && "
        f"cargo run --no-default-features --release --package pim-simulator --bin pim-simulator -- "
        f"-f {quoted_pim} -o {quoted_sim_bin} -d {dump_range}"
    )
    remote_bash(args.ssh_key, args.remote_host, sim_command)
    return paths
def read_remote_file(args, remote_path: Path) -> bytes:
    """Return the content of ``remote_path`` on the remote host.

    The file is read by running ``cat`` over ssh and capturing its stdout.
    """
    fetch = "cat " + shlex.quote(str(remote_path))
    completed = remote_bash(args.ssh_key, args.remote_host, fetch, capture_output=True)
    return completed.stdout
def analyze_case(args, case: ImageCase, work_dir: Path):
    """Run one image through the reference and the simulator and compare them.

    The image is downloaded into ``work_dir``, letterboxed, uploaded as CSV,
    and processed remotely. The reference CSV output and the simulator binary
    output are fetched, both reshaped to ``(1, 84, 8400)``, compared
    element-wise and decoded into detections. Annotated copies of the image
    are written to ``args.annotated_dir``.

    Args:
        args: Parsed command-line namespace.
        case: The image case to evaluate.
        work_dir: Local scratch directory for the downloaded image and CSV.

    Returns:
        A JSON-serialisable dict with the case name, expected label, top
        labels and top detections of both runs, label-agreement flags,
        max/mean absolute and relative differences, and the annotated image
        paths.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlsplit

    # Take the extension from the URL *path* only: with a query string the
    # whole-URL suffix would be a fragment of the query.
    url_suffix = Path(urlsplit(case.url).path).suffix
    image_path = work_dir / f"{case.name}{url_suffix or '.img'}"
    csv_path = work_dir / f"{case.name}.csv"
    annotated_dir = Path(args.annotated_dir)
    annotated_dir.mkdir(parents=True, exist_ok=True)

    download_image(case.url, image_path)
    with Image.open(image_path) as source_image:
        tensor = letterbox_rgb(source_image)
    save_tensor_csv(tensor, csv_path)

    remote_paths = remote_case_paths(args, case.name)
    write_remote_file(args, remote_paths["input_csv"], csv_path.read_bytes())
    remote_paths = run_remote_reference_and_simulator(args, case.name)

    ref_csv = read_remote_file(args, remote_paths["ref_dir"] / "output0_output0.csv")
    sim_bin = read_remote_file(args, remote_paths["sim_bin"])
    ref = np.loadtxt(ref_csv.decode().splitlines(), delimiter=",", dtype=np.float32).reshape(1, 84, 8400)
    sim = np.frombuffer(sim_bin, dtype=np.float32, count=1 * 84 * 8400).reshape(1, 84, 8400)

    # Differences are computed in float64; the 1e-12 floor avoids dividing by
    # zero where the reference value is exactly 0.
    abs_diff = np.abs(sim.astype(np.float64) - ref.astype(np.float64))
    rel_diff = abs_diff / np.maximum(np.abs(ref.astype(np.float64)), 1e-12)

    ref_detections = decode_yolo_output(ref)
    sim_detections = decode_yolo_output(sim)
    ref_labels = top_unique_labels(ref_detections)
    sim_labels = top_unique_labels(sim_detections)

    ref_image_path = annotated_dir / f"{case.name}_reference.png"
    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
    draw_detections(image_path, ref_detections, ref_image_path)
    draw_detections(image_path, sim_detections, sim_image_path)

    return {
        "case": case.name,
        "expected_label": case.expected_label,
        "ref_top_labels": ref_labels,
        "sim_top_labels": sim_labels,
        "top1_match": bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0]),
        "expected_in_ref": case.expected_label in ref_labels,
        "expected_in_sim": case.expected_label in sim_labels,
        "max_abs_diff": float(abs_diff.max()),
        "mean_abs_diff": float(abs_diff.mean()),
        "max_rel_diff": float(rel_diff.max()),
        "mean_rel_diff": float(rel_diff.mean()),
        "reference_annotated_image": str(ref_image_path),
        "simulator_annotated_image": str(sim_image_path),
        "ref_top_detections": ref_detections[:5],
        "sim_top_detections": sim_detections[:5],
    }
def _build_arg_parser():
    """Create the command-line parser for the validation script."""
    parser = argparse.ArgumentParser(description="Validate YOLO detections on real animal images against the simulator.")
    # Remote access and layout.
    parser.add_argument("--remote-host", default="gmagnani@monolith")
    parser.add_argument("--ssh-key", default="~/.ssh/github")
    parser.add_argument("--remote-project", default="/home/gmagnani/Project/Raptor")
    parser.add_argument("--remote-python", default="/home/gmagnani/venv/bin/python")
    parser.add_argument("--network-dir", default="validation/networks/yolo11n/depth_51")
    # Settings forwarded to the remote compile step.
    parser.add_argument("--crossbar-size", type=int, default=2048)
    parser.add_argument("--crossbar-count", type=int, default=256)
    parser.add_argument("--core-count", type=int, default=1000)
    parser.add_argument("--command-timeout-seconds", type=int, default=7200)
    parser.add_argument("--skip-compile", action="store_true")
    # Local output location.
    parser.add_argument("--annotated-dir", default="validation/networks/yolo11n/depth_51/real_image_validation/annotated")
    return parser


def main():
    """Parse arguments, optionally compile remotely, and report on every image case.

    The per-case reports are printed to stdout as one JSON document.
    """
    args = _build_arg_parser().parse_args()
    # ssh needs a real path; expand a leading "~" locally.
    args.ssh_key = str(Path(args.ssh_key).expanduser())

    if not args.skip_compile:
        ensure_remote_artifacts(args)

    with tempfile.TemporaryDirectory(prefix="yolo_real_images_") as tmp_dir:
        work_dir = Path(tmp_dir)
        reports = [analyze_case(args, case, work_dir) for case in IMAGE_CASES]

    summary = {"network_dir": args.network_dir, "cases": reports}
    print(json.dumps(summary, indent=2))


if __name__ == "__main__":
    main()