3 Commits

Author SHA1 Message Date
ilgeco 852bef7605 ReduceMean + resnet
Validate Operations / validate-operations (push) Waiting to run
2026-06-10 14:30:10 +02:00
ilgeco 237654dadf Fix direct import
Validate Operations / validate-operations (push) Waiting to run
2026-06-10 12:14:20 +02:00
ilgeco 6d69600bc1 Yolo Image Validator + new accept rule
Validate Operations / validate-operations (push) Waiting to run
2026-06-10 11:59:43 +02:00
21 changed files with 897 additions and 21 deletions
+6
View File
@@ -38,6 +38,12 @@ llvm::cl::opt<bool>
llvm::cl::init(false),
llvm::cl::cat(OnnxMlirOptions));
// Boolean command-line flag `--pim-disable-memory-coalescing` (default: false),
// registered in the OnnxMlirOptions category. Per its description it is a
// developer diagnostic switch that skips the PIM memory coalescing pass.
llvm::cl::opt<bool>
pimDisableMemoryCoalescing("pim-disable-memory-coalescing",
llvm::cl::desc("Skip the PIM memory coalescing pass (developer diagnostic option)"),
llvm::cl::init(false),
llvm::cl::cat(OnnxMlirOptions));
llvm::cl::opt<bool> useExperimentalConvImpl("use-experimental-conv-impl",
llvm::cl::desc("Use experimental implementation for convolution"),
llvm::cl::init(false),
+1
View File
@@ -36,6 +36,7 @@ extern llvm::cl::opt<PimMergeSchedulerType> pimMergeScheduler;
extern llvm::cl::opt<PimMemoryReportLevel> pimMemoryReport;
extern llvm::cl::opt<bool> pimOnlyCodegen;
extern llvm::cl::opt<bool> pimDisableMemoryCoalescing;
extern llvm::cl::opt<bool> useExperimentalConvImpl;
extern llvm::cl::opt<bool> pimEmitJson;
+2 -1
View File
@@ -46,7 +46,8 @@ void addPassesPim(OwningOpRef<ModuleOp>& module,
if (pimEmissionTarget >= EmitPimCodegen) {
pm.addPass(createPimHostConstantFoldingPass());
pm.addPass(createMessagePass("Pim host constants folded"));
pm.addPass(createPimMemoryCoalescingPass());
if (!pimDisableMemoryCoalescing)
pm.addPass(createPimMemoryCoalescingPass());
pm.addPass(createPimVerificationPass());
pm.addPass(createMessagePass("Pim verified"));
pm.addPass(createEmitPimCodePass());
@@ -140,6 +140,7 @@ void ONNXToSpatialPass::runOnOperation() {
target.addIllegalOp<ONNXResizeOp>();
target.addIllegalOp<ONNXSliceOp>();
target.addIllegalOp<ONNXLRNOp>();
target.addIllegalOp<ONNXReduceMeanOp>();
target.addIllegalOp<ONNXReduceMeanV13Op>();
target.addIllegalOp<ONNXSplitOp>();
@@ -6,6 +6,8 @@
#include <algorithm>
#include <numeric>
#include <optional>
#include <type_traits>
#include "src/Accelerators/PIM/Common/IR/AffineUtils.hpp"
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
@@ -19,6 +21,85 @@ using namespace mlir;
namespace onnx_mlir {
namespace {
// Resolved reduction parameters for a single ReduceMean operation, as
// produced by getReduceMeanSemantics.
struct ReduceMeanSemantics {
// Axes to reduce over. Left empty when isIdentity is set.
SmallVector<int64_t> axes;
// Copy of the op's `keepdims` attribute; a non-zero value means the reduced
// axes are kept in the result.
int64_t keepdims = 1;
// True when the axes are absent or empty and `noop_with_empty_axes` is
// non-zero, i.e. the op simply forwards its input.
bool isIdentity = false;
};
/// Returns true when `value` is the result of an ONNXNoneOp. Values without a
/// defining operation (e.g. block arguments) yield false.
static bool isNoneValueLike(Value value) {
  Operation* producer = value.getDefiningOp();
  return producer != nullptr && isa<ONNXNoneOp>(producer);
}
/// Extracts the elements of a host-constant integer tensor as int64 values.
/// Fails when `value` does not resolve to a DenseIntElementsAttr.
static FailureOr<SmallVector<int64_t>> getConstantIntValues(Value value) {
  auto intElements = dyn_cast_or_null<DenseIntElementsAttr>(getHostConstDenseElementsAttr(value));
  if (!intElements)
    return failure();

  auto elementRange = intElements.getValues<int64_t>();
  SmallVector<int64_t> collected(elementRange.begin(), elementRange.end());
  return collected;
}
/// Normalizes every entry of `axes` against `rank` via normalizeAxisChecked and
/// returns the result sorted ascending with duplicates removed. Fails as soon
/// as one axis cannot be normalized.
static FailureOr<SmallVector<int64_t>> normalizeAxesChecked(ArrayRef<int64_t> axes, int64_t rank) {
  SmallVector<int64_t> result;
  result.reserve(axes.size());

  for (size_t index = 0, count = axes.size(); index != count; ++index) {
    auto checkedAxis = normalizeAxisChecked(axes[index], rank);
    if (failed(checkedAxis))
      return failure();
    result.push_back(*checkedAxis);
  }

  // Sorting first makes equal axes adjacent so std::unique can drop them.
  llvm::sort(result);
  auto firstDuplicate = std::unique(result.begin(), result.end());
  result.erase(firstDuplicate, result.end());
  return result;
}
/// Resolves the reduction axes, keepdims flag and identity-ness of a ReduceMean.
///
/// For ONNXReduceMeanV13Op the axes come from the `axes` attribute. For any
/// other op type they come from the `axes` operand, which must be None or a
/// host constant; an absent or empty axes operand means "identity" when
/// `noop_with_empty_axes` is non-zero and "reduce every axis" otherwise.
/// Fails when the axes are not constant or cannot be normalized against
/// `inputRank`.
template <typename ReduceMeanOp, typename ReduceMeanOpAdaptor>
static FailureOr<ReduceMeanSemantics>
getReduceMeanSemantics(ReduceMeanOp reduceMeanOp, ReduceMeanOpAdaptor adaptor, int64_t inputRank) {
  ReduceMeanSemantics resolved;
  resolved.keepdims = reduceMeanOp.getKeepdims();

  if constexpr (std::is_same_v<ReduceMeanOp, ONNXReduceMeanV13Op>) {
    auto attributeAxes =
        onnx_mlir::normalizeAxesChecked(std::optional<ArrayAttr>(reduceMeanOp.getAxesAttr()), inputRank);
    if (failed(attributeAxes))
      return failure();
    resolved.axes = std::move(*attributeAxes);
    return resolved;
  }
  else {
    // Shared handling for "no axes given": either a no-op or a full reduction.
    auto resolveWithoutAxes = [&]() {
      if (reduceMeanOp.getNoopWithEmptyAxes() != 0) {
        resolved.isIdentity = true;
        return;
      }
      resolved.axes.reserve(inputRank);
      for (int64_t axis = 0; axis < inputRank; ++axis)
        resolved.axes.push_back(axis);
    };

    if (isNoneValueLike(adaptor.getAxes())) {
      resolveWithoutAxes();
      return resolved;
    }

    auto operandAxes = getConstantIntValues(adaptor.getAxes());
    if (failed(operandAxes))
      return failure();

    if (operandAxes->empty()) {
      resolveWithoutAxes();
      return resolved;
    }

    auto checkedAxes = normalizeAxesChecked(*operandAxes, inputRank);
    if (failed(checkedAxes))
      return failure();
    resolved.axes = std::move(*checkedAxes);
    return resolved;
  }
}
static SmallVector<bool> buildReducedAxesMask(ArrayRef<int64_t> axes, int64_t rank) {
SmallVector<bool> reducedAxes(rank, false);
for (int64_t axis : axes) {
@@ -251,11 +332,13 @@ static Value squeezeReducedAxes(Value keepdimsValue,
return squeezeCompute.getResult(0);
}
struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
using OpConversionPattern::OpConversionPattern;
template <typename ReduceMeanOp>
struct ReduceMeanToSpatialCompute : OpConversionPattern<ReduceMeanOp> {
using OpConversionPattern<ReduceMeanOp>::OpConversionPattern;
using Adaptor = typename ReduceMeanOp::Adaptor;
LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp,
ONNXReduceMeanV13OpAdaptor adaptor,
LogicalResult matchAndRewrite(ReduceMeanOp reduceMeanOp,
Adaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
auto inputType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
auto resultType = dyn_cast<RankedTensorType>(reduceMeanOp.getReduced().getType());
@@ -266,10 +349,18 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
return success();
}
auto axes = normalizeAxesChecked(std::optional<ArrayAttr>(reduceMeanOp.getAxesAttr()), inputType.getRank());
if (failed(axes))
return failure();
SmallVector<bool> reducedAxes = buildReducedAxesMask(*axes, inputType.getRank());
auto semantics = getReduceMeanSemantics(reduceMeanOp, adaptor, inputType.getRank());
if (failed(semantics))
return rewriter.notifyMatchFailure(reduceMeanOp, "requires compile-time constant, in-range ReduceMean axes");
if (semantics->isIdentity) {
if (inputType != resultType)
return rewriter.notifyMatchFailure(
reduceMeanOp, "noop_with_empty_axes identity requires the result type to match the input type");
rewriter.replaceOp(reduceMeanOp, adaptor.getData());
return success();
}
SmallVector<bool> reducedAxes = buildReducedAxesMask(semantics->axes, inputType.getRank());
if (reducedAxes.empty() && inputType.getRank() != 0)
return failure();
@@ -289,7 +380,7 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
Value reducedKeepdims =
buildKeepdimsFromLanePackedBatch(*lanePackedKeepdims, keepdimsType, compactKeptType, reducedAxes, rewriter, loc);
if (reduceMeanOp.getKeepdims() != 0) {
if (semantics->keepdims != 0) {
rewriter.replaceOp(reduceMeanOp, reducedKeepdims);
return success();
}
@@ -303,7 +394,7 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
} // namespace
void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
patterns.add<ReduceMeanToSpatialCompute>(ctx);
patterns.add<ReduceMeanToSpatialCompute<ONNXReduceMeanV13Op>, ReduceMeanToSpatialCompute<ONNXReduceMeanOp>>(ctx);
}
} // namespace onnx_mlir
+4
View File
@@ -8,3 +8,7 @@ networks/**/outputs
networks/**/raptor
networks/**/runner
networks/**/simulation
networks/**/real_image_val
networks/**/*.png
networks/**/*.jpg
networks/**/*.csv
+4 -1
View File
@@ -199,7 +199,10 @@ int main(int argc, char **argv) {{
// ---- Cleanup ----
omTensorListDestroy(in_list);
omTensorListDestroy(out_list);
// Some debug-heavy models return aliased outputs. This runner is a short-
// lived process, so destroy only the list wrapper and let process exit
// reclaim the output tensors safely.
omTensorListDestroyShallow(out_list);
return 0;
}}
"""
Binary file not shown.
+93
View File
@@ -1053,6 +1053,92 @@ def reducemean_large_dimension_1024():
save_model(model, "reduce_mean/large_dimension_1024", "reduce_mean_large_dimension_1024.onnx")
def make_legacy_reducemean_model(name, shape, output_shape, directory, filename, *, axes, keepdims=1,
                                 noop_with_empty_axes=0):
    """Build and save a single-node opset-18 ReduceMean model.

    The graph maps float input "X" (``shape``) to float output "Y"
    (``output_shape``). When ``axes`` is None the optional axes input is left
    empty; otherwise it is supplied as an int64 initializer named "axes".
    """
    input_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, shape)
    output_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, output_shape)

    axes_given = axes is not None
    initializers = [make_int64_initializer("axes", axes)] if axes_given else []
    # An empty string marks the optional second input as omitted.
    node_inputs = ["X", "axes" if axes_given else ""]

    reduce_node = helper.make_node(
        "ReduceMean",
        node_inputs,
        ["Y"],
        keepdims=keepdims,
        noop_with_empty_axes=noop_with_empty_axes,
    )
    graph = helper.make_graph([reduce_node], name, [input_info], [output_info], initializer=initializers)
    opset = helper.make_opsetid("", 18)
    save_model(helper.make_model(graph, opset_imports=[opset]), directory, filename)
def reducemean_legacy_axis1_keepdims_1():
    """Opset-18 ReduceMean along axis 1 with the reduced axis kept (size 1)."""
    make_legacy_reducemean_model(
        "reducemean_legacy_axis1_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[2, 1, 4],
        directory="reduce_mean/legacy_axis1_keepdims_1",
        filename="reduce_mean_legacy_axis1_keepdims_1.onnx",
        axes=[1],
        keepdims=1,
    )
def reducemean_legacy_axis1_keepdims_0():
    """Opset-18 ReduceMean along axis 1 with the reduced axis removed."""
    make_legacy_reducemean_model(
        "reducemean_legacy_axis1_keepdims_0",
        shape=[2, 3, 4],
        output_shape=[2, 4],
        directory="reduce_mean/legacy_axis1_keepdims_0",
        filename="reduce_mean_legacy_axis1_keepdims_0.onnx",
        axes=[1],
        keepdims=0,
    )
def reducemean_legacy_axes_1_2_keepdims_1():
    """Opset-18 ReduceMean along axes 1 and 2 together, keeping rank."""
    make_legacy_reducemean_model(
        "reducemean_legacy_axes_1_2_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[2, 1, 1],
        directory="reduce_mean/legacy_axes_1_2_keepdims_1",
        filename="reduce_mean_legacy_axes_1_2_keepdims_1.onnx",
        axes=[1, 2],
        keepdims=1,
    )
def reducemean_legacy_negative_axis():
    """Opset-18 ReduceMean whose axis is given as -1 (the last dimension)."""
    make_legacy_reducemean_model(
        "reducemean_legacy_negative_axis",
        shape=[2, 3, 4],
        output_shape=[2, 3, 1],
        directory="reduce_mean/legacy_negative_axis",
        filename="reduce_mean_legacy_negative_axis.onnx",
        axes=[-1],
        keepdims=1,
    )
def reducemean_legacy_reduce_all_keepdims_1():
    """Opset-18 ReduceMean with the axes input omitted, reducing every axis."""
    make_legacy_reducemean_model(
        "reducemean_legacy_reduce_all_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[1, 1, 1],
        directory="reduce_mean/legacy_reduce_all_keepdims_1",
        filename="reduce_mean_legacy_reduce_all_keepdims_1.onnx",
        axes=None,
        keepdims=1,
    )
def reducemean_legacy_empty_axes_noop():
    """Opset-18 ReduceMean with an empty axes tensor and noop_with_empty_axes=1.

    The ReduceMean result "R" is fed through a Relu to produce the graph
    output "Y"; input and output both have shape [3, 4].
    """
    tensor_shape = [3, 4]
    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, tensor_shape)
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, tensor_shape)
    empty_axes = make_int64_initializer("axes", [])

    nodes = [
        helper.make_node("ReduceMean", ["X", "axes"], ["R"], keepdims=1, noop_with_empty_axes=1),
        helper.make_node("Relu", ["R"], ["Y"]),
    ]
    graph = helper.make_graph(
        nodes,
        "reducemean_legacy_empty_axes_noop",
        [graph_input],
        [graph_output],
        initializer=[empty_axes],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
    save_model(model, "reduce_mean/legacy_empty_axes_noop", "reduce_mean_legacy_empty_axes_noop.onnx")
def reducemean_legacy_nchw_spatial():
    """Opset-18 ReduceMean across the two spatial axes (2 and 3) of an NCHW tensor."""
    make_legacy_reducemean_model(
        "reducemean_legacy_nchw_spatial",
        shape=[1, 3, 5, 5],
        output_shape=[1, 3, 1, 1],
        directory="reduce_mean/legacy_nchw_spatial",
        filename="reduce_mean_legacy_nchw_spatial.onnx",
        axes=[2, 3],
        keepdims=1,
    )
# ---------------------------------------------------------------------------
# Relu tests
# ---------------------------------------------------------------------------
@@ -1974,6 +2060,13 @@ if __name__ == "__main__":
reducemean_4d_spatial_keepdims_0()
reducemean_channel_axis_nchw()
reducemean_large_dimension_1024()
reducemean_legacy_axis1_keepdims_1()
reducemean_legacy_axis1_keepdims_0()
reducemean_legacy_axes_1_2_keepdims_1()
reducemean_legacy_negative_axis()
reducemean_legacy_reduce_all_keepdims_1()
reducemean_legacy_empty_axes_noop()
reducemean_legacy_nchw_spatial()
print("\nGenerating Relu tests:")
relu_basic()
+4 -1
View File
@@ -41,7 +41,8 @@ def _format_command(cmd):
def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path,
crossbar_size, crossbar_count, core_count=None, pim_merge_scheduler="peft",
pim_memory_report="none", cwd=None, verbose=False, reporter=None, timeout_sec=None):
pim_memory_report="none", raptor_extra_args=None, cwd=None, verbose=False,
reporter=None, timeout_sec=None):
# Define the arguments, with the possibility to set crossbar size and count
args = [
network_path,
@@ -57,6 +58,8 @@ def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path,
args.append(f"--core-count={core_count}")
if pim_memory_report != "none":
args.append(f"--pim-memory-report={pim_memory_report}")
if raptor_extra_args:
args.extend(str(arg) for arg in raptor_extra_args)
if verbose:
args.append("--enable-timing")
@@ -0,0 +1,236 @@
#!/usr/bin/env python3
import argparse
import json
import subprocess
import sys
import tempfile
from pathlib import Path
import numpy as np
from PIL import Image
SCRIPT_DIR = Path(__file__).resolve().parent
VALIDATION_DIR = SCRIPT_DIR.parent
if str(VALIDATION_DIR) not in sys.path:
sys.path.insert(0, str(VALIDATION_DIR))
if sys.version_info < (3, 10):
raise SystemExit(
"yolo_local_image_validation.py requires Python 3.10+ because validation modules use modern type syntax. "
"Run it with a newer interpreter, for example your project venv Python."
)
from onnx_utils import _ONNX_TO_NP, onnx_io, write_inputs_to_memory_bin
from validate_one import MODE_COMPILE_ONLY, build_dump_ranges, run_pim_simulator, sanitize_output_name, validate_network
from yolo_real_image_validation import (
IMAGE_CASES,
decode_yolo_output,
download_image,
draw_detections,
letterbox_rgb,
save_tensor_csv,
top_unique_labels,
)
def resolve_default_paths():
    """Return the default filesystem locations used by this script.

    All paths are derived from this file's location: the validation directory
    is the parent of the script's directory and the repository root is the
    parent of the validation directory.
    """
    validation_root = Path(__file__).resolve().parent.parent
    repository_root = validation_root.parent

    defaults = {"validation_dir": validation_root, "repo_root": repository_root}
    defaults["network_dir"] = validation_root.joinpath("networks", "yolo11n", "depth_51")
    defaults["raptor_path"] = repository_root.joinpath("build_release", "Release", "bin", "onnx-mlir")
    defaults["onnx_include_dir"] = repository_root.joinpath("onnx-mlir", "include")
    defaults["simulator_dir"] = repository_root.joinpath("backend-simulators", "pim", "pim-simulator")
    return defaults
def find_network_onnx(network_dir: Path) -> Path:
    """Return the single ``*.onnx`` file located directly inside ``network_dir``.

    Raises:
        FileNotFoundError: no ``.onnx`` file is present.
        RuntimeError: more than one ``.onnx`` file is present.
    """
    candidates = sorted(network_dir.glob("*.onnx"))
    if len(candidates) == 1:
        return candidates[0]
    if not candidates:
        raise FileNotFoundError(f"No .onnx file found in {network_dir}")
    listing = ", ".join(candidate.name for candidate in candidates)
    raise RuntimeError(f"Expected exactly one .onnx file in {network_dir}, found: {listing}")
def local_case_paths(network_dir: Path, case_name: str):
    """Map logical artifact names to their paths under ``network_dir`` for one image case."""
    runner_build_dir = network_dir / "runner" / "build"
    real_image_root = network_dir / "real_image_validation"
    simulation_dir = real_image_root / "simulation" / case_name

    layout = {}
    layout["root"] = network_dir
    layout["runner"] = runner_build_dir / "runner"
    layout["runner_build"] = runner_build_dir
    layout["raptor_pim"] = network_dir / "raptor" / "pim"
    layout["real_root"] = real_image_root
    layout["input_csv"] = real_image_root / "inputs" / f"{case_name}.csv"
    layout["ref_dir"] = real_image_root / "reference" / case_name
    layout["sim_dir"] = simulation_dir
    layout["sim_bin"] = simulation_dir / "out.bin"
    return layout
def ensure_local_artifacts(args, network_onnx_path: Path):
    """Run ``validate_network`` in compile-only mode for ``network_onnx_path``.

    Tool locations, crossbar/core settings, timeout and verbosity are all
    taken from the parsed command-line ``args``.
    """
    compile_options = {
        "network_onnx_path": network_onnx_path,
        "raptor_path": args.raptor_path,
        "onnx_include_dir": args.onnx_include_dir,
        "simulator_dir": args.simulator_dir,
        "crossbar_size": args.crossbar_size,
        "crossbar_count": args.crossbar_count,
        "core_count": args.core_count,
        "command_timeout_seconds": args.command_timeout_seconds,
        "mode": MODE_COMPILE_ONLY,
        "verbose": args.verbose,
    }
    validate_network(**compile_options)
def ensure_existing_artifacts(network_dir: Path):
    """Check that the compiled runner and PIM artifacts exist under ``network_dir``.

    Raises:
        FileNotFoundError: listing every required path that is missing.
    """
    pim_dir = network_dir / "raptor" / "pim"
    expected = (
        network_dir / "runner" / "build" / "runner",
        pim_dir / "config.json",
        pim_dir / "memory.bin",
    )

    absent = []
    for candidate in expected:
        if not candidate.exists():
            absent.append(str(candidate))
    if not absent:
        return

    raise FileNotFoundError(
        "Missing compiled local artifacts. Re-run without --skip-compile or restore these paths:\n "
        + "\n ".join(absent)
    )
def run_local_reference_and_simulator(args, network_dir: Path, network_onnx_path: Path, case_name: str):
    """Produce reference and simulator outputs for one already-written input CSV.

    Steps: run the compiled runner on the case's input CSV (saving CSV outputs
    to the reference directory), write the same tensor into the PIM
    ``memory.bin``, then run the PIM simulator and dump the output range to
    the case's ``out.bin``.

    Returns:
        ``(paths, output_descriptor)`` where ``paths`` is the dict from
        ``local_case_paths`` and ``output_descriptor`` is the model's single
        output descriptor.

    Raises:
        RuntimeError: the model does not have exactly one output.
        subprocess.CalledProcessError: the runner exits with a non-zero status.
    """
    paths = local_case_paths(network_dir, case_name)
    for directory_key in ("ref_dir", "sim_dir"):
        paths[directory_key].mkdir(parents=True, exist_ok=True)

    output_descriptors = onnx_io(network_onnx_path)[1]
    if len(output_descriptors) != 1:
        raise RuntimeError(f"Expected one YOLO output tensor, found {len(output_descriptors)}")

    input_csv = paths["input_csv"]
    subprocess.run(
        [
            str(paths["runner"]),
            "--in0-csv-file",
            str(input_csv),
            "--in0-shape",
            "1x3x640x640",
            "--save-csv-dir",
            str(paths["ref_dir"]),
        ],
        cwd=paths["runner_build"],
        check=True,
    )

    # Feed the simulator the exact values the reference runner consumed.
    config_path = paths["raptor_pim"] / "config.json"
    input_tensor = np.loadtxt(input_csv, delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
    write_inputs_to_memory_bin(paths["raptor_pim"] / "memory.bin", config_path, [input_tensor])

    run_pim_simulator(
        args.simulator_dir,
        paths["raptor_pim"],
        paths["sim_bin"],
        build_dump_ranges(config_path, output_descriptors),
        timeout_sec=args.command_timeout_seconds,
    )
    return paths, output_descriptors[0]
def analyze_case(args, network_dir: Path, network_onnx_path: Path, case, work_dir: Path):
    """Download one test image, run reference and simulator, and compare them.

    Args:
        args: parsed command-line arguments (uses ``annotated_dir`` here and is
            forwarded to ``run_local_reference_and_simulator``).
        network_dir: directory holding the compiled network artifacts.
        network_onnx_path: ONNX model used to look up the output descriptor.
        case: image case providing ``name``, ``url`` and ``expected_label``.
        work_dir: scratch directory for the downloaded image and input CSV.

    Returns:
        A JSON-serialisable dict with label agreement, absolute/relative
        difference statistics, annotated image paths and the top detections.
    """
    from urllib.parse import urlparse

    # Derive the extension from the URL *path* only; using the whole URL lets a
    # query string (e.g. "?q=80&ixlib=rb-4.1.0&...") leak into the file name.
    image_suffix = Path(urlparse(case.url).path).suffix or ".img"
    image_path = work_dir / f"{case.name}{image_suffix}"
    csv_path = work_dir / f"{case.name}.csv"
    annotated_dir = args.annotated_dir
    annotated_dir.mkdir(parents=True, exist_ok=True)

    download_image(case.url, image_path)
    # Close the image file as soon as the tensor has been built.
    with Image.open(image_path) as source_image:
        tensor = letterbox_rgb(source_image)
    save_tensor_csv(tensor, csv_path)

    paths = local_case_paths(network_dir, case.name)
    paths["input_csv"].parent.mkdir(parents=True, exist_ok=True)
    paths["input_csv"].write_bytes(csv_path.read_bytes())

    paths, output_descriptor = run_local_reference_and_simulator(args, network_dir, network_onnx_path, case.name)
    output_index, output_name, output_dtype_code, output_shape = output_descriptor
    output_dtype = np.dtype(_ONNX_TO_NP[output_dtype_code])
    ref_csv_path = paths["ref_dir"] / f"output{output_index}_{sanitize_output_name(output_name)}.csv"
    ref = np.loadtxt(ref_csv_path, delimiter=",", dtype=output_dtype).reshape(output_shape)
    sim = np.frombuffer(
        paths["sim_bin"].read_bytes(),
        dtype=output_dtype,
        count=int(np.prod(output_shape)),
    ).reshape(output_shape)

    abs_diff = np.abs(sim.astype(np.float64) - ref.astype(np.float64))
    # The 1e-12 floor avoids dividing by zero where the reference is exactly 0.
    rel_diff = abs_diff / np.maximum(np.abs(ref.astype(np.float64)), 1e-12)
    ref_detections = decode_yolo_output(ref)
    sim_detections = decode_yolo_output(sim)
    ref_labels = top_unique_labels(ref_detections)
    sim_labels = top_unique_labels(sim_detections)
    ref_image_path = annotated_dir / f"{case.name}_reference.png"
    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
    draw_detections(image_path, ref_detections, ref_image_path)
    draw_detections(image_path, sim_detections, sim_image_path)

    return {
        "case": case.name,
        "expected_label": case.expected_label,
        "ref_top_labels": ref_labels,
        "sim_top_labels": sim_labels,
        "top1_match": bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0]),
        "expected_in_ref": case.expected_label in ref_labels,
        "expected_in_sim": case.expected_label in sim_labels,
        "max_abs_diff": float(abs_diff.max()),
        "mean_abs_diff": float(abs_diff.mean()),
        "max_rel_diff": float(rel_diff.max()),
        "mean_rel_diff": float(rel_diff.mean()),
        "reference_annotated_image": str(ref_image_path),
        "simulator_annotated_image": str(sim_image_path),
        "ref_top_detections": ref_detections[:5],
        "sim_top_detections": sim_detections[:5],
    }
def main():
    """Command-line entry point: compile (or verify) artifacts, run every image case, print a JSON report."""
    defaults = resolve_default_paths()
    parser = argparse.ArgumentParser(description="Validate YOLO detections on real images using local compilation and simulator execution.")
    parser.add_argument("--network-dir", type=Path, default=defaults["network_dir"])
    parser.add_argument("--network-onnx", type=Path, default=None)
    parser.add_argument("--raptor-path", type=Path, default=defaults["raptor_path"])
    parser.add_argument("--onnx-include-dir", type=Path, default=defaults["onnx_include_dir"])
    parser.add_argument("--simulator-dir", type=Path, default=defaults["simulator_dir"])
    parser.add_argument("--crossbar-size", type=int, default=2048)
    parser.add_argument("--crossbar-count", type=int, default=256)
    parser.add_argument("--core-count", type=int, default=1000)
    parser.add_argument("--command-timeout-seconds", type=float, default=7200.0)
    parser.add_argument("--skip-compile", action="store_true")
    parser.add_argument("--verbose", action="store_true")
    # Resolved after parsing so the default follows a user-supplied
    # --network-dir instead of always pointing into the default network's tree.
    parser.add_argument("--annotated-dir", type=Path, default=None)
    args = parser.parse_args()

    args.network_dir = args.network_dir.resolve()
    args.network_onnx = args.network_onnx.resolve() if args.network_onnx else find_network_onnx(args.network_dir)
    args.raptor_path = args.raptor_path.resolve()
    args.onnx_include_dir = args.onnx_include_dir.resolve()
    args.simulator_dir = args.simulator_dir.resolve()
    if args.annotated_dir is None:
        args.annotated_dir = args.network_dir / "real_image_validation" / "annotated"
    args.annotated_dir = args.annotated_dir.resolve()

    if not args.skip_compile:
        ensure_local_artifacts(args, args.network_onnx)
    else:
        ensure_existing_artifacts(args.network_dir)

    reports = []
    # Downloaded images and intermediate CSVs live only for the duration of the run.
    with tempfile.TemporaryDirectory(prefix="yolo_local_images_") as tmp_dir:
        work_dir = Path(tmp_dir)
        for case in IMAGE_CASES:
            reports.append(analyze_case(args, args.network_dir, args.network_onnx, case, work_dir))

    print(json.dumps({"network_dir": str(args.network_dir), "network_onnx": str(args.network_onnx), "cases": reports}, indent=2))
if __name__ == "__main__":
main()
@@ -0,0 +1,425 @@
#!/usr/bin/env python3
import argparse
import json
import shlex
import subprocess
import tempfile
import urllib.request
from dataclasses import dataclass
from pathlib import Path
import numpy as np
from PIL import Image, ImageDraw
# Class names indexed by class id; decode_yolo_output looks labels up here
# with the arg-max class index of each prediction.
COCO80_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
    "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
    "scissors", "teddy bear", "hair drier", "toothbrush",
]
@dataclass(frozen=True)
class ImageCase:
    """Immutable description of one real-image validation case."""

    # Identifier used to build per-case file and directory names.
    name: str
    # Location the test image is downloaded from.
    url: str
    # Label that is checked for membership in the top detected labels.
    expected_label: str
# Image cases exercised by the validation scripts. All images are fetched over
# the network at run time.
# NOTE(review): the first URL uses plain http and the last one carries a query
# string — confirm both are intended.
IMAGE_CASES = [
    ImageCase(
        name="cat_coco_39769",
        url="http://images.cocodataset.org/val2017/000000039769.jpg",
        expected_label="cat",
    ),
    ImageCase(
        name="dog_pytorch_hub",
        url="https://github.com/pytorch/hub/raw/master/images/dog.jpg",
        expected_label="dog",
    ),
    ImageCase(
        name="cute_kitty",
        url="https://images.unsplash.com/photo-1529778873920-4da4926a72c2?q=80&w=872&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D" ,
        expected_label="cat",
    ),
]
def run(cmd, *, cwd=None, capture_output=False, input_bytes=None):
    """Execute ``cmd`` with ``subprocess.run`` and return the completed process.

    A non-zero exit status raises ``subprocess.CalledProcessError``.
    ``input_bytes`` (if any) is sent to the child's stdin.
    """
    options = {
        "cwd": cwd,
        "check": True,
        "input": input_bytes,
        "capture_output": capture_output,
    }
    return subprocess.run(cmd, **options)
def ssh_command(ssh_key: str, remote_host: str, command: str):
    """Build the argv list that runs ``command`` on ``remote_host`` via ssh with identity file ``ssh_key``."""
    argv = ["ssh", "-i", ssh_key]
    argv.extend((remote_host, command))
    return argv
def remote_bash(ssh_key: str, remote_host: str, command: str, *, capture_output=False, input_bytes=None):
    """Run ``command`` on ``remote_host`` over ssh and return the completed process.

    ``capture_output`` and ``input_bytes`` are forwarded to ``run``.
    """
    argv = ssh_command(ssh_key, remote_host, command)
    return run(argv, capture_output=capture_output, input_bytes=input_bytes)
def download_image(url: str, path: Path, timeout: float = 60.0):
    """Fetch ``url`` and write the response body to ``path``.

    Args:
        url: location of the resource to download.
        path: destination file; overwritten if it exists.
        timeout: seconds to wait on blocking network operations before
            giving up, so an unresponsive server cannot hang the run forever.

    Raises:
        urllib.error.URLError: the resource cannot be opened.
        OSError: on other I/O failures such as a socket timeout.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        # The body is read completely before anything is written to disk.
        path.write_bytes(response.read())
def letterbox_rgb(image: Image.Image, size: int = 640) -> np.ndarray:
    """Resize ``image`` to fit a ``size`` x ``size`` square, padding with grey.

    The aspect ratio is preserved and the resized image is centred on a
    (114, 114, 114) canvas. Returns a float32 array of shape
    (1, 3, size, size) with values scaled to [0, 1].
    """
    rgb = image.convert("RGB")
    source_w, source_h = rgb.size
    ratio = min(size / source_w, size / source_h)
    # Never let a dimension collapse to zero pixels.
    target_w = max(1, int(round(source_w * ratio)))
    target_h = max(1, int(round(source_h * ratio)))
    shrunk = rgb.resize((target_w, target_h), Image.Resampling.BILINEAR)

    padded = Image.new("RGB", (size, size), (114, 114, 114))
    top_left = ((size - target_w) // 2, (size - target_h) // 2)
    padded.paste(shrunk, top_left)

    hwc = np.asarray(padded, dtype=np.float32) / 255.0
    return np.expand_dims(np.transpose(hwc, (2, 0, 1)), axis=0)
def letterbox_params(width: int, height: int, size: int = 640):
    """Return ``(scale, offset_x, offset_y)`` of the letterbox transform.

    ``scale`` is the factor that fits a ``width`` x ``height`` image inside a
    ``size`` square; the offsets are the left/top padding that centres it.
    """
    scale = min(size / width, size / height)
    offsets = []
    for extent in (width, height):
        scaled_extent = max(1, int(round(extent * scale)))
        offsets.append((size - scaled_extent) // 2)
    return scale, offsets[0], offsets[1]
def save_tensor_csv(array: np.ndarray, path: Path):
    """Write ``array`` flattened as one comma-separated row to ``path`` using ``%.9g`` formatting."""
    single_row = array.reshape(-1).reshape(1, -1)
    np.savetxt(path, single_row, delimiter=",", fmt="%.9g")
def iou_xyxy(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
    """Intersection-over-union of one ``[x1, y1, x2, y2]`` box against each row of ``boxes``.

    Negative extents are clamped to zero, and entries whose union is not
    positive yield 0 instead of dividing by zero.
    """
    def _area(x1, y1, x2, y2):
        # Area with each side clamped at zero.
        return np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)

    left = np.maximum(box[0], boxes[:, 0])
    top = np.maximum(box[1], boxes[:, 1])
    right = np.minimum(box[2], boxes[:, 2])
    bottom = np.minimum(box[3], boxes[:, 3])
    overlap_w = np.maximum(0.0, right - left)
    overlap_h = np.maximum(0.0, bottom - top)
    intersection = overlap_w * overlap_h

    single_area = _area(box[0], box[1], box[2], box[3])
    many_areas = _area(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    union = single_area + many_areas - intersection
    return np.divide(intersection, union, out=np.zeros_like(intersection), where=union > 0)
def decode_yolo_output(
    output: np.ndarray,
    *,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    max_detections: int = 50,
):
    """Turn a raw YOLO output tensor into a confidence-sorted detection list.

    ``output[0]`` is transposed so that each row is one prediction whose first
    four values are a centre-format box (x, y, w, h) and whose remaining
    values are per-class scores. Predictions below ``conf_threshold`` are
    dropped, per-class non-maximum suppression removes boxes overlapping a
    better box by more than ``iou_threshold``, and at most ``max_detections``
    results are returned.

    Returns:
        A list of dicts with keys ``label``, ``class_id``, ``confidence`` and
        ``box_xyxy``; empty when nothing passes the confidence threshold.
    """
    rows = output[0].T
    boxes_xywh = rows[:, :4]
    scores = rows[:, 4:]
    class_ids = np.argmax(scores, axis=1)
    confidences = scores[np.arange(scores.shape[0]), class_ids]

    passing = confidences >= conf_threshold
    if not np.any(passing):
        return []
    boxes_xywh, class_ids, confidences = boxes_xywh[passing], class_ids[passing], confidences[passing]

    # Centre/size -> corner coordinates.
    half_sizes = boxes_xywh[:, 2:4] / 2.0
    boxes_xyxy = np.empty_like(boxes_xywh)
    boxes_xyxy[:, :2] = boxes_xywh[:, :2] - half_sizes
    boxes_xyxy[:, 2:] = boxes_xywh[:, :2] + half_sizes

    detections = []
    for class_id in np.unique(class_ids):
        selector = class_ids == class_id
        candidate_boxes = boxes_xyxy[selector]
        candidate_scores = confidences[selector]
        label = COCO80_CLASSES[int(class_id)]

        # Greedy NMS: repeatedly keep the best remaining box, drop its overlaps.
        remaining = np.argsort(-candidate_scores)
        while remaining.size > 0:
            winner, remaining = remaining[0], remaining[1:]
            detections.append({
                "label": label,
                "class_id": int(class_id),
                "confidence": float(candidate_scores[winner]),
                "box_xyxy": candidate_boxes[winner].tolist(),
            })
            if remaining.size == 0:
                break
            overlaps = iou_xyxy(candidate_boxes[winner], candidate_boxes[remaining])
            remaining = remaining[overlaps <= iou_threshold]

    ranked = sorted(detections, key=lambda det: det["confidence"], reverse=True)
    return ranked[:max_detections]
def top_unique_labels(detections, limit: int = 5):
    """Return the first ``limit`` distinct labels in ``detections``, in order of first appearance."""
    # A dict keeps insertion order and gives constant-time membership tests.
    ordered = {}
    for detection in detections:
        name = detection["label"]
        if name not in ordered:
            ordered[name] = None
            if len(ordered) == limit:
                break
    return list(ordered)
def clamp_box_xyxy(box_xyxy, width: int, height: int):
    """Clamp an ``[x1, y1, x2, y2]`` box into the pixel range of a ``width`` x ``height`` image.

    X coordinates are limited to ``[0, width - 1]`` and Y coordinates to
    ``[0, height - 1]``; the result is a list of four floats.
    """
    def _limit(value, upper):
        return max(0.0, min(float(upper), float(value)))

    x1, y1, x2, y2 = box_xyxy
    x_max, y_max = width - 1, height - 1
    return [_limit(x1, x_max), _limit(y1, y_max), _limit(x2, x_max), _limit(y2, y_max)]
def unletterbox_box_xyxy(box_xyxy, width: int, height: int, size: int = 640):
    """Map an ``[x1, y1, x2, y2]`` box from letterboxed space back to the original image.

    The padding offsets are subtracted and the result is divided by the
    letterbox scale computed for a ``width`` x ``height`` source image.
    """
    scale, pad_x, pad_y = letterbox_params(width, height, size=size)
    x1, y1, x2, y2 = box_xyxy
    corners_and_pads = ((x1, pad_x), (y1, pad_y), (x2, pad_x), (y2, pad_y))
    return [(float(coordinate) - padding) / scale for coordinate, padding in corners_and_pads]
def draw_detections(image_path: Path, detections, output_path: Path, *, limit: int = 10):
    """Draw the first ``limit`` detections onto the image and save it to ``output_path``.

    Each box is mapped back from letterboxed coordinates, clamped to the
    image, outlined in red and labelled with "<label> <confidence>" on a red
    background.
    """
    red, white = (255, 0, 0), (255, 255, 255)
    canvas = Image.open(image_path).convert("RGB")
    pen = ImageDraw.Draw(canvas)
    image_w, image_h = canvas.size

    for detection in detections[:limit]:
        restored = unletterbox_box_xyxy(detection["box_xyxy"], image_w, image_h)
        rect = clamp_box_xyxy(restored, image_w, image_h)
        caption = f'{detection["label"]} {detection["confidence"]:.2f}'
        anchor = (rect[0], rect[1])

        pen.rectangle(rect, outline=red, width=3)
        # Pad the text bounding box by 2 px on every side for the background.
        left, top, right, bottom = pen.textbbox(anchor, caption)[:4]
        pen.rectangle([left - 2, top - 2, right + 2, bottom + 2], fill=red)
        pen.text(anchor, caption, fill=white)

    canvas.save(output_path)
def ensure_remote_artifacts(args):
    """Run ``validation/validate.py --compile-only`` on the remote host for ``args.network_dir``.

    The remote project directory, Python interpreter and network directory
    are shell-quoted before being placed into the command line.
    """
    validate_invocation = " ".join([
        f"{shlex.quote(args.remote_python)} validation/validate.py",
        "--raptor-path build_release/Release/bin/onnx-mlir",
        "--onnx-include-dir onnx-mlir/include",
        f"--operations-dir {shlex.quote(args.network_dir)}",
        f"--crossbar-size {args.crossbar_size}",
        f"--crossbar-count {args.crossbar_count}",
        f"--core-count {args.core_count}",
        f"--command-timeout-seconds {args.command_timeout_seconds}",
        "--compile-only",
    ])
    steps = [
        "export PATH=$HOME/.cargo/bin:$PATH",
        f"cd {shlex.quote(args.remote_project)}",
        validate_invocation,
    ]
    remote_bash(args.ssh_key, args.remote_host, " && ".join(steps))
def remote_case_paths(args, case_name: str):
    """Map logical artifact names to remote paths for one image case.

    Every path is rooted at ``args.remote_project`` joined with
    ``args.network_dir``.
    """
    root = Path(args.remote_project) / Path(args.network_dir)
    build_dir = root / "runner" / "build"
    real_root = root / "real_image_validation"
    sim_dir = real_root / "simulation" / case_name

    layout = {}
    layout["root"] = root
    layout["runner"] = build_dir / "runner"
    layout["runner_build"] = build_dir
    layout["raptor_pim"] = root / "raptor" / "pim"
    layout["real_root"] = real_root
    layout["input_csv"] = real_root / "inputs" / f"{case_name}.csv"
    layout["ref_dir"] = real_root / "reference" / case_name
    layout["sim_dir"] = sim_dir
    layout["sim_bin"] = sim_dir / "out.bin"
    return layout
def write_remote_file(args, remote_path: Path, data: bytes):
    """Create ``remote_path`` (and its parent directory) on the remote host with content ``data``.

    The bytes are streamed over ssh into ``cat`` on the remote side.
    """
    parent_dir = shlex.quote(str(remote_path.parent))
    target_file = shlex.quote(str(remote_path))
    remote_bash(
        args.ssh_key,
        args.remote_host,
        f"mkdir -p {parent_dir} && cat > {target_file}",
        input_bytes=data,
    )
def run_remote_reference_and_simulator(args, case_name: str):
    """Run the reference runner and the PIM simulator on the remote host for one case.

    First remote command: creates the reference/simulation directories, runs
    the compiled runner on the case's input CSV, then executes an inline
    Python snippet that writes the input tensor into ``memory.bin`` and prints
    "<output address>,<output size>" as its last line. Second remote command:
    runs the simulator via ``cargo run`` and dumps that range to ``out.bin``.

    Returns:
        The path dict from ``remote_case_paths`` for this case.
    """
    paths = remote_case_paths(args, case_name)
    # Everything interpolated into shell text is quoted up front.
    quoted_project = shlex.quote(args.remote_project)
    quoted_python = shlex.quote(args.remote_python)
    quoted_case_csv = shlex.quote(str(paths["input_csv"]))
    quoted_ref_dir = shlex.quote(str(paths["ref_dir"]))
    quoted_sim_dir = shlex.quote(str(paths["sim_dir"]))
    quoted_sim_bin = shlex.quote(str(paths["sim_bin"]))
    quoted_runner = shlex.quote(str(paths["runner"]))
    quoted_runner_build = shlex.quote(str(paths["runner_build"]))
    quoted_pim = shlex.quote(str(paths["raptor_pim"]))
    # The heredoc delimiter is quoted ('PY'), so the remote shell performs no
    # expansion inside the embedded Python; paths are injected as JSON string
    # literals via json.dumps. Doubled braces are literal braces in the
    # f-string, i.e. they form the remote script's own f-string.
    # NOTE(review): the input shape (1, 3, 640, 640) and the output size
    # 1 * 84 * 8400 * 4 bytes are hard-coded — confirm they match the model.
    command = f"""
set -e
export PATH=$HOME/.cargo/bin:$PATH
cd {quoted_project}
mkdir -p {quoted_ref_dir} {quoted_sim_dir}
cd {quoted_runner_build}
{quoted_runner} --in0-csv-file {quoted_case_csv} --in0-shape 1x3x640x640 --save-csv-dir {quoted_ref_dir}
cd {quoted_project}
{quoted_python} - <<'PY'
import json
import numpy as np
from pathlib import Path
input_csv = Path({json.dumps(str(paths["input_csv"]))})
pim_dir = Path({json.dumps(str(paths["raptor_pim"]))})
config = json.loads((pim_dir / "config.json").read_text())
tensor = np.loadtxt(input_csv, delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
with open(pim_dir / "memory.bin", "r+b") as f:
    f.seek(config["inputs_addresses"][0])
    f.write(tensor.tobytes(order="C"))
output_addr = config["outputs_addresses"][0]
output_size = 1 * 84 * 8400 * 4
print(f"{{output_addr}},{{output_size}}")
PY
"""
    result = remote_bash(args.ssh_key, args.remote_host, command, capture_output=True)
    # The runner may print to stdout as well; only the last line is the range.
    dump_range = result.stdout.decode().strip().splitlines()[-1]
    sim_command = (
        f"export PATH=$HOME/.cargo/bin:$PATH && "
        f"cd {quoted_project}/backend-simulators/pim/pim-simulator && "
        f"cargo run --no-default-features --release --package pim-simulator --bin pim-simulator -- "
        f"-f {quoted_pim} -o {quoted_sim_bin} -d {dump_range}"
    )
    remote_bash(args.ssh_key, args.remote_host, sim_command)
    return paths
def read_remote_file(args, remote_path: Path) -> bytes:
    """Fetch a file from the remote host by running ``cat`` on it.

    Args:
        args: Parsed CLI namespace; ``ssh_key`` and ``remote_host`` are read here.
        remote_path: Path of the file on the remote host; shell-quoted before use.

    Returns:
        The captured standard output of the remote ``cat`` command.
    """
    cat_command = f"cat {shlex.quote(str(remote_path))}"
    completed = remote_bash(args.ssh_key, args.remote_host, cat_command, capture_output=True)
    return completed.stdout
def analyze_case(args, case: ImageCase, work_dir: Path):
    """Run one image case end to end and summarise reference-vs-simulator agreement.

    Steps: download the image, convert it to an input tensor and CSV, upload the
    CSV, run the remote reference runner and simulator, fetch both outputs,
    compute element-wise differences, decode detections from each output, and
    write one annotated image per output.

    Args:
        args: Parsed CLI namespace; ``annotated_dir`` is read here and the whole
            namespace is forwarded to the remote helpers.
        case: The image case; ``name``, ``url`` and ``expected_label`` are read.
        work_dir: Local scratch directory for the downloaded image and its CSV.

    Returns:
        A dict with the case name, label comparisons, absolute/relative diff
        statistics, annotated image paths, and the first five detections from
        each of the reference and simulator outputs.
    """
    # Both outputs are interpreted as float32 tensors of this shape.
    output_shape = (1, 84, 8400)
    output_elements = 1 * 84 * 8400

    image_path = work_dir / f"{case.name}{Path(case.url).suffix or '.img'}"
    csv_path = work_dir / f"{case.name}.csv"
    annotated_dir = Path(args.annotated_dir)
    annotated_dir.mkdir(parents=True, exist_ok=True)

    download_image(case.url, image_path)
    # Open the image in a context manager so its file handle is released as soon
    # as the tensor has been built, instead of lingering until garbage collection.
    with Image.open(image_path) as source_image:
        tensor = letterbox_rgb(source_image)
    save_tensor_csv(tensor, csv_path)

    # The input CSV has to be on the remote host before the runner/simulator start.
    remote_paths = remote_case_paths(args, case.name)
    write_remote_file(args, remote_paths["input_csv"], csv_path.read_bytes())
    remote_paths = run_remote_reference_and_simulator(args, case.name)

    ref_csv = read_remote_file(args, remote_paths["ref_dir"] / "output0_output0.csv")
    sim_bin = read_remote_file(args, remote_paths["sim_bin"])
    ref = np.loadtxt(ref_csv.decode().splitlines(), delimiter=",", dtype=np.float32).reshape(output_shape)
    sim = np.frombuffer(sim_bin, dtype=np.float32, count=output_elements).reshape(output_shape)

    # Compare in float64; the 1e-12 floor keeps the relative diff finite where
    # the reference value is zero.
    ref64 = ref.astype(np.float64)
    sim64 = sim.astype(np.float64)
    abs_diff = np.abs(sim64 - ref64)
    rel_diff = abs_diff / np.maximum(np.abs(ref64), 1e-12)

    ref_detections = decode_yolo_output(ref)
    sim_detections = decode_yolo_output(sim)
    ref_labels = top_unique_labels(ref_detections)
    sim_labels = top_unique_labels(sim_detections)

    ref_image_path = annotated_dir / f"{case.name}_reference.png"
    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
    draw_detections(image_path, ref_detections, ref_image_path)
    draw_detections(image_path, sim_detections, sim_image_path)

    return {
        "case": case.name,
        "expected_label": case.expected_label,
        "ref_top_labels": ref_labels,
        "sim_top_labels": sim_labels,
        "top1_match": bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0]),
        "expected_in_ref": case.expected_label in ref_labels,
        "expected_in_sim": case.expected_label in sim_labels,
        "max_abs_diff": float(abs_diff.max()),
        "mean_abs_diff": float(abs_diff.mean()),
        "max_rel_diff": float(rel_diff.max()),
        "mean_rel_diff": float(rel_diff.mean()),
        "reference_annotated_image": str(ref_image_path),
        "simulator_annotated_image": str(sim_image_path),
        "ref_top_detections": ref_detections[:5],
        "sim_top_detections": sim_detections[:5],
    }
def main():
    """Parse the command line, run every image case, and print a JSON report.

    Unless ``--skip-compile`` is given, ``ensure_remote_artifacts`` is called
    first. Each entry of ``IMAGE_CASES`` is then analysed inside a temporary
    working directory, and the collected per-case reports are printed to stdout
    as indented JSON together with the network directory.
    """
    cli = argparse.ArgumentParser(description="Validate YOLO detections on real animal images against the simulator.")

    # Plain string options, registered in the order they appear in --help.
    text_options = (
        ("--remote-host", "gmagnani@monolith"),
        ("--ssh-key", "~/.ssh/github"),
        ("--remote-project", "/home/gmagnani/Project/Raptor"),
        ("--remote-python", "/home/gmagnani/venv/bin/python"),
        ("--network-dir", "validation/networks/yolo11n/depth_51"),
    )
    for flag, fallback in text_options:
        cli.add_argument(flag, default=fallback)

    # Integer-valued options.
    int_options = (
        ("--crossbar-size", 2048),
        ("--crossbar-count", 256),
        ("--core-count", 1000),
        ("--command-timeout-seconds", 7200),
    )
    for flag, fallback in int_options:
        cli.add_argument(flag, type=int, default=fallback)

    cli.add_argument("--skip-compile", action="store_true")
    cli.add_argument("--annotated-dir", default="validation/networks/yolo11n/depth_51/real_image_validation/annotated")

    args = cli.parse_args()
    # Expand a leading "~" in the key path before it is handed to the remote helpers.
    args.ssh_key = str(Path(args.ssh_key).expanduser())

    if not args.skip_compile:
        ensure_remote_artifacts(args)

    with tempfile.TemporaryDirectory(prefix="yolo_real_images_") as tmp_dir:
        work_dir = Path(tmp_dir)
        reports = [analyze_case(args, case, work_dir) for case in IMAGE_CASES]

    summary = {"network_dir": args.network_dir, "cases": reports}
    print(json.dumps(summary, indent=2))
# Script entry point: run the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
+8 -1
View File
@@ -67,7 +67,10 @@ def main():
ap.add_argument("--operations-dir", default=None, help="Root of the operations tree (default: operations).")
ap.add_argument("--simulator-dir", default=None,
help="Path to pim-simulator crate root (default: auto-detected relative to script).")
ap.add_argument("--threshold", type=float, default=1e-3, help="Max allowed diff per output element.")
ap.add_argument("--threshold", type=float, default=1e-3,
help="Absolute tolerance for per-element output comparison.")
ap.add_argument("--relative-threshold", type=float, default=1e-5,
help="Relative tolerance for per-element output comparison.")
ap.add_argument("--seed", type=int, default=0, help="RNG seed for generated validation inputs.")
ap.add_argument("--crossbar-size", type=int, default=64)
ap.add_argument("--crossbar-count", type=int, default=8)
@@ -77,6 +80,8 @@ def main():
help="Scheduler used by the Spatial merge-compute-nodes pass.")
ap.add_argument("--pim-memory-report", choices=("none", "summary", "full"), default="none",
help="Emit a human-readable PIM memory planning report during codegen.")
ap.add_argument("--raptor-extra-arg", action="append", default=[],
help="Additional argument to pass through to the Raptor compiler. Repeat as needed.")
ap.add_argument("--command-timeout-seconds", type=float, default=1000000.0,
help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.")
ap.add_argument("--clean", action="store_true",
@@ -145,8 +150,10 @@ def main():
onnx_path, a.raptor_path, a.onnx_include_dir, simulator_dir,
crossbar_size=a.crossbar_size, crossbar_count=a.crossbar_count, core_count=a.core_count,
pim_merge_scheduler=a.pim_merge_scheduler, pim_memory_report=a.pim_memory_report,
raptor_extra_args=a.raptor_extra_arg,
command_timeout_seconds=a.command_timeout_seconds,
threshold=a.threshold,
rtol=a.relative_threshold,
seed=a.seed,
reporter=reporter,
model_index=index,
+12 -7
View File
@@ -258,14 +258,18 @@ def parse_pim_simulator_outputs(output_bin_path, outputs_descriptor):
return arrays
def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3, verbose=False):
def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3, rtol=1e-5, verbose=False):
all_passed = True
rows = []
for sim_array, (oi, name, _, shape) in zip(sim_arrays, outputs_descriptor):
csv_name = f"output{oi}_{sanitize_output_name(name)}.csv"
runner_array = np.loadtxt(runner_out_dir / csv_name, delimiter=',', dtype=np.float32).reshape(shape)
max_diff = float(np.max(np.abs(sim_array.astype(np.float64) - runner_array.astype(np.float64))))
passed = max_diff <= threshold
sim_array64 = sim_array.astype(np.float64)
runner_array64 = runner_array.astype(np.float64)
abs_diff = np.abs(sim_array64 - runner_array64)
allowed_diff = threshold + rtol * np.abs(runner_array64)
max_diff = float(np.max(abs_diff))
passed = bool(np.all(abs_diff <= allowed_diff))
rows.append((name, f"{max_diff:.6e}", passed))
if not passed:
all_passed = False
@@ -289,7 +293,8 @@ def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1
def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None,
pim_merge_scheduler="peft", pim_memory_report="none", threshold=1e-3,
pim_merge_scheduler="peft", pim_memory_report="none", raptor_extra_args=None,
threshold=1e-3, rtol=1e-5,
seed=0, reporter=None, model_index=1, model_total=1, verbose=False,
command_timeout_seconds=60.0, mode=MODE_FULL):
network_onnx_path = Path(network_onnx_path).resolve()
@@ -343,7 +348,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
pim_pass_timings = compile_with_raptor(
network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count,
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
pim_memory_report=pim_memory_report,
pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args,
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
reporter.advance()
@@ -383,7 +388,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
pim_pass_timings = compile_with_raptor(
network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count,
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
pim_memory_report=pim_memory_report,
pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args,
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
reporter.advance()
@@ -403,7 +408,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compare Outputs")
sim_arrays = parse_pim_simulator_outputs(output_bin_path, outputs_descriptor)
reporter.suspend()
passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold, verbose=verbose)
passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold, rtol=rtol, verbose=verbose)
reporter.resume()
reporter.advance()
reporter.record_result(passed)