Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 852bef7605 | |||
| 237654dadf | |||
| 6d69600bc1 |
@@ -38,6 +38,12 @@ llvm::cl::opt<bool>
|
||||
llvm::cl::init(false),
|
||||
llvm::cl::cat(OnnxMlirOptions));
|
||||
|
||||
// Command-line flag "pim-disable-memory-coalescing" (off by default).
// It is registered in the OnnxMlirOptions category and is described as a
// developer diagnostic switch for skipping the PIM memory coalescing pass.
llvm::cl::opt<bool> pimDisableMemoryCoalescing(
    "pim-disable-memory-coalescing",
    llvm::cl::desc("Skip the PIM memory coalescing pass (developer diagnostic option)"),
    llvm::cl::init(false),
    llvm::cl::cat(OnnxMlirOptions));
|
||||
|
||||
llvm::cl::opt<bool> useExperimentalConvImpl("use-experimental-conv-impl",
|
||||
llvm::cl::desc("Use experimental implementation for convolution"),
|
||||
llvm::cl::init(false),
|
||||
|
||||
@@ -36,6 +36,7 @@ extern llvm::cl::opt<PimMergeSchedulerType> pimMergeScheduler;
|
||||
extern llvm::cl::opt<PimMemoryReportLevel> pimMemoryReport;
|
||||
|
||||
extern llvm::cl::opt<bool> pimOnlyCodegen;
|
||||
extern llvm::cl::opt<bool> pimDisableMemoryCoalescing;
|
||||
extern llvm::cl::opt<bool> useExperimentalConvImpl;
|
||||
extern llvm::cl::opt<bool> pimEmitJson;
|
||||
|
||||
|
||||
@@ -46,7 +46,8 @@ void addPassesPim(OwningOpRef<ModuleOp>& module,
|
||||
if (pimEmissionTarget >= EmitPimCodegen) {
|
||||
pm.addPass(createPimHostConstantFoldingPass());
|
||||
pm.addPass(createMessagePass("Pim host constants folded"));
|
||||
pm.addPass(createPimMemoryCoalescingPass());
|
||||
if (!pimDisableMemoryCoalescing)
|
||||
pm.addPass(createPimMemoryCoalescingPass());
|
||||
pm.addPass(createPimVerificationPass());
|
||||
pm.addPass(createMessagePass("Pim verified"));
|
||||
pm.addPass(createEmitPimCodePass());
|
||||
|
||||
@@ -140,6 +140,7 @@ void ONNXToSpatialPass::runOnOperation() {
|
||||
target.addIllegalOp<ONNXResizeOp>();
|
||||
target.addIllegalOp<ONNXSliceOp>();
|
||||
target.addIllegalOp<ONNXLRNOp>();
|
||||
target.addIllegalOp<ONNXReduceMeanOp>();
|
||||
target.addIllegalOp<ONNXReduceMeanV13Op>();
|
||||
target.addIllegalOp<ONNXSplitOp>();
|
||||
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
|
||||
#include "src/Accelerators/PIM/Common/IR/AffineUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Conversion/ONNXToSpatial/Common/Common.hpp"
|
||||
@@ -19,6 +21,85 @@ using namespace mlir;
|
||||
namespace onnx_mlir {
|
||||
namespace {
|
||||
|
||||
struct ReduceMeanSemantics {
|
||||
SmallVector<int64_t> axes;
|
||||
int64_t keepdims = 1;
|
||||
bool isIdentity = false;
|
||||
};
|
||||
|
||||
// Returns true when `value` is the result of an ONNXNoneOp.
// Values without a defining operation (e.g. block arguments) yield false.
static bool isNoneValueLike(Value value) {
  Operation* producer = value.getDefiningOp();
  return producer != nullptr && isa<ONNXNoneOp>(producer);
}
|
||||
|
||||
// Extracts the elements of a host constant as int64_t values.
// Fails when getHostConstDenseElementsAttr yields nothing for `value` or the
// attribute is not a DenseIntElementsAttr.
static FailureOr<SmallVector<int64_t>> getConstantIntValues(Value value) {
  if (auto intAttr = dyn_cast_or_null<DenseIntElementsAttr>(getHostConstDenseElementsAttr(value))) {
    auto elements = intAttr.getValues<int64_t>();
    return SmallVector<int64_t>(elements.begin(), elements.end());
  }
  return failure();
}
|
||||
|
||||
// Normalizes every entry of `axes` against `rank` through normalizeAxisChecked
// and returns the results sorted ascending with duplicates removed.
// Fails as soon as a single axis is rejected by normalizeAxisChecked.
static FailureOr<SmallVector<int64_t>> normalizeAxesChecked(ArrayRef<int64_t> axes, int64_t rank) {
  SmallVector<int64_t> result;
  result.reserve(axes.size());
  for (size_t index = 0, count = axes.size(); index != count; ++index) {
    auto checked = normalizeAxisChecked(axes[index], rank);
    if (failed(checked))
      return failure();
    result.push_back(*checked);
  }

  // Sort first so that std::unique can collapse repeated axes.
  llvm::sort(result);
  auto firstDuplicate = std::unique(result.begin(), result.end());
  result.erase(firstDuplicate, result.end());
  return result;
}
|
||||
|
||||
template <typename ReduceMeanOp, typename ReduceMeanOpAdaptor>
|
||||
static FailureOr<ReduceMeanSemantics>
|
||||
getReduceMeanSemantics(ReduceMeanOp reduceMeanOp, ReduceMeanOpAdaptor adaptor, int64_t inputRank) {
|
||||
ReduceMeanSemantics semantics;
|
||||
semantics.keepdims = reduceMeanOp.getKeepdims();
|
||||
|
||||
if constexpr (std::is_same_v<ReduceMeanOp, ONNXReduceMeanV13Op>) {
|
||||
auto axes = onnx_mlir::normalizeAxesChecked(std::optional<ArrayAttr>(reduceMeanOp.getAxesAttr()), inputRank);
|
||||
if (failed(axes))
|
||||
return failure();
|
||||
semantics.axes = std::move(*axes);
|
||||
return semantics;
|
||||
}
|
||||
else {
|
||||
if (isNoneValueLike(adaptor.getAxes())) {
|
||||
if (reduceMeanOp.getNoopWithEmptyAxes() != 0) {
|
||||
semantics.isIdentity = true;
|
||||
return semantics;
|
||||
}
|
||||
|
||||
semantics.axes.reserve(inputRank);
|
||||
for (int64_t axis = 0; axis < inputRank; ++axis)
|
||||
semantics.axes.push_back(axis);
|
||||
return semantics;
|
||||
}
|
||||
|
||||
auto axes = getConstantIntValues(adaptor.getAxes());
|
||||
if (failed(axes))
|
||||
return failure();
|
||||
|
||||
if (axes->empty()) {
|
||||
if (reduceMeanOp.getNoopWithEmptyAxes() != 0) {
|
||||
semantics.isIdentity = true;
|
||||
return semantics;
|
||||
}
|
||||
|
||||
semantics.axes.reserve(inputRank);
|
||||
for (int64_t axis = 0; axis < inputRank; ++axis)
|
||||
semantics.axes.push_back(axis);
|
||||
return semantics;
|
||||
}
|
||||
|
||||
auto normalizedAxes = normalizeAxesChecked(*axes, inputRank);
|
||||
if (failed(normalizedAxes))
|
||||
return failure();
|
||||
semantics.axes = std::move(*normalizedAxes);
|
||||
return semantics;
|
||||
}
|
||||
}
|
||||
|
||||
static SmallVector<bool> buildReducedAxesMask(ArrayRef<int64_t> axes, int64_t rank) {
|
||||
SmallVector<bool> reducedAxes(rank, false);
|
||||
for (int64_t axis : axes) {
|
||||
@@ -251,11 +332,13 @@ static Value squeezeReducedAxes(Value keepdimsValue,
|
||||
return squeezeCompute.getResult(0);
|
||||
}
|
||||
|
||||
struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
|
||||
using OpConversionPattern::OpConversionPattern;
|
||||
template <typename ReduceMeanOp>
|
||||
struct ReduceMeanToSpatialCompute : OpConversionPattern<ReduceMeanOp> {
|
||||
using OpConversionPattern<ReduceMeanOp>::OpConversionPattern;
|
||||
using Adaptor = typename ReduceMeanOp::Adaptor;
|
||||
|
||||
LogicalResult matchAndRewrite(ONNXReduceMeanV13Op reduceMeanOp,
|
||||
ONNXReduceMeanV13OpAdaptor adaptor,
|
||||
LogicalResult matchAndRewrite(ReduceMeanOp reduceMeanOp,
|
||||
Adaptor adaptor,
|
||||
ConversionPatternRewriter& rewriter) const override {
|
||||
auto inputType = dyn_cast<RankedTensorType>(adaptor.getData().getType());
|
||||
auto resultType = dyn_cast<RankedTensorType>(reduceMeanOp.getReduced().getType());
|
||||
@@ -266,10 +349,18 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
|
||||
return success();
|
||||
}
|
||||
|
||||
auto axes = normalizeAxesChecked(std::optional<ArrayAttr>(reduceMeanOp.getAxesAttr()), inputType.getRank());
|
||||
if (failed(axes))
|
||||
return failure();
|
||||
SmallVector<bool> reducedAxes = buildReducedAxesMask(*axes, inputType.getRank());
|
||||
auto semantics = getReduceMeanSemantics(reduceMeanOp, adaptor, inputType.getRank());
|
||||
if (failed(semantics))
|
||||
return rewriter.notifyMatchFailure(reduceMeanOp, "requires compile-time constant, in-range ReduceMean axes");
|
||||
if (semantics->isIdentity) {
|
||||
if (inputType != resultType)
|
||||
return rewriter.notifyMatchFailure(
|
||||
reduceMeanOp, "noop_with_empty_axes identity requires the result type to match the input type");
|
||||
rewriter.replaceOp(reduceMeanOp, adaptor.getData());
|
||||
return success();
|
||||
}
|
||||
|
||||
SmallVector<bool> reducedAxes = buildReducedAxesMask(semantics->axes, inputType.getRank());
|
||||
if (reducedAxes.empty() && inputType.getRank() != 0)
|
||||
return failure();
|
||||
|
||||
@@ -289,7 +380,7 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
|
||||
Value reducedKeepdims =
|
||||
buildKeepdimsFromLanePackedBatch(*lanePackedKeepdims, keepdimsType, compactKeptType, reducedAxes, rewriter, loc);
|
||||
|
||||
if (reduceMeanOp.getKeepdims() != 0) {
|
||||
if (semantics->keepdims != 0) {
|
||||
rewriter.replaceOp(reduceMeanOp, reducedKeepdims);
|
||||
return success();
|
||||
}
|
||||
@@ -303,7 +394,7 @@ struct ReduceMeanToSpatialCompute : OpConversionPattern<ONNXReduceMeanV13Op> {
|
||||
} // namespace
|
||||
|
||||
void populateReduceMeanPatterns(RewritePatternSet& patterns, MLIRContext* ctx) {
|
||||
patterns.add<ReduceMeanToSpatialCompute>(ctx);
|
||||
patterns.add<ReduceMeanToSpatialCompute<ONNXReduceMeanV13Op>, ReduceMeanToSpatialCompute<ONNXReduceMeanOp>>(ctx);
|
||||
}
|
||||
|
||||
} // namespace onnx_mlir
|
||||
|
||||
@@ -8,3 +8,7 @@ networks/**/outputs
|
||||
networks/**/raptor
|
||||
networks/**/runner
|
||||
networks/**/simulation
|
||||
networks/**/real_image_val
|
||||
networks/**/*.png
|
||||
networks/**/*.jpg
|
||||
networks/**/*.csv
|
||||
|
||||
@@ -199,7 +199,10 @@ int main(int argc, char **argv) {{
|
||||
|
||||
// ---- Cleanup ----
|
||||
omTensorListDestroy(in_list);
|
||||
omTensorListDestroy(out_list);
|
||||
// Some debug-heavy models return aliased outputs. This runner is a short-
|
||||
// lived process, so destroy only the list wrapper and let process exit
|
||||
// reclaim the output tensors safely.
|
||||
omTensorListDestroyShallow(out_list);
|
||||
return 0;
|
||||
}}
|
||||
"""
|
||||
|
||||
Binary file not shown.
@@ -1053,6 +1053,92 @@ def reducemean_large_dimension_1024():
|
||||
save_model(model, "reduce_mean/large_dimension_1024", "reduce_mean_large_dimension_1024.onnx")
|
||||
|
||||
|
||||
def make_legacy_reducemean_model(name, shape, output_shape, directory, filename, *, axes, keepdims=1,
                                 noop_with_empty_axes=0):
    """Build and save a single-node opset-18 ReduceMean model.

    The graph has one FLOAT input "X" of ``shape`` and one FLOAT output "Y" of
    ``output_shape``. When ``axes`` is None the optional axes input is passed as
    an empty name; otherwise an "axes" initializer is created from ``axes``.
    The model is written through ``save_model(model, directory, filename)``.
    """
    if axes is None:
        # An empty input name stands in for the omitted optional axes input.
        node_inputs, initializers = ["X", ""], []
    else:
        node_inputs, initializers = ["X", "axes"], [make_int64_initializer("axes", axes)]

    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, shape)
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, output_shape)
    reduce_node = helper.make_node(
        "ReduceMean",
        node_inputs,
        ["Y"],
        keepdims=keepdims,
        noop_with_empty_axes=noop_with_empty_axes,
    )
    graph = helper.make_graph([reduce_node], name, [graph_input], [graph_output], initializer=initializers)
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
    save_model(model, directory, filename)
|
||||
|
||||
|
||||
def reducemean_legacy_axis1_keepdims_1():
    """Opset-18 ReduceMean over axis 1 of a [2, 3, 4] input, keeping the reduced axis."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_axis1_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[2, 1, 4],
        directory="reduce_mean/legacy_axis1_keepdims_1",
        filename="reduce_mean_legacy_axis1_keepdims_1.onnx",
        axes=[1],
        keepdims=1,
    )
|
||||
|
||||
|
||||
def reducemean_legacy_axis1_keepdims_0():
    """Opset-18 ReduceMean over axis 1 of a [2, 3, 4] input, dropping the reduced axis."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_axis1_keepdims_0",
        shape=[2, 3, 4],
        output_shape=[2, 4],
        directory="reduce_mean/legacy_axis1_keepdims_0",
        filename="reduce_mean_legacy_axis1_keepdims_0.onnx",
        axes=[1],
        keepdims=0,
    )
|
||||
|
||||
|
||||
def reducemean_legacy_axes_1_2_keepdims_1():
    """Opset-18 ReduceMean over axes 1 and 2 of a [2, 3, 4] input with keepdims=1."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_axes_1_2_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[2, 1, 1],
        directory="reduce_mean/legacy_axes_1_2_keepdims_1",
        filename="reduce_mean_legacy_axes_1_2_keepdims_1.onnx",
        axes=[1, 2],
        keepdims=1,
    )
|
||||
|
||||
|
||||
def reducemean_legacy_negative_axis():
    """Opset-18 ReduceMean addressing the last axis of a [2, 3, 4] input as -1."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_negative_axis",
        shape=[2, 3, 4],
        output_shape=[2, 3, 1],
        directory="reduce_mean/legacy_negative_axis",
        filename="reduce_mean_legacy_negative_axis.onnx",
        axes=[-1],
        keepdims=1,
    )
|
||||
|
||||
|
||||
def reducemean_legacy_reduce_all_keepdims_1():
    """Opset-18 ReduceMean with the optional axes input omitted, reducing [2, 3, 4] to [1, 1, 1]."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_reduce_all_keepdims_1",
        shape=[2, 3, 4],
        output_shape=[1, 1, 1],
        directory="reduce_mean/legacy_reduce_all_keepdims_1",
        filename="reduce_mean_legacy_reduce_all_keepdims_1.onnx",
        axes=None,
        keepdims=1,
    )
|
||||
|
||||
|
||||
def reducemean_legacy_empty_axes_noop():
    """Opset-18 ReduceMean with an empty axes initializer and noop_with_empty_axes=1, followed by Relu."""
    empty_axes = make_int64_initializer("axes", [])
    nodes = [
        helper.make_node("ReduceMean", ["X", "axes"], ["R"], keepdims=1, noop_with_empty_axes=1),
        helper.make_node("Relu", ["R"], ["Y"]),
    ]
    graph_input = helper.make_tensor_value_info("X", TensorProto.FLOAT, [3, 4])
    graph_output = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3, 4])
    graph = helper.make_graph(
        nodes,
        "reducemean_legacy_empty_axes_noop",
        [graph_input],
        [graph_output],
        initializer=[empty_axes],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 18)])
    save_model(model, "reduce_mean/legacy_empty_axes_noop", "reduce_mean_legacy_empty_axes_noop.onnx")
|
||||
|
||||
|
||||
def reducemean_legacy_nchw_spatial():
    """Opset-18 ReduceMean over axes 2 and 3 (H and W) of a [1, 3, 5, 5] NCHW input."""
    make_legacy_reducemean_model(
        name="reducemean_legacy_nchw_spatial",
        shape=[1, 3, 5, 5],
        output_shape=[1, 3, 1, 1],
        directory="reduce_mean/legacy_nchw_spatial",
        filename="reduce_mean_legacy_nchw_spatial.onnx",
        axes=[2, 3],
        keepdims=1,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Relu tests
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1974,6 +2060,13 @@ if __name__ == "__main__":
|
||||
reducemean_4d_spatial_keepdims_0()
|
||||
reducemean_channel_axis_nchw()
|
||||
reducemean_large_dimension_1024()
|
||||
reducemean_legacy_axis1_keepdims_1()
|
||||
reducemean_legacy_axis1_keepdims_0()
|
||||
reducemean_legacy_axes_1_2_keepdims_1()
|
||||
reducemean_legacy_negative_axis()
|
||||
reducemean_legacy_reduce_all_keepdims_1()
|
||||
reducemean_legacy_empty_axes_noop()
|
||||
reducemean_legacy_nchw_spatial()
|
||||
|
||||
print("\nGenerating Relu tests:")
|
||||
relu_basic()
|
||||
|
||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
@@ -41,7 +41,8 @@ def _format_command(cmd):
|
||||
|
||||
def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path,
|
||||
crossbar_size, crossbar_count, core_count=None, pim_merge_scheduler="peft",
|
||||
pim_memory_report="none", cwd=None, verbose=False, reporter=None, timeout_sec=None):
|
||||
pim_memory_report="none", raptor_extra_args=None, cwd=None, verbose=False,
|
||||
reporter=None, timeout_sec=None):
|
||||
# Define the arguments, with the possibility to set crossbar size and count
|
||||
args = [
|
||||
network_path,
|
||||
@@ -57,6 +58,8 @@ def compile_with_raptor(network_path, raptor_onnx_path: Path, output_base: Path,
|
||||
args.append(f"--core-count={core_count}")
|
||||
if pim_memory_report != "none":
|
||||
args.append(f"--pim-memory-report={pim_memory_report}")
|
||||
if raptor_extra_args:
|
||||
args.extend(str(arg) for arg in raptor_extra_args)
|
||||
if verbose:
|
||||
args.append("--enable-timing")
|
||||
|
||||
|
||||
@@ -0,0 +1,236 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
VALIDATION_DIR = SCRIPT_DIR.parent
|
||||
if str(VALIDATION_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(VALIDATION_DIR))
|
||||
|
||||
if sys.version_info < (3, 10):
|
||||
raise SystemExit(
|
||||
"yolo_local_image_validation.py requires Python 3.10+ because validation modules use modern type syntax. "
|
||||
"Run it with a newer interpreter, for example your project venv Python."
|
||||
)
|
||||
|
||||
from onnx_utils import _ONNX_TO_NP, onnx_io, write_inputs_to_memory_bin
|
||||
from validate_one import MODE_COMPILE_ONLY, build_dump_ranges, run_pim_simulator, sanitize_output_name, validate_network
|
||||
from yolo_real_image_validation import (
|
||||
IMAGE_CASES,
|
||||
decode_yolo_output,
|
||||
download_image,
|
||||
draw_detections,
|
||||
letterbox_rgb,
|
||||
save_tensor_csv,
|
||||
top_unique_labels,
|
||||
)
|
||||
|
||||
|
||||
def resolve_default_paths():
    """Return the default locations used by the CLI, derived from this script's path.

    The validation directory is the parent of the directory containing this
    file, and the repository root is the parent of the validation directory.
    """
    script_path = Path(__file__).resolve()
    validation_root = script_path.parent.parent
    repository = validation_root.parent

    defaults = {"validation_dir": validation_root, "repo_root": repository}
    defaults["network_dir"] = validation_root / "networks" / "yolo11n" / "depth_51"
    defaults["raptor_path"] = repository / "build_release" / "Release" / "bin" / "onnx-mlir"
    defaults["onnx_include_dir"] = repository / "onnx-mlir" / "include"
    defaults["simulator_dir"] = repository / "backend-simulators" / "pim" / "pim-simulator"
    return defaults
|
||||
|
||||
|
||||
def find_network_onnx(network_dir: Path) -> Path:
    """Return the single ``*.onnx`` file directly inside ``network_dir``.

    Raises:
        FileNotFoundError: no ``.onnx`` file is present.
        RuntimeError: more than one ``.onnx`` file is present.
    """
    candidates = sorted(network_dir.glob("*.onnx"))
    if len(candidates) == 1:
        return candidates[0]
    if not candidates:
        raise FileNotFoundError(f"No .onnx file found in {network_dir}")
    listing = ", ".join(candidate.name for candidate in candidates)
    raise RuntimeError(f"Expected exactly one .onnx file in {network_dir}, found: {listing}")
|
||||
|
||||
|
||||
def local_case_paths(network_dir: Path, case_name: str):
    """Return the artifact paths for one image case under ``network_dir``.

    All per-case inputs, reference outputs and simulator outputs live below
    ``network_dir / "real_image_validation"``.
    """
    runner_build = network_dir / "runner" / "build"
    real_root = network_dir / "real_image_validation"
    sim_dir = real_root / "simulation" / case_name
    return {
        "root": network_dir,
        "runner": runner_build / "runner",
        "runner_build": runner_build,
        "raptor_pim": network_dir / "raptor" / "pim",
        "real_root": real_root,
        "input_csv": real_root / "inputs" / f"{case_name}.csv",
        "ref_dir": real_root / "reference" / case_name,
        "sim_dir": sim_dir,
        "sim_bin": sim_dir / "out.bin",
    }
|
||||
|
||||
|
||||
def ensure_local_artifacts(args, network_onnx_path: Path):
    """Run ``validate_network`` in ``MODE_COMPILE_ONLY`` for ``network_onnx_path``.

    Tool locations, hardware sizing, timeout and verbosity are forwarded
    unchanged from the parsed CLI ``args``.
    """
    forwarded_fields = (
        "raptor_path",
        "onnx_include_dir",
        "simulator_dir",
        "crossbar_size",
        "crossbar_count",
        "core_count",
        "command_timeout_seconds",
        "verbose",
    )
    forwarded = {field: getattr(args, field) for field in forwarded_fields}
    validate_network(network_onnx_path=network_onnx_path, mode=MODE_COMPILE_ONLY, **forwarded)
|
||||
|
||||
|
||||
def ensure_existing_artifacts(network_dir: Path):
    """Check that the runner binary and the PIM config/memory files already exist.

    Raises:
        FileNotFoundError: at least one required path is missing; the message
            lists every missing path.
    """
    pim_dir = network_dir / "raptor" / "pim"
    expected = (
        network_dir / "runner" / "build" / "runner",
        pim_dir / "config.json",
        pim_dir / "memory.bin",
    )
    absent = []
    for artifact in expected:
        if not artifact.exists():
            absent.append(str(artifact))
    if not absent:
        return
    raise FileNotFoundError(
        "Missing compiled local artifacts. Re-run without --skip-compile or restore these paths:\n "
        + "\n ".join(absent)
    )
|
||||
|
||||
|
||||
def run_local_reference_and_simulator(args, network_dir: Path, network_onnx_path: Path, case_name: str):
    """Produce the reference and simulator outputs for one prepared image case.

    Steps:
      1. Run the compiled runner on the case's input CSV, saving reference CSVs
         into the case's reference directory.
      2. Reload the same CSV as a (1, 3, 640, 640) float32 tensor and write it
         into the PIM ``memory.bin`` via ``write_inputs_to_memory_bin``.
      3. Run the PIM simulator, dumping the output range to the case's ``out.bin``.

    Both external commands are bounded by ``args.command_timeout_seconds``.

    Returns:
        ``(paths, output_descriptor)`` where ``paths`` is the dict from
        ``local_case_paths`` and ``output_descriptor`` is the model's only
        output descriptor as reported by ``onnx_io``.

    Raises:
        RuntimeError: the model does not have exactly one output.
        subprocess.CalledProcessError: the runner exits with a non-zero status.
        subprocess.TimeoutExpired: the runner exceeds the command timeout.
    """
    paths = local_case_paths(network_dir, case_name)
    paths["ref_dir"].mkdir(parents=True, exist_ok=True)
    paths["sim_dir"].mkdir(parents=True, exist_ok=True)

    output_descriptors = onnx_io(network_onnx_path)[1]
    if len(output_descriptors) != 1:
        raise RuntimeError(f"Expected one YOLO output tensor, found {len(output_descriptors)}")

    runner_cmd = [
        str(paths["runner"]),
        "--in0-csv-file",
        str(paths["input_csv"]),
        "--in0-shape",
        "1x3x640x640",
        "--save-csv-dir",
        str(paths["ref_dir"]),
    ]
    # Apply the same timeout as the simulator step so a hung runner cannot
    # block the whole validation indefinitely.
    subprocess.run(runner_cmd, cwd=paths["runner_build"], check=True, timeout=args.command_timeout_seconds)

    tensor = np.loadtxt(paths["input_csv"], delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
    config_path = paths["raptor_pim"] / "config.json"
    write_inputs_to_memory_bin(paths["raptor_pim"] / "memory.bin", config_path, [tensor])

    dump_ranges = build_dump_ranges(config_path, output_descriptors)
    run_pim_simulator(
        args.simulator_dir,
        paths["raptor_pim"],
        paths["sim_bin"],
        dump_ranges,
        timeout_sec=args.command_timeout_seconds,
    )
    return paths, output_descriptors[0]
|
||||
|
||||
|
||||
def analyze_case(args, network_dir: Path, network_onnx_path: Path, case, work_dir: Path):
    """Download one image case, run reference and simulator, and compare their outputs.

    The image is downloaded into ``work_dir``, letterboxed into an input tensor,
    saved as the case's input CSV, and fed to both the reference runner and the
    PIM simulator. Both outputs are decoded into detections, annotated copies of
    the image are written to ``args.annotated_dir``, and a report dict with label
    matches and absolute/relative error statistics is returned.
    """
    # Imported locally so this function carries its own dependency.
    from urllib.parse import urlparse

    # Take the extension from the URL path only; a query string such as
    # "?...&ixlib=rb-4.1.0&..." would otherwise be mistaken for a file suffix.
    suffix = Path(urlparse(case.url).path).suffix or '.img'
    image_path = work_dir / f"{case.name}{suffix}"
    csv_path = work_dir / f"{case.name}.csv"
    annotated_dir = args.annotated_dir
    annotated_dir.mkdir(parents=True, exist_ok=True)
    download_image(case.url, image_path)
    # Close the image file once the tensor has been built.
    with Image.open(image_path) as source_image:
        tensor = letterbox_rgb(source_image)
    save_tensor_csv(tensor, csv_path)

    paths = local_case_paths(network_dir, case.name)
    paths["input_csv"].parent.mkdir(parents=True, exist_ok=True)
    paths["input_csv"].write_bytes(csv_path.read_bytes())
    paths, output_descriptor = run_local_reference_and_simulator(args, network_dir, network_onnx_path, case.name)

    output_index, output_name, output_dtype_code, output_shape = output_descriptor
    output_dtype = np.dtype(_ONNX_TO_NP[output_dtype_code])
    ref_csv_path = paths["ref_dir"] / f"output{output_index}_{sanitize_output_name(output_name)}.csv"
    ref = np.loadtxt(ref_csv_path, delimiter=",", dtype=output_dtype).reshape(output_shape)
    sim = np.frombuffer(
        paths["sim_bin"].read_bytes(),
        dtype=output_dtype,
        count=int(np.prod(output_shape)),
    ).reshape(output_shape)

    # Compare in float64; the denominator floor avoids division by zero.
    abs_diff = np.abs(sim.astype(np.float64) - ref.astype(np.float64))
    rel_diff = abs_diff / np.maximum(np.abs(ref.astype(np.float64)), 1e-12)

    ref_detections = decode_yolo_output(ref)
    sim_detections = decode_yolo_output(sim)
    ref_labels = top_unique_labels(ref_detections)
    sim_labels = top_unique_labels(sim_detections)
    ref_image_path = annotated_dir / f"{case.name}_reference.png"
    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
    draw_detections(image_path, ref_detections, ref_image_path)
    draw_detections(image_path, sim_detections, sim_image_path)

    return {
        "case": case.name,
        "expected_label": case.expected_label,
        "ref_top_labels": ref_labels,
        "sim_top_labels": sim_labels,
        "top1_match": bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0]),
        "expected_in_ref": case.expected_label in ref_labels,
        "expected_in_sim": case.expected_label in sim_labels,
        "max_abs_diff": float(abs_diff.max()),
        "mean_abs_diff": float(abs_diff.mean()),
        "max_rel_diff": float(rel_diff.max()),
        "mean_rel_diff": float(rel_diff.mean()),
        "reference_annotated_image": str(ref_image_path),
        "simulator_annotated_image": str(sim_image_path),
        "ref_top_detections": ref_detections[:5],
        "sim_top_detections": sim_detections[:5],
    }
|
||||
|
||||
|
||||
def main():
    """CLI entry point: compile (or verify) local artifacts, run every image case, print a JSON report."""
    defaults = resolve_default_paths()

    parser = argparse.ArgumentParser(description="Validate YOLO detections on real images using local compilation and simulator execution.")
    parser.add_argument("--network-dir", type=Path, default=defaults["network_dir"])
    parser.add_argument("--network-onnx", type=Path, default=None)
    parser.add_argument("--raptor-path", type=Path, default=defaults["raptor_path"])
    parser.add_argument("--onnx-include-dir", type=Path, default=defaults["onnx_include_dir"])
    parser.add_argument("--simulator-dir", type=Path, default=defaults["simulator_dir"])
    parser.add_argument("--crossbar-size", type=int, default=2048)
    parser.add_argument("--crossbar-count", type=int, default=256)
    parser.add_argument("--core-count", type=int, default=1000)
    parser.add_argument("--command-timeout-seconds", type=float, default=7200.0)
    parser.add_argument("--skip-compile", action="store_true")
    parser.add_argument("--verbose", action="store_true")
    # The default is resolved after parsing so that it follows --network-dir
    # instead of always pointing into the default network directory.
    parser.add_argument("--annotated-dir", type=Path, default=None)
    args = parser.parse_args()

    args.network_dir = args.network_dir.resolve()
    args.network_onnx = args.network_onnx.resolve() if args.network_onnx else find_network_onnx(args.network_dir)
    args.raptor_path = args.raptor_path.resolve()
    args.onnx_include_dir = args.onnx_include_dir.resolve()
    args.simulator_dir = args.simulator_dir.resolve()
    if args.annotated_dir is None:
        args.annotated_dir = args.network_dir / "real_image_validation" / "annotated"
    args.annotated_dir = args.annotated_dir.resolve()

    if args.skip_compile:
        # Reuse previously compiled artifacts, failing early if any is missing.
        ensure_existing_artifacts(args.network_dir)
    else:
        ensure_local_artifacts(args, args.network_onnx)

    reports = []
    # Downloaded images and intermediate CSVs are discarded with the temp dir.
    with tempfile.TemporaryDirectory(prefix="yolo_local_images_") as tmp_dir:
        work_dir = Path(tmp_dir)
        for case in IMAGE_CASES:
            reports.append(analyze_case(args, args.network_dir, args.network_onnx, case, work_dir))

    print(json.dumps({"network_dir": str(args.network_dir), "network_onnx": str(args.network_onnx), "cases": reports}, indent=2))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,425 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shlex
|
||||
import subprocess
|
||||
import tempfile
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
|
||||
# Class labels addressed by class id; decode_yolo_output indexes this list
# with the arg-max class of each prediction.
COCO80_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
    "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ImageCase:
    """Immutable description of one real-image validation case."""

    # Identifier used to build per-case file and directory names.
    name: str
    # Location the image is downloaded from.
    url: str
    # Label the case is expected to produce among the top detections.
    expected_label: str
|
||||
|
||||
|
||||
# Image cases exercised by the real-image validation scripts.
IMAGE_CASES = [
    ImageCase(name="cat_coco_39769", url="http://images.cocodataset.org/val2017/000000039769.jpg", expected_label="cat"),
    ImageCase(name="dog_pytorch_hub", url="https://github.com/pytorch/hub/raw/master/images/dog.jpg", expected_label="dog"),
    ImageCase(
        name="cute_kitty",
        url="https://images.unsplash.com/photo-1529778873920-4da4926a72c2?q=80&w=872&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
        expected_label="cat",
    ),
]
|
||||
|
||||
|
||||
def run(cmd, *, cwd=None, capture_output=False, input_bytes=None):
    """Run ``cmd`` via ``subprocess.run`` with ``check=True`` and return the completed process.

    ``input_bytes`` is passed as the child's stdin; a non-zero exit status
    raises ``subprocess.CalledProcessError``.
    """
    options = {
        "cwd": cwd,
        "check": True,
        "input": input_bytes,
        "capture_output": capture_output,
    }
    return subprocess.run(cmd, **options)
|
||||
|
||||
|
||||
def ssh_command(ssh_key: str, remote_host: str, command: str):
    """Build the argv list that runs ``command`` on ``remote_host`` via ssh with the identity file ``ssh_key``."""
    argv = ["ssh", "-i", ssh_key]
    argv.extend((remote_host, command))
    return argv
|
||||
|
||||
|
||||
def remote_bash(ssh_key: str, remote_host: str, command: str, *, capture_output=False, input_bytes=None):
    """Execute ``command`` on ``remote_host`` over ssh and return the completed process from ``run``."""
    argv = ssh_command(ssh_key, remote_host, command)
    return run(argv, capture_output=capture_output, input_bytes=input_bytes)
|
||||
|
||||
|
||||
def download_image(url: str, path: Path, *, timeout: float = 60.0):
    """Fetch ``url`` and write the response body to ``path``.

    Args:
        url: Location to fetch with ``urllib.request.urlopen``.
        path: Destination file; overwritten if it exists.
        timeout: Seconds to wait on blocking network operations, so that an
            unresponsive server cannot stall the caller indefinitely.

    Raises:
        urllib.error.URLError: the request fails or times out while connecting.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()
    path.write_bytes(payload)
|
||||
|
||||
|
||||
def letterbox_rgb(image: Image.Image, size: int = 640) -> np.ndarray:
    """Letterbox ``image`` onto a ``size`` x ``size`` grey canvas and return it as a tensor.

    The image is converted to RGB, resized with bilinear resampling so that it
    fits inside the square while keeping its aspect ratio, and pasted centred on
    a canvas filled with (114, 114, 114).

    Returns:
        float32 array of shape (1, 3, size, size) with values scaled by 1/255.
    """
    rgb = image.convert("RGB")
    src_w, src_h = rgb.size
    ratio = min(size / src_w, size / src_h)
    new_w = max(1, int(round(src_w * ratio)))
    new_h = max(1, int(round(src_h * ratio)))

    padded = Image.new("RGB", (size, size), (114, 114, 114))
    top_left = ((size - new_w) // 2, (size - new_h) // 2)
    padded.paste(rgb.resize((new_w, new_h), Image.Resampling.BILINEAR), top_left)

    # HWC -> CHW, then add the leading batch dimension.
    pixels = np.asarray(padded, dtype=np.float32) / 255.0
    return pixels.transpose(2, 0, 1)[np.newaxis, ...]
|
||||
|
||||
|
||||
def letterbox_params(width: int, height: int, size: int = 640):
    """Return ``(scale, offset_x, offset_y)`` of the letterbox for a ``width`` x ``height`` image.

    ``scale`` is the aspect-preserving resize factor into a ``size`` square and
    the offsets are the left/top padding that centres the resized image.
    """
    scale = min(size / width, size / height)
    scaled_w = max(1, int(round(width * scale)))
    scaled_h = max(1, int(round(height * scale)))
    pad_left = (size - scaled_w) // 2
    pad_top = (size - scaled_h) // 2
    return scale, pad_left, pad_top
|
||||
|
||||
|
||||
def save_tensor_csv(array: np.ndarray, path: Path):
    """Write ``array`` flattened into a single comma-separated row at ``path`` using ``%.9g`` formatting."""
    single_row = array.reshape(1, -1)
    np.savetxt(path, single_row, delimiter=",", fmt="%.9g")
|
||||
|
||||
|
||||
def iou_xyxy(box: np.ndarray, boxes: np.ndarray) -> np.ndarray:
    """Compute the IoU of one ``[x1, y1, x2, y2]`` box against each row of ``boxes``.

    Negative extents are treated as zero, and entries whose union is not
    positive yield 0 rather than a division error.
    """

    def _area(x_min, y_min, x_max, y_max):
        return np.maximum(0.0, x_max - x_min) * np.maximum(0.0, y_max - y_min)

    left = np.maximum(box[0], boxes[:, 0])
    top = np.maximum(box[1], boxes[:, 1])
    right = np.minimum(box[2], boxes[:, 2])
    bottom = np.minimum(box[3], boxes[:, 3])
    overlap = np.maximum(0.0, right - left) * np.maximum(0.0, bottom - top)

    single_area = _area(box[0], box[1], box[2], box[3])
    row_areas = _area(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    total = single_area + row_areas - overlap

    ratios = np.zeros_like(overlap)
    np.divide(overlap, total, out=ratios, where=total > 0)
    return ratios
|
||||
|
||||
|
||||
def decode_yolo_output(
    output: np.ndarray,
    *,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
    max_detections: int = 50,
):
    """Decode a raw YOLO output tensor into a confidence-sorted detection list.

    ``output[0]`` is transposed so that each row holds one prediction: the first
    four values are a centre-format box (x, y, w, h) and the remaining values
    are per-class scores. Predictions whose best class score is below
    ``conf_threshold`` are dropped, the rest go through per-class greedy
    non-maximum suppression with ``iou_threshold``, and at most
    ``max_detections`` results are returned.

    Returns:
        List of dicts with keys "label", "class_id", "confidence", "box_xyxy".
    """
    rows = output[0].T
    scores = rows[:, 4:]

    best_class = np.argmax(scores, axis=1)
    best_score = scores[np.arange(scores.shape[0]), best_class]
    selected = best_score >= conf_threshold
    if not np.any(selected):
        return []

    xywh = rows[:, :4][selected]
    best_class = best_class[selected]
    best_score = best_score[selected]

    # Convert centre/size boxes into corner coordinates.
    xyxy = np.empty_like(xywh)
    xyxy[:, 0] = xywh[:, 0] - xywh[:, 2] / 2.0
    xyxy[:, 1] = xywh[:, 1] - xywh[:, 3] / 2.0
    xyxy[:, 2] = xywh[:, 0] + xywh[:, 2] / 2.0
    xyxy[:, 3] = xywh[:, 1] + xywh[:, 3] / 2.0

    results = []
    for cls in np.unique(best_class):
        members = best_class == cls
        cand_boxes = xyxy[members]
        cand_scores = best_score[members]
        remaining = np.argsort(-cand_scores)

        # Greedy NMS: keep the best remaining box, drop the ones overlapping it.
        while remaining.size > 0:
            top, remaining = remaining[0], remaining[1:]
            results.append({
                "label": COCO80_CLASSES[int(cls)],
                "class_id": int(cls),
                "confidence": float(cand_scores[top]),
                "box_xyxy": cand_boxes[top].tolist(),
            })
            if remaining.size == 0:
                break
            overlaps = iou_xyxy(cand_boxes[top], cand_boxes[remaining])
            remaining = remaining[overlaps <= iou_threshold]

    ranked = sorted(results, key=lambda det: det["confidence"], reverse=True)
    return ranked[:max_detections]
|
||||
|
||||
|
||||
def top_unique_labels(detections, limit: int = 5):
    """Return the first distinct labels found in ``detections``.

    Labels are reported in the order in which they first appear, so when the
    input is sorted by confidence the result is the top distinct labels.

    Args:
        detections: Iterable of mappings that each provide a ``"label"`` key.
        limit: Maximum number of labels to return. A value of zero or less
            yields an empty list; ``None`` disables the cap.

    Returns:
        A list of at most ``limit`` unique labels.
    """
    labels = []
    # A non-positive limit used to be ignored (the equality check below never
    # fired), which returned every label instead of none.
    if limit is not None and limit <= 0:
        return labels

    seen = set()
    for det in detections:
        label = det["label"]
        if label in seen:
            continue
        seen.add(label)
        labels.append(label)
        if limit is not None and len(labels) >= limit:
            break
    return labels
|
||||
|
||||
|
||||
def clamp_box_xyxy(box_xyxy, width: int, height: int):
    """Clamp a corner-format box to the pixel range of a ``width`` x ``height`` image.

    x coordinates are limited to ``[0, width - 1]`` and y coordinates to
    ``[0, height - 1]``.

    Args:
        box_xyxy: Sequence of exactly four numbers ``(x1, y1, x2, y2)``.
        width: Image width.
        height: Image height.

    Returns:
        A new list of four floats ``[x1, y1, x2, y2]``.
    """
    x1, y1, x2, y2 = box_xyxy
    x_limit = float(width - 1)
    y_limit = float(height - 1)
    bounded = []
    for upper, value in ((x_limit, x1), (y_limit, y1), (x_limit, x2), (y_limit, y2)):
        bounded.append(max(0.0, min(upper, float(value))))
    return bounded
|
||||
|
||||
|
||||
def unletterbox_box_xyxy(box_xyxy, width: int, height: int, size: int = 640):
    """Undo the letterbox offset and scale on a corner-format box.

    The scale and offsets come from ``letterbox_params(width, height, size=size)``;
    each coordinate has its axis offset subtracted and is then divided by the
    scale.

    Args:
        box_xyxy: Sequence of exactly four numbers ``(x1, y1, x2, y2)``.
        width: Width passed to ``letterbox_params``.
        height: Height passed to ``letterbox_params``.
        size: Letterbox size passed to ``letterbox_params``.

    Returns:
        A list of four floats ``[x1, y1, x2, y2]``.
    """
    scale, offset_x, offset_y = letterbox_params(width, height, size=size)
    x1, y1, x2, y2 = box_xyxy

    def restore(value, offset):
        return (float(value) - offset) / scale

    return [
        restore(x1, offset_x),
        restore(y1, offset_y),
        restore(x2, offset_x),
        restore(y2, offset_y),
    ]
|
||||
|
||||
|
||||
def draw_detections(image_path: Path, detections, output_path: Path, *, limit: int = 10):
    """Draw labelled boxes for the first ``limit`` detections and save the image.

    Each box is passed through ``unletterbox_box_xyxy`` (default letterbox
    size) and ``clamp_box_xyxy`` before drawing. Every detection gets a red
    outline plus a red caption background with white text showing the label
    and the confidence with two decimals.

    Args:
        image_path: Image to annotate; it is opened and converted to RGB.
        detections: Sequence of dicts with ``box_xyxy``, ``label`` and
            ``confidence`` keys.
        output_path: Destination of the annotated image.
        limit: Maximum number of detections drawn.
    """
    red = (255, 0, 0)
    white = (255, 255, 255)

    canvas = Image.open(image_path).convert("RGB")
    pen = ImageDraw.Draw(canvas)
    img_w, img_h = canvas.size

    for det in detections[:limit]:
        rect = clamp_box_xyxy(
            unletterbox_box_xyxy(det["box_xyxy"], img_w, img_h), img_w, img_h
        )
        caption = f'{det["label"]} {det["confidence"]:.2f}'
        anchor = (rect[0], rect[1])

        pen.rectangle(rect, outline=red, width=3)

        # Pad the caption background by two pixels on every side.
        caption_box = pen.textbbox(anchor, caption)
        backdrop = [
            caption_box[0] - 2,
            caption_box[1] - 2,
            caption_box[2] + 2,
            caption_box[3] + 2,
        ]
        pen.rectangle(backdrop, fill=red)
        pen.text(anchor, caption, fill=white)

    canvas.save(output_path)
|
||||
|
||||
|
||||
def ensure_remote_artifacts(args):
    """Run ``validation/validate.py --compile-only`` on the remote host.

    The command prepends ``$HOME/.cargo/bin`` to ``PATH``, changes into the
    remote project directory and invokes the validator with the crossbar,
    core-count and timeout settings taken from ``args``. It is executed
    through ``remote_bash``.

    Args:
        args: Parsed options providing ``remote_project``, ``remote_python``,
            ``network_dir``, ``crossbar_size``, ``crossbar_count``,
            ``core_count``, ``command_timeout_seconds``, ``ssh_key`` and
            ``remote_host``.
    """
    validate_invocation = " ".join([
        shlex.quote(args.remote_python),
        "validation/validate.py",
        "--raptor-path build_release/Release/bin/onnx-mlir",
        "--onnx-include-dir onnx-mlir/include",
        f"--operations-dir {shlex.quote(args.network_dir)}",
        f"--crossbar-size {args.crossbar_size}",
        f"--crossbar-count {args.crossbar_count}",
        f"--core-count {args.core_count}",
        f"--command-timeout-seconds {args.command_timeout_seconds}",
        "--compile-only",
    ])
    steps = [
        "export PATH=$HOME/.cargo/bin:$PATH",
        f"cd {shlex.quote(args.remote_project)}",
        validate_invocation,
    ]
    remote_bash(args.ssh_key, args.remote_host, " && ".join(steps))
|
||||
|
||||
|
||||
def remote_case_paths(args, case_name: str):
    """Build the remote paths used for one validation case.

    All paths are rooted at ``args.remote_project / args.network_dir``.

    Args:
        args: Options providing ``remote_project`` and ``network_dir``.
        case_name: Case identifier used for the per-case file and directory
            names.

    Returns:
        A dict of ``Path`` objects keyed by ``root``, ``runner``,
        ``runner_build``, ``raptor_pim``, ``real_root``, ``input_csv``,
        ``ref_dir``, ``sim_dir`` and ``sim_bin``.
    """
    root = Path(args.remote_project) / Path(args.network_dir)
    runner_build = root / "runner" / "build"
    real_root = root / "real_image_validation"
    sim_dir = real_root / "simulation" / case_name

    return {
        "root": root,
        "runner": runner_build / "runner",
        "runner_build": runner_build,
        "raptor_pim": root / "raptor" / "pim",
        "real_root": real_root,
        "input_csv": real_root / "inputs" / f"{case_name}.csv",
        "ref_dir": real_root / "reference" / case_name,
        "sim_dir": sim_dir,
        "sim_bin": sim_dir / "out.bin",
    }
|
||||
|
||||
|
||||
def write_remote_file(args, remote_path: Path, data: bytes):
    """Write ``data`` to ``remote_path`` on the remote host.

    The parent directory is created with ``mkdir -p`` and the bytes are
    handed to ``remote_bash`` as ``input_bytes`` for a ``cat >`` redirection.

    Args:
        args: Options providing ``ssh_key`` and ``remote_host``.
        remote_path: Destination file on the remote host.
        data: File content to send.
    """
    parent_dir = shlex.quote(str(remote_path.parent))
    target = shlex.quote(str(remote_path))
    remote_bash(
        args.ssh_key,
        args.remote_host,
        f"mkdir -p {parent_dir} && cat > {target}",
        input_bytes=data,
    )
|
||||
|
||||
|
||||
def run_remote_reference_and_simulator(args, case_name: str):
    """Run the reference runner and the PIM simulator remotely for one case.

    Two remote commands are issued through ``remote_bash``:

    1. A shell script that creates the reference and simulation directories,
       runs the reference runner on the case CSV, and then runs an inline
       Python script. That script writes the input tensor into
       ``memory.bin`` at the first input address from ``config.json`` and
       prints ``<output address>,<output size>``.
    2. ``cargo run`` of the ``pim-simulator`` binary, given the last line
       printed by step 1 as its ``-d`` argument.

    Args:
        args: Options providing ``remote_project``, ``remote_python``,
            ``ssh_key`` and ``remote_host`` (plus whatever
            ``remote_case_paths`` reads).
        case_name: Case identifier.

    Returns:
        The dict produced by ``remote_case_paths(args, case_name)``.
    """
    paths = remote_case_paths(args, case_name)

    project = shlex.quote(args.remote_project)
    python = shlex.quote(args.remote_python)
    case_csv = shlex.quote(str(paths["input_csv"]))
    ref_dir = shlex.quote(str(paths["ref_dir"]))
    sim_dir = shlex.quote(str(paths["sim_dir"]))
    sim_bin = shlex.quote(str(paths["sim_bin"]))
    runner = shlex.quote(str(paths["runner"]))
    runner_build = shlex.quote(str(paths["runner_build"]))
    pim_dir = shlex.quote(str(paths["raptor_pim"]))

    # JSON string literals are also valid Python string literals, so they can
    # be pasted straight into the inline script below.
    input_csv_literal = json.dumps(str(paths["input_csv"]))
    pim_dir_literal = json.dumps(str(paths["raptor_pim"]))

    # Inline Python executed remotely through the heredoc; it has to stay at
    # column 0 inside the shell script.
    patch_script = f"""import json
import numpy as np
from pathlib import Path
input_csv = Path({input_csv_literal})
pim_dir = Path({pim_dir_literal})
config = json.loads((pim_dir / "config.json").read_text())
tensor = np.loadtxt(input_csv, delimiter=",", dtype=np.float32).reshape(1, 3, 640, 640)
with open(pim_dir / "memory.bin", "r+b") as f:
    f.seek(config["inputs_addresses"][0])
    f.write(tensor.tobytes(order="C"))
output_addr = config["outputs_addresses"][0]
output_size = 1 * 84 * 8400 * 4
print(f"{{output_addr}},{{output_size}}")
"""

    command = f"""
set -e
export PATH=$HOME/.cargo/bin:$PATH
cd {project}
mkdir -p {ref_dir} {sim_dir}
cd {runner_build}
{runner} --in0-csv-file {case_csv} --in0-shape 1x3x640x640 --save-csv-dir {ref_dir}
cd {project}
{python} - <<'PY'
{patch_script}PY
"""
    completed = remote_bash(args.ssh_key, args.remote_host, command, capture_output=True)
    # The inline script prints the dump range last.
    dump_range = completed.stdout.decode().strip().splitlines()[-1]

    sim_steps = [
        "export PATH=$HOME/.cargo/bin:$PATH",
        f"cd {project}/backend-simulators/pim/pim-simulator",
        "cargo run --no-default-features --release --package pim-simulator --bin pim-simulator -- "
        f"-f {pim_dir} -o {sim_bin} -d {dump_range}",
    ]
    remote_bash(args.ssh_key, args.remote_host, " && ".join(sim_steps))
    return paths
|
||||
|
||||
|
||||
def read_remote_file(args, remote_path: Path) -> bytes:
    """Return the captured stdout of ``cat <remote_path>`` run on the remote host.

    Args:
        args: Options providing ``ssh_key`` and ``remote_host``.
        remote_path: File to read on the remote host.

    Returns:
        The ``stdout`` attribute of the ``remote_bash`` result.
    """
    cat_command = f"cat {shlex.quote(str(remote_path))}"
    completed = remote_bash(args.ssh_key, args.remote_host, cat_command, capture_output=True)
    return completed.stdout
|
||||
|
||||
|
||||
def analyze_case(args, case: ImageCase, work_dir: Path):
    """Run one image case end to end and summarise reference vs. simulator.

    Steps: download the image, letterbox it and save it as CSV, upload the
    CSV, run the remote reference runner and simulator, fetch both outputs,
    compute element-wise differences, decode detections from both outputs and
    write one annotated image per output.

    Args:
        args: Parsed options (remote connection settings, ``annotated_dir``).
        case: Image case providing ``name``, ``url`` and ``expected_label``.
        work_dir: Local scratch directory for the downloaded image and CSV.

    Returns:
        A dict with the case name, top labels for both outputs, label match
        flags, absolute/relative difference statistics, annotated image paths
        and the top five detections of each output.
    """
    extension = Path(case.url).suffix or '.img'
    image_path = work_dir / f"{case.name}{extension}"
    csv_path = work_dir / f"{case.name}.csv"
    annotated_dir = Path(args.annotated_dir)
    annotated_dir.mkdir(parents=True, exist_ok=True)

    download_image(case.url, image_path)
    tensor = letterbox_rgb(Image.open(image_path))
    save_tensor_csv(tensor, csv_path)

    upload_target = remote_case_paths(args, case.name)["input_csv"]
    write_remote_file(args, upload_target, csv_path.read_bytes())
    remote_paths = run_remote_reference_and_simulator(args, case.name)

    ref_raw = read_remote_file(args, remote_paths["ref_dir"] / "output0_output0.csv")
    sim_raw = read_remote_file(args, remote_paths["sim_bin"])

    ref = np.loadtxt(ref_raw.decode().splitlines(), delimiter=",", dtype=np.float32).reshape(1, 84, 8400)
    sim = np.frombuffer(sim_raw, dtype=np.float32, count=1 * 84 * 8400).reshape(1, 84, 8400)

    # Differences are computed in float64; the relative error denominator is
    # floored at 1e-12 to avoid dividing by zero.
    abs_diff = np.abs(sim.astype(np.float64) - ref.astype(np.float64))
    rel_diff = abs_diff / np.maximum(np.abs(ref.astype(np.float64)), 1e-12)

    ref_detections = decode_yolo_output(ref)
    sim_detections = decode_yolo_output(sim)
    ref_labels = top_unique_labels(ref_detections)
    sim_labels = top_unique_labels(sim_detections)

    ref_image_path = annotated_dir / f"{case.name}_reference.png"
    sim_image_path = annotated_dir / f"{case.name}_simulator.png"
    for detections, target in ((ref_detections, ref_image_path), (sim_detections, sim_image_path)):
        draw_detections(image_path, detections, target)

    top1_match = bool(ref_labels and sim_labels and ref_labels[0] == sim_labels[0])

    report = {
        "case": case.name,
        "expected_label": case.expected_label,
        "ref_top_labels": ref_labels,
        "sim_top_labels": sim_labels,
        "top1_match": top1_match,
        "expected_in_ref": case.expected_label in ref_labels,
        "expected_in_sim": case.expected_label in sim_labels,
        "max_abs_diff": float(abs_diff.max()),
        "mean_abs_diff": float(abs_diff.mean()),
        "max_rel_diff": float(rel_diff.max()),
        "mean_rel_diff": float(rel_diff.mean()),
        "reference_annotated_image": str(ref_image_path),
        "simulator_annotated_image": str(sim_image_path),
        "ref_top_detections": ref_detections[:5],
        "sim_top_detections": sim_detections[:5],
    }
    return report
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the options, expands ``~`` in the SSH key path, runs the remote
    compile step via ``ensure_remote_artifacts`` unless ``--skip-compile`` is
    given, analyses every entry of ``IMAGE_CASES`` inside a temporary working
    directory, and prints the collected reports as indented JSON.
    """
    parser = argparse.ArgumentParser(description="Validate YOLO detections on real animal images against the simulator.")

    # Registered in this order so the generated --help output is unchanged.
    option_specs = [
        ("--remote-host", {"default": "gmagnani@monolith"}),
        ("--ssh-key", {"default": "~/.ssh/github"}),
        ("--remote-project", {"default": "/home/gmagnani/Project/Raptor"}),
        ("--remote-python", {"default": "/home/gmagnani/venv/bin/python"}),
        ("--network-dir", {"default": "validation/networks/yolo11n/depth_51"}),
        ("--crossbar-size", {"type": int, "default": 2048}),
        ("--crossbar-count", {"type": int, "default": 256}),
        ("--core-count", {"type": int, "default": 1000}),
        ("--command-timeout-seconds", {"type": int, "default": 7200}),
        ("--skip-compile", {"action": "store_true"}),
        ("--annotated-dir", {"default": "validation/networks/yolo11n/depth_51/real_image_validation/annotated"}),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    args.ssh_key = str(Path(args.ssh_key).expanduser())

    if not args.skip_compile:
        ensure_remote_artifacts(args)

    with tempfile.TemporaryDirectory(prefix="yolo_real_images_") as tmp_dir:
        scratch = Path(tmp_dir)
        reports = [analyze_case(args, case, scratch) for case in IMAGE_CASES]

    summary = {"network_dir": args.network_dir, "cases": reports}
    print(json.dumps(summary, indent=2))
|
||||
|
||||
|
||||
# Run the validation only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -67,7 +67,10 @@ def main():
|
||||
ap.add_argument("--operations-dir", default=None, help="Root of the operations tree (default: operations).")
|
||||
ap.add_argument("--simulator-dir", default=None,
|
||||
help="Path to pim-simulator crate root (default: auto-detected relative to script).")
|
||||
ap.add_argument("--threshold", type=float, default=1e-3, help="Max allowed diff per output element.")
|
||||
ap.add_argument("--threshold", type=float, default=1e-3,
|
||||
help="Absolute tolerance for per-element output comparison.")
|
||||
ap.add_argument("--relative-threshold", type=float, default=1e-5,
|
||||
help="Relative tolerance for per-element output comparison.")
|
||||
ap.add_argument("--seed", type=int, default=0, help="RNG seed for generated validation inputs.")
|
||||
ap.add_argument("--crossbar-size", type=int, default=64)
|
||||
ap.add_argument("--crossbar-count", type=int, default=8)
|
||||
@@ -77,6 +80,8 @@ def main():
|
||||
help="Scheduler used by the Spatial merge-compute-nodes pass.")
|
||||
ap.add_argument("--pim-memory-report", choices=("none", "summary", "full"), default="none",
|
||||
help="Emit a human-readable PIM memory planning report during codegen.")
|
||||
ap.add_argument("--raptor-extra-arg", action="append", default=[],
|
||||
help="Additional argument to pass through to the Raptor compiler. Repeat as needed.")
|
||||
ap.add_argument("--command-timeout-seconds", type=float, default=1000000.0,
|
||||
help="Per-subprocess timeout in seconds for compiler, runner, and simulator commands.")
|
||||
ap.add_argument("--clean", action="store_true",
|
||||
@@ -145,8 +150,10 @@ def main():
|
||||
onnx_path, a.raptor_path, a.onnx_include_dir, simulator_dir,
|
||||
crossbar_size=a.crossbar_size, crossbar_count=a.crossbar_count, core_count=a.core_count,
|
||||
pim_merge_scheduler=a.pim_merge_scheduler, pim_memory_report=a.pim_memory_report,
|
||||
raptor_extra_args=a.raptor_extra_arg,
|
||||
command_timeout_seconds=a.command_timeout_seconds,
|
||||
threshold=a.threshold,
|
||||
rtol=a.relative_threshold,
|
||||
seed=a.seed,
|
||||
reporter=reporter,
|
||||
model_index=index,
|
||||
|
||||
@@ -258,14 +258,18 @@ def parse_pim_simulator_outputs(output_bin_path, outputs_descriptor):
|
||||
return arrays
|
||||
|
||||
|
||||
def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3, verbose=False):
|
||||
def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1e-3, rtol=1e-5, verbose=False):
|
||||
all_passed = True
|
||||
rows = []
|
||||
for sim_array, (oi, name, _, shape) in zip(sim_arrays, outputs_descriptor):
|
||||
csv_name = f"output{oi}_{sanitize_output_name(name)}.csv"
|
||||
runner_array = np.loadtxt(runner_out_dir / csv_name, delimiter=',', dtype=np.float32).reshape(shape)
|
||||
max_diff = float(np.max(np.abs(sim_array.astype(np.float64) - runner_array.astype(np.float64))))
|
||||
passed = max_diff <= threshold
|
||||
sim_array64 = sim_array.astype(np.float64)
|
||||
runner_array64 = runner_array.astype(np.float64)
|
||||
abs_diff = np.abs(sim_array64 - runner_array64)
|
||||
allowed_diff = threshold + rtol * np.abs(runner_array64)
|
||||
max_diff = float(np.max(abs_diff))
|
||||
passed = bool(np.all(abs_diff <= allowed_diff))
|
||||
rows.append((name, f"{max_diff:.6e}", passed))
|
||||
if not passed:
|
||||
all_passed = False
|
||||
@@ -289,7 +293,8 @@ def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1
|
||||
|
||||
def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None,
|
||||
pim_merge_scheduler="peft", pim_memory_report="none", threshold=1e-3,
|
||||
pim_merge_scheduler="peft", pim_memory_report="none", raptor_extra_args=None,
|
||||
threshold=1e-3, rtol=1e-5,
|
||||
seed=0, reporter=None, model_index=1, model_total=1, verbose=False,
|
||||
command_timeout_seconds=60.0, mode=MODE_FULL):
|
||||
network_onnx_path = Path(network_onnx_path).resolve()
|
||||
@@ -343,7 +348,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
pim_pass_timings = compile_with_raptor(
|
||||
network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count,
|
||||
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
|
||||
pim_memory_report=pim_memory_report,
|
||||
pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args,
|
||||
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
|
||||
reporter.advance()
|
||||
@@ -383,7 +388,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
pim_pass_timings = compile_with_raptor(
|
||||
network_mlir_path, raptor_path, pim_output_base, crossbar_size, crossbar_count,
|
||||
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
|
||||
pim_memory_report=pim_memory_report,
|
||||
pim_memory_report=pim_memory_report, raptor_extra_args=raptor_extra_args,
|
||||
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
|
||||
reporter.advance()
|
||||
@@ -403,7 +408,7 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compare Outputs")
|
||||
sim_arrays = parse_pim_simulator_outputs(output_bin_path, outputs_descriptor)
|
||||
reporter.suspend()
|
||||
passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold, verbose=verbose)
|
||||
passed = validate_outputs(sim_arrays, out_dir, outputs_descriptor, threshold, rtol=rtol, verbose=verbose)
|
||||
reporter.resume()
|
||||
reporter.advance()
|
||||
reporter.record_result(passed)
|
||||
|
||||
Reference in New Issue
Block a user