better deadlock report by pim simulator
This commit is contained in:
+24
-16
@@ -142,8 +142,8 @@ class ProgressReporter:
|
||||
self.rendered_width = 0
|
||||
|
||||
|
||||
def run_command(cmd, cwd=None, reporter=None, timeout_sec=None):
    """Run *cmd* by delegating to ``run_command_with_reporter``.

    Args:
        cmd: Command to execute, passed through unchanged.
        cwd: Optional working directory, passed through unchanged.
        reporter: Optional progress reporter, passed through unchanged.
        timeout_sec: Optional timeout forwarded to the helper; ``None`` is
            forwarded as-is (presumably meaning "no limit" -- confirm
            against ``run_command_with_reporter``).
    """
    run_command_with_reporter(cmd, cwd=cwd, reporter=reporter, timeout_sec=timeout_sec)
|
||||
|
||||
|
||||
def clean_workspace_artifacts(workspace_dir, model_stem):
|
||||
@@ -186,21 +186,22 @@ def print_info(reporter, message):
|
||||
reporter.log(f" {message}")
|
||||
|
||||
|
||||
def compile_onnx_network(network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=None, timeout_sec=None):
    """Compile an ONNX model with raptor and return the produced artifact paths.

    raptor is invoked twice through ``run_command``: once with
    ``--EmitONNXIR`` writing to ``raptor_dir / <stem>``, and once without
    extra flags writing to ``runner_dir / <stem>``.

    Args:
        network_onnx_path: Path object of the ONNX model; its ``.stem`` names
            the outputs.
        raptor_path: Path of the raptor executable.
        raptor_dir: Directory receiving the ONNX-IR output.
        runner_dir: Directory receiving the runner output.
        reporter: Optional progress reporter forwarded to ``run_command``.
        timeout_sec: Optional timeout forwarded to each ``run_command`` call
            (applied per command, not to the pair as a whole).

    Returns:
        A ``(network_so_path, network_mlir_path)`` tuple.
    """
    stem = network_onnx_path.stem
    onnx_ir_base = raptor_dir / stem
    runner_base = runner_dir / stem
    run_command([raptor_path, network_onnx_path, "-o", onnx_ir_base, "--EmitONNXIR"],
                reporter=reporter, timeout_sec=timeout_sec)
    run_command([raptor_path, network_onnx_path, "-o", runner_base], reporter=reporter, timeout_sec=timeout_sec)
    network_so_path = runner_base.with_suffix(".so")
    network_mlir_path = onnx_ir_base.with_suffix(".onnx.mlir")
    # Remove the ".tmp" sibling of the ONNX-IR output if one exists; a missing
    # file is not an error.
    onnx_ir_base.with_suffix(".tmp").unlink(missing_ok=True)
    return network_so_path, network_mlir_path
|
||||
|
||||
|
||||
def build_onnx_runner(source_dir, build_dir, reporter=None, timeout_sec=None):
    """Configure and build the runner with CMake, returning the binary path.

    Both CMake invocations run with ``build_dir`` as the working directory.

    Args:
        source_dir: Directory passed to ``cmake`` as the source tree.
        build_dir: Path object of the build directory.
        reporter: Optional progress reporter forwarded to ``run_command``.
        timeout_sec: Optional timeout forwarded to each ``run_command`` call
            (applied per command).

    Returns:
        ``build_dir / "runner"``.
    """
    # Configure step.
    run_command(["cmake", source_dir], cwd=build_dir, reporter=reporter, timeout_sec=timeout_sec)
    # Build step; "-j" is passed without an explicit job count.
    run_command(["cmake", "--build", ".", "-j"], cwd=build_dir, reporter=reporter, timeout_sec=timeout_sec)
    return build_dir / "runner"
|
||||
|
||||
|
||||
@@ -214,13 +215,14 @@ def build_dump_ranges(config_path, outputs_descriptor):
|
||||
return ",".join(ranges)
|
||||
|
||||
|
||||
def run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=None, timeout_sec=None):
    """Run the ``pim-simulator`` binary through ``cargo run``.

    The command is executed with ``simulator_dir`` as the working directory.

    Args:
        simulator_dir: Working directory for the ``cargo`` invocation.
        pim_dir: Passed (as a string) to the simulator's ``-f`` option.
        output_bin_path: Passed (as a string) to the simulator's ``-o`` option.
        dump_ranges: Passed unchanged to the simulator's ``-d`` option.
        reporter: Optional progress reporter forwarded to ``run_command``.
        timeout_sec: Optional timeout forwarded to ``run_command``.
    """
    run_command(
        ["cargo", "run", "--no-default-features", "--release", "--package", "pim-simulator", "--bin", "pim-simulator",
         "--",  # everything after this is handed to the simulator, not to cargo
         "-f", str(pim_dir), "-o", str(output_bin_path), "-d", dump_ranges],
        cwd=simulator_dir,
        reporter=reporter,
        timeout_sec=timeout_sec,
    )
|
||||
|
||||
|
||||
@@ -267,8 +269,10 @@ def validate_outputs(sim_arrays, runner_out_dir, outputs_descriptor, threshold=1
|
||||
|
||||
|
||||
def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None, threshold=1e-3,
|
||||
seed=0, reporter=None, model_index=1, model_total=1, verbose=False):
|
||||
simulator_dir, crossbar_size=64, crossbar_count=8, core_count=None,
|
||||
pim_merge_scheduler="peft", threshold=1e-3,
|
||||
seed=0, reporter=None, model_index=1, model_total=1, verbose=False,
|
||||
command_timeout_seconds=60.0):
|
||||
network_onnx_path = Path(network_onnx_path).resolve()
|
||||
raptor_path = Path(raptor_path).resolve()
|
||||
onnx_include_dir = Path(onnx_include_dir).resolve()
|
||||
@@ -292,7 +296,8 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
try:
|
||||
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile ONNX")
|
||||
network_so_path, network_mlir_path = compile_onnx_network(
|
||||
network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter)
|
||||
network_onnx_path, raptor_path, raptor_dir, runner_dir, reporter=reporter,
|
||||
timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"MLIR saved to {network_mlir_path}")
|
||||
print_info(reporter, f"Shared library saved to {network_so_path}")
|
||||
reporter.advance()
|
||||
@@ -300,7 +305,8 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Build Runner")
|
||||
gen_network_runner(network_onnx_path, network_so_path, onnx_include_dir, out=runner_dir / "runner.c",
|
||||
verbose=False)
|
||||
runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter)
|
||||
runner_path = build_onnx_runner(runner_dir, runner_build_dir, reporter=reporter,
|
||||
timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"Runner built at {runner_path}")
|
||||
reporter.advance()
|
||||
|
||||
@@ -316,14 +322,15 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
Path.mkdir(out_dir, exist_ok=True)
|
||||
run_cmd = [runner_path, *flags]
|
||||
run_cmd += ["--save-csv-dir", f"{out_dir}"]
|
||||
run_command(run_cmd, cwd=runner_build_dir, reporter=reporter)
|
||||
run_command(run_cmd, cwd=runner_build_dir, reporter=reporter, timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"Reference outputs saved to {out_dir}")
|
||||
reporter.advance()
|
||||
|
||||
print_stage(reporter, model_index, model_total, network_onnx_path.name, "Compile PIM")
|
||||
pim_pass_timings = compile_with_raptor(
|
||||
network_mlir_path, raptor_path, raptor_dir / network_onnx_path.stem, crossbar_size, crossbar_count,
|
||||
core_count=core_count, cwd=raptor_dir, verbose=verbose, reporter=reporter)
|
||||
core_count=core_count, pim_merge_scheduler=pim_merge_scheduler,
|
||||
cwd=raptor_dir, verbose=verbose, reporter=reporter, timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"PIM artifacts saved to {raptor_dir / 'pim'}")
|
||||
reporter.advance()
|
||||
|
||||
@@ -334,7 +341,8 @@ def validate_network(network_onnx_path, raptor_path, onnx_include_dir,
|
||||
Path.mkdir(simulation_dir, exist_ok=True)
|
||||
dump_ranges = build_dump_ranges(pim_dir / "config.json", outputs_descriptor)
|
||||
output_bin_path = simulation_dir / "out.bin"
|
||||
run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=reporter)
|
||||
run_pim_simulator(simulator_dir, pim_dir, output_bin_path, dump_ranges, reporter=reporter,
|
||||
timeout_sec=command_timeout_seconds)
|
||||
print_info(reporter, f"Simulator output saved to {output_bin_path}")
|
||||
reporter.advance()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user