remove host core generation

strip config.json emitted by raptor
add actual pimsim-nn configs in validation pimsim-configs
This commit is contained in:
NiccoloN
2026-05-13 16:31:01 +02:00
parent 623ee62a04
commit 03eab42971
11 changed files with 206 additions and 81 deletions
@@ -68,7 +68,7 @@ fn map_crossbars_to_cores<'c>(
args: &Args,
global_crossbars: &'c HashMap<String, Crossbar>,
) -> Vec<Vec<&'c Crossbar>> {
let mut res = Vec::new();
let mut res = vec![Vec::new()];
let num_cores = config.get("core_cnt").unwrap().as_i64().unwrap() as i32;
if let Some(folder) = args.folder.as_ref() {
@@ -312,7 +312,7 @@ fn append_record(
29 => {
inst_data_builder
.set_rd_u8(rd)
.set_imm_core(r2_or_imm)
.set_imm_core(r2_or_imm + 1)
.set_imm_len(generic3)
.set_offset_select_value(generic1, generic2);
inst_builder.make_inst(send, inst_data_builder.build());
@@ -320,7 +320,7 @@ fn append_record(
30 => {
inst_data_builder
.set_rd_u8(rd)
.set_imm_core(r2_or_imm)
.set_imm_core(r2_or_imm + 1)
.set_imm_len(generic3)
.set_offset_select_value(generic1, generic2);
inst_builder.make_inst(recv, inst_data_builder.build());
@@ -366,23 +366,19 @@ fn binary_to_instructions(
pub fn binary_to_executor<'a, 'b>(
config: Value,
mut cores: impl Iterator<Item = &'b Vec<u8>>,
cores: impl Iterator<Item = &'b Vec<u8>>,
crossbars: Vec<Vec<&'a Crossbar>>,
) -> Result<Executable<'a>> {
let core_cnt = config
.get("core_cnt")
.context("missing core_cnt in config")?
.as_i64()
.context("core_cnt is not an integer")? as i32
- 1;
.context("core_cnt is not an integer")? as i32;
let cpu = CPU::new(core_cnt, crossbars);
let mut core_insts_builder = CoreInstructionsBuilder::new(core_cnt as usize);
cores.next();
for core_indx in 1..=core_cnt {
let core_bytes = cores
.next()
.unwrap_or_else(|| panic!("cores files less than {}", core_indx));
for (external_core_indx, core_bytes) in cores.enumerate() {
let core_indx = external_core_indx as i32 + 1;
let instructions = binary_to_instructions(core_bytes, core_indx)?;
core_insts_builder.set_core(core_indx, instructions);
}
@@ -567,7 +567,7 @@ fn json_to_send(
let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap());
inst_data_builder
.set_rd(rd)
.set_imm_core(core)
.set_imm_core(core + 1)
.set_imm_len(size)
.set_offset_select(offset_select)
.set_offset_value(offset_value);
@@ -588,7 +588,7 @@ fn json_to_recv(
let (offset_select, offset_value) = json_to_offset(json.get("offset").unwrap());
inst_data_builder
.set_rd(rd)
.set_imm_core(core)
.set_imm_core(core + 1)
.set_imm_len(size)
.set_offset_select(offset_select)
.set_offset_value(offset_value);
@@ -1,45 +1,30 @@
use core::panic;
use std::collections::HashMap;
use serde_json::{Map, Value};
use serde_json::Value;
use crate::{
CoreInstructionsBuilder, Executable,
cpu::{CPU, crossbar::{self, Crossbar}},
instruction_set::{
InstructionsBuilder,
instruction_data::{self, InstructionData, InstructionDataBuilder},
},
json_to_instruction::{self, json_isa},
memory_manager::type_traits::TryToUsize,
cpu::{CPU, crossbar::Crossbar},
instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder},
json_to_instruction::json_isa,
};
pub fn json_to_executor<'a, 'b>(
config: Value,
mut cores: impl Iterator<Item = &'b Value>,
cores: impl Iterator<Item = &'b Value>,
crossbars : Vec<Vec<&'a Crossbar>>
) -> Executable<'a> {
let cell_precision = config.get("cell_precision").unwrap().as_i64().unwrap() as i32;
let core_cnt = config.get("core_cnt").unwrap().as_i64().unwrap() as i32 - 1;
let xbar_count = config.get("xbar_array_count").unwrap().as_i64().unwrap() as i32;
let xbar_size = config.get("xbar_size").unwrap().as_array().unwrap();
let rows_crossbar = xbar_size[0].as_i64().unwrap() as i32;
let column_corssbar = xbar_size[1].as_i64().unwrap() as i32;
let core_cnt = config.get("core_cnt").unwrap().as_i64().unwrap() as i32;
let mut cpu = CPU::new(core_cnt, crossbars);
let cpu = CPU::new(core_cnt, crossbars);
let mut core_insts_builder = CoreInstructionsBuilder::new(core_cnt as usize);
cores.next();
for core_indx in 1..=core_cnt {
for (external_core_indx, json_core) in cores.enumerate() {
let core_indx = external_core_indx as i32 + 1;
let mut insts_builder = InstructionsBuilder::new();
let mut inst_data_builder = InstructionDataBuilder::new();
inst_data_builder.set_core_indx(core_indx).fix_core_indx();
let json_core = cores
.next()
.unwrap_or_else(|| panic!("cores files less than {}", core_indx ));
let json_core_insts = json_core
.as_array()
.unwrap_or_else(|| panic!("core{} has not a list of instruction", core_indx));
.unwrap_or_else(|| panic!("core{} has not a list of instruction", external_core_indx));
for json_inst in json_core_insts {
json_isa::json_to_instruction(&mut insts_builder, &mut inst_data_builder, json_inst);
}
-35
View File
@@ -20,38 +20,6 @@ using namespace mlir;
namespace onnx_mlir {
// Writes the placeholder host-core artifacts into `outputDirPath`.
//
// Always creates `<outputDirPath>/core_0.pim` containing a header followed by
// two `sldi` instruction records. When the `pimEmitJson` option is set, it also
// creates `<outputDirPath>/core_0.json` with the equivalent two-instruction
// JSON program.
//
// Returns InvalidOutputFileAccess if either file cannot be opened, and
// CompilerSuccess otherwise.
// NOTE(review): write errors that occur after a stream was opened successfully
// are not checked here.
OnnxMlirCompilerErrorCodes writeHostCoreArtifacts(StringRef outputDirPath) {
std::error_code errorCode;
std::string outputHostCorePath = outputDirPath.str() + "/core_0.pim";
raw_fd_ostream hostFileStream(outputHostCorePath, errorCode, sys::fs::OF_None);
if (errorCode) {
errs() << "Error while opening host core file `" << outputHostCorePath << "`: " << errorCode.message() << '\n';
return InvalidOutputFileAccess;
}
pim_binary::writeHeader(hostFileStream);
// Build one `sldi` record (all other fields left at their defaults) and emit
// it twice; it serves as a no-op-like filler instruction.
pim_binary::InstructionRecord noop;
noop.opcode = pim_binary::Opcode::sldi;
pim_binary::writeInstructionRecord(hostFileStream, noop);
pim_binary::writeInstructionRecord(hostFileStream, noop);
// Record that exactly two instructions were written (presumably back-patched
// into the header emitted above -- confirm against pim_binary).
pim_binary::patchInstructionCount(hostFileStream, 2);
hostFileStream.close();
// Optionally mirror the same program as JSON.
if (pimEmitJson.getValue()) {
std::string outputHostJsonPath = outputDirPath.str() + "/core_0.json";
raw_fd_ostream hostJsonStream(outputHostJsonPath, errorCode);
if (errorCode) {
errs() << "Error while opening host core json file `" << outputHostJsonPath << "`: " << errorCode.message()
<< '\n';
return InvalidOutputFileAccess;
}
// The host core json contains two no-op-like instructions to satisfy pimsim-nn
hostJsonStream << "[{\"imm\":0,\"op\":\"sldi\",\"rd\":0},{\"imm\":0,\"op\":\"sldi\",\"rd\":0}]";
hostJsonStream.close();
}
return CompilerSuccess;
}
OnnxMlirCompilerErrorCodes
writeMemoryBinary(ModuleOp moduleOp, func::FuncOp funcOp, PimAcceleratorMemory& memory, StringRef outputDirPath) {
auto memoryFilePath = (outputDirPath + "/memory.bin").str();
@@ -109,9 +77,6 @@ OnnxMlirCompilerErrorCodes writeConfigJson(func::FuncOp funcOp,
json::Object configJson;
configJson["core_cnt"] = maxCoreId + 1;
configJson["adc_count"] = 16;
configJson["cell_precision"] = 2;
configJson["xbar_array_count"] = crossbarCountInCore.getValue();
configJson["xbar_size"] = {crossbarSize.getValue(), crossbarSize.getValue()};
configJson["array_group_map"] = std::move(xbarsPerArrayGroup);
-1
View File
@@ -12,7 +12,6 @@ namespace onnx_mlir {
class PimAcceleratorMemory;
OnnxMlirCompilerErrorCodes writeHostCoreArtifacts(llvm::StringRef outputDirPath);
OnnxMlirCompilerErrorCodes writeMemoryBinary(mlir::ModuleOp moduleOp,
mlir::func::FuncOp funcOp,
PimAcceleratorMemory& memory,
+1 -6
View File
@@ -875,11 +875,6 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std::
if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath))
return err;
if (auto err = writeHostCoreArtifacts(outputDirPath))
return err;
// For each core, specify the number of crossbar per array group.
// This implementation always assigns one crossbar per group.
json::Object xbarsPerArrayGroup;
size_t maxCoreId = 0;
uint64_t nextBatchReportId = 0;
@@ -891,7 +886,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std::
SmallDenseMap<memref::GlobalOp, MemEntry, 16> materializedHostGlobals =
collectMaterializedHostGlobals(moduleOp, funcOp, memory);
llvm::DenseMap<size_t, size_t> emittedCoreIds;
size_t nextEmittedCoreId = 1;
size_t nextEmittedCoreId = 0;
for (Operation* op : coreLikeOps) {
if (auto coreOp = dyn_cast<pim::PimCoreOp>(op)) {
@@ -0,0 +1,58 @@
{
"chip_config": {
"core_config": {
"rob_size": 1,
"period": 0.5,
"matrix_config": {
"xbar_array_count": 256,
"period": 0.5,
"pipeline_mode": true,
"dac_resolution": 4,
"dac_count": 2048,
"xbar_size": [
2048,
2048
],
"cell_precision": 2,
"xbar_latency": 10,
"sample_hold_latency_cycle": 1,
"adc_resolution": 8,
"adc_latency_cycle": 4,
"adc_count": 256,
"shift_adder_latency_cycle": 1,
"output_buffer_latency_cycle": 1
},
"vector_width": 64,
"vector_latency_cycle": 2,
"local_memory_config": {
"data_width": 256,
"period": 0.5,
"write_latency_cycle": 1,
"read_latency_cycle": 1
},
"global_memory_switch_id": -10
},
"global_memory_config": {
"data_width": 256,
"period": 0.5,
"write_latency_cycle": 1,
"read_latency_cycle": 1
},
"network_config": {
"bus_topology": "mesh",
"bus_width": 256,
"layout": [
25,
40
],
"net_config_file_path": "network_mesh_1000.json"
},
"core_cnt": 1000,
"global_memory_switch_id": -10
},
"sim_config": {
"sim_mode": 1,
"sim_time": 1,
"report_verbose_level": 0
}
}
File diff suppressed because one or more lines are too long
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
import argparse
import json
from pathlib import Path
def build_latency_config(core_count, crossbar_count, crossbar_size, layout):
    """Assemble the simulator latency configuration as a nested dict.

    Args:
        core_count: Value stored under ``chip_config.core_cnt``; also embedded
            in the referenced network file name.
        crossbar_count: Value stored as ``xbar_array_count``.
        crossbar_size: Edge length of the (square) crossbar; also used as the
            DAC count and to derive the ADC count.
        layout: Indexable pair whose first two items become the mesh layout.

    Returns:
        A dict with the two top-level sections ``chip_config`` and
        ``sim_config``, ready to be serialised as JSON.
    """

    def memory_timing():
        # Local and global memory share the same timing values; a fresh dict
        # is returned each time so the two sections never alias each other.
        return {
            "data_width": 256,
            "period": 0.5,
            "write_latency_cycle": 1,
            "read_latency_cycle": 1,
        }

    matrix_section = {
        "xbar_array_count": crossbar_count,
        "period": 0.5,
        "pipeline_mode": True,
        "dac_resolution": 4,
        "dac_count": crossbar_size,
        "xbar_size": [crossbar_size, crossbar_size],
        "cell_precision": 2,
        "xbar_latency": 10,
        "sample_hold_latency_cycle": 1,
        "adc_resolution": 8,
        "adc_latency_cycle": 4,
        # One ADC per eight crossbar columns, but never fewer than 128.
        "adc_count": max(128, crossbar_size // 8),
        "shift_adder_latency_cycle": 1,
        "output_buffer_latency_cycle": 1,
    }

    core_section = {
        "rob_size": 1,
        "period": 0.5,
        "matrix_config": matrix_section,
        "vector_width": 64,
        "vector_latency_cycle": 2,
        "local_memory_config": memory_timing(),
        "global_memory_switch_id": -10,
    }

    network_section = {
        "bus_topology": "mesh",
        "bus_width": 256,
        "layout": [layout[0], layout[1]],
        "net_config_file_path": f"network_mesh_{core_count}.json",
    }

    chip_section = {
        "core_config": core_section,
        "global_memory_config": memory_timing(),
        "network_config": network_section,
        "core_cnt": core_count,
        "global_memory_switch_id": -10,
    }

    sim_section = {
        "sim_mode": 1,
        "sim_time": 1,
        "report_verbose_level": 0,
    }

    return {"chip_config": chip_section, "sim_config": sim_section}
def build_network(core_count, layout):
    """Build the latency and energy tables for a 2-D mesh of cores.

    Cores are numbered row-major on a ``rows x cols`` grid. The global memory
    is represented by the node id ``"-10"``. Every core-to-core entry grows
    linearly with the Manhattan distance between the two cores; every entry
    to or from the global memory uses a fixed cost.

    Args:
        core_count: Number of cores in the mesh.
        layout: ``(rows, cols)`` pair; ``rows * cols`` must equal
            ``core_count``.

    Returns:
        ``{"latency": {...}, "energy": {...}}`` where each table maps a source
        node id (string) to a dict of destination node id -> cost. A core has
        no entry for itself.

    Raises:
        ValueError: If a mesh dimension is negative or the layout does not
            cover exactly ``core_count`` cores.
    """
    rows, cols = layout
    # Explicit checks instead of ``assert``: assertions are stripped under
    # ``python -O``, which would let a mismatched layout silently produce a
    # wrong network description.
    if rows < 0 or cols < 0:
        raise ValueError(f"mesh dimensions must be non-negative, got {rows}x{cols}")
    if rows * cols != core_count:
        raise ValueError(
            f"mesh layout {rows}x{cols} does not match core count {core_count}"
        )

    global_memory_id = "-10"
    global_memory_latency = 6
    global_memory_energy = 24
    per_hop_latency = 1
    base_latency = 2
    per_hop_energy = 3
    base_energy = 12

    # Computed once so the quadratic loop below does not repeat the string
    # conversion and the row/column split for every pair.
    node_ids = [str(core) for core in range(core_count)]
    positions = [divmod(core, cols) for core in range(core_count)]

    latency = {global_memory_id: dict.fromkeys(node_ids, global_memory_latency)}
    energy = {global_memory_id: dict.fromkeys(node_ids, global_memory_energy)}

    for src, (src_row, src_col) in enumerate(positions):
        # The global-memory entry comes first, followed by the other cores in
        # ascending order.
        src_latency = {global_memory_id: global_memory_latency}
        src_energy = {global_memory_id: global_memory_energy}
        for dst, (dst_row, dst_col) in enumerate(positions):
            if src == dst:
                continue
            hops = abs(src_row - dst_row) + abs(src_col - dst_col)
            dst_id = node_ids[dst]
            src_latency[dst_id] = base_latency + per_hop_latency * hops
            src_energy[dst_id] = base_energy + per_hop_energy * hops
        latency[node_ids[src]] = src_latency
        energy[node_ids[src]] = src_energy

    return {"latency": latency, "energy": energy}
def main():
    """Command-line entry point: generate the two simulator config files.

    Parses the required options, creates the output directory if needed, and
    writes ``latency_config.json`` (pretty-printed) and
    ``network_mesh_<core-count>.json`` (compact) into it. Each file ends with
    a trailing newline.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--output-dir", required=True)
    # All remaining options are mandatory integers.
    for int_flag in (
        "--core-count",
        "--crossbar-count",
        "--crossbar-size",
        "--mesh-rows",
        "--mesh-cols",
    ):
        cli.add_argument(int_flag, type=int, required=True)
    opts = cli.parse_args()

    target_dir = Path(opts.output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    mesh_shape = (opts.mesh_rows, opts.mesh_cols)
    config_doc = build_latency_config(
        opts.core_count, opts.crossbar_count, opts.crossbar_size, mesh_shape
    )
    network_doc = build_network(opts.core_count, mesh_shape)

    config_path = target_dir / "latency_config.json"
    with config_path.open("w", encoding="utf-8") as config_file:
        json.dump(config_doc, config_file, indent=2)
        config_file.write("\n")

    mesh_path = target_dir / f"network_mesh_{opts.core_count}.json"
    with mesh_path.open("w", encoding="utf-8") as mesh_file:
        json.dump(network_doc, mesh_file, separators=(",", ":"))
        mesh_file.write("\n")


if __name__ == "__main__":
    main()