From 0c7db55a241a0f4c1b9b5da60ff9c8c9071e0f34 Mon Sep 17 00:00:00 2001 From: NiccoloN Date: Wed, 13 May 2026 11:15:54 +0200 Subject: [PATCH] binary pim code for reduced memory usage fast pim code emission --- .../src/bin/pim-simulator/main.rs | 94 +++- .../src/lib/binary_to_instruction/mod.rs | 497 ++++++++++++++++++ .../lib/instruction_set/instruction_data.rs | 91 +++- .../pim/pim-simulator/src/lib/pimcore.rs | 1 + src/PIM/Compiler/PimArtifactWriter.cpp | 28 +- src/PIM/Compiler/PimArtifactWriter.hpp | 2 +- src/PIM/Compiler/PimBinaryFormat.hpp | 381 ++++++++++++++ src/PIM/Compiler/PimCodeGen.cpp | 349 ++++++------ src/PIM/Compiler/PimCodeGen.hpp | 17 +- src/PIM/Compiler/PimCompilerOptions.cpp | 5 + src/PIM/Compiler/PimCompilerOptions.hpp | 1 + src/PIM/Compiler/PimCompilerUtils.cpp | 4 +- src/PIM/Pass/CMakeLists.txt | 2 +- src/PIM/Pass/PIMPasses.h | 2 +- src/PIM/Pass/PimCodegen/EmitPimCodePass.cpp | 36 ++ src/PIM/Pass/PimCodegen/EmitPimJsonPass.cpp | 36 -- src/PIM/PimAccelerator.cpp | 2 +- validation/raptor.py | 2 +- 18 files changed, 1289 insertions(+), 261 deletions(-) create mode 100644 backend-simulators/pim/pim-simulator/src/lib/binary_to_instruction/mod.rs create mode 100644 src/PIM/Compiler/PimBinaryFormat.hpp create mode 100644 src/PIM/Pass/PimCodegen/EmitPimCodePass.cpp delete mode 100644 src/PIM/Pass/PimCodegen/EmitPimJsonPass.cpp diff --git a/backend-simulators/pim/pim-simulator/src/bin/pim-simulator/main.rs b/backend-simulators/pim/pim-simulator/src/bin/pim-simulator/main.rs index 77ea143..7aab139 100644 --- a/backend-simulators/pim/pim-simulator/src/bin/pim-simulator/main.rs +++ b/backend-simulators/pim/pim-simulator/src/bin/pim-simulator/main.rs @@ -1,6 +1,7 @@ use anyhow::{Context, Result, bail}; use clap::Parser; use glob::glob; +use pimcore::binary_to_instruction::binary_to_executor; use pimcore::cpu::crossbar::Crossbar; use pimcore::json_to_instruction::json_to_executor; use pimcore::memory_manager::CoreMemory; @@ -44,12 +45,14 @@ fn main() -> Result<()> { let args = Args::parse(); let config_json = retrive_config(&args)?; - let core_jsons = retrive_cores(&args)?; + let core_inputs = retrive_cores(&args)?; let memory = retrive_memory(&args)?; let global_crossbars = get_crossbars(&config_json, &args).unwrap(); let crossbars = map_crossbars_to_cores(&config_json, &args, &global_crossbars); - let mut executor = - json_to_executor::json_to_executor(config_json, core_jsons.iter(), crossbars); + let mut executor = match &core_inputs { + CoreInputs::Json(core_jsons) => json_to_executor::json_to_executor(config_json, core_jsons.iter(), crossbars), + CoreInputs::Binary(core_bins) => binary_to_executor(config_json, core_bins.iter(), crossbars)?, + }; set_memory(&mut executor, memory); TRACER .lock() @@ -214,9 +217,29 @@ fn retrive_memory(args: &Args) -> Result> { Ok(memory_vector) } -fn retrive_cores(args: &Args) -> Result, anyhow::Error> { - let mut core_jsons: Vec = Vec::new(); +enum CoreInputs { + Json(Vec), + Binary(Vec>), +} + +fn retrive_cores(args: &Args) -> Result { if let Some(cores_override) = &args.cores { + let first_extension = cores_override + .first() + .and_then(|path| path.extension()) + .and_then(|ext| ext.to_str()) + .unwrap_or_default(); + if first_extension == "pim" { + let mut core_bins = Vec::with_capacity(cores_override.len()); + for core in cores_override { + core_bins.push( + fs::read(core) + .with_context(|| format!("Failed to read binary core file: {:?}", core))?, + ); + } + return Ok(CoreInputs::Binary(core_bins)); + } + let mut core_jsons: Vec = Vec::with_capacity(cores_override.len()); for core in cores_override { let content = fs::read_to_string(core) .with_context(|| format!("Failed to read core file: {:?}", cores_override))?; @@ -224,35 +247,56 @@ fn retrive_cores(args: &Args) -> Result, anyhow::Error> { serde_json::from_str(&content).context("Failed to parse core json override")?; core_jsons.push(json); } - } else if let Some(folder) = args.folder.as_ref() { - let pattern = folder.join("core*.json"); - let pattern_str = pattern.to_str().context("Invalid path encoding")?; - let mut paths: Vec<_> = glob(pattern_str)?.map(|x| x.unwrap()).collect(); - paths.sort_by_cached_key(|x| { - let mut x = x - .file_stem() - .expect("Extracting the stem") - .to_str() - .expect("File not utf-8"); - x = &x[5..]; - x.parse::().unwrap() - }); + return Ok(CoreInputs::Json(core_jsons)); + } - if paths.is_empty() { - bail!("No core*.json files found in {:?}", folder); + if let Some(folder) = args.folder.as_ref() { + let binary_pattern = folder.join("core*.pim"); + let binary_pattern_str = binary_pattern.to_str().context("Invalid path encoding")?; + let mut binary_paths: Vec<_> = glob(binary_pattern_str)?.map(|x| x.unwrap()).collect(); + binary_paths.sort_by_cached_key(core_sort_key); + if !binary_paths.is_empty() { + let mut core_bins = Vec::with_capacity(binary_paths.len()); + for path in binary_paths { + core_bins.push( + fs::read(&path) + .with_context(|| format!("Failed to read core file: {:?}", path))?, + ); + } + return Ok(CoreInputs::Binary(core_bins)); } - for entry in paths { - let path = entry; + + let json_pattern = folder.join("core*.json"); + let json_pattern_str = json_pattern.to_str().context("Invalid path encoding")?; + let mut json_paths: Vec<_> = glob(json_pattern_str)?.map(|x| x.unwrap()).collect(); + json_paths.sort_by_cached_key(core_sort_key); + + if json_paths.is_empty() { + bail!("No core*.pim or core*.json files found in {:?}", folder); + } + + let mut core_jsons: Vec = Vec::with_capacity(json_paths.len()); + for path in json_paths { let content = fs::read_to_string(&path) .with_context(|| format!("Failed to read core file: {:?}", path))?; let json: Value = serde_json::from_str(&content) .with_context(|| format!("Failed to parse JSON in {:?}", path))?; core_jsons.push(json); } - } else { - bail!("Either --core or --folder must be provided to find core definitions."); + return Ok(CoreInputs::Json(core_jsons)); } - Ok(core_jsons) + + bail!("Either --core or --folder must be provided to find core definitions."); +} + +fn core_sort_key(path: &PathBuf) -> i32 { + let mut stem = path + .file_stem() + .expect("Extracting the stem") + .to_str() + .expect("File not utf-8"); + stem = &stem[5..]; + stem.parse::().unwrap() } fn retrive_config(args: &Args) -> Result { diff --git a/backend-simulators/pim/pim-simulator/src/lib/binary_to_instruction/mod.rs b/backend-simulators/pim/pim-simulator/src/lib/binary_to_instruction/mod.rs new file mode 100644 index 0000000..7ae01f8 --- /dev/null +++ b/backend-simulators/pim/pim-simulator/src/lib/binary_to_instruction/mod.rs @@ -0,0 +1,497 @@ +use crate::{ + CoreInstructionsBuilder, Executable, + cpu::{CPU, crossbar::Crossbar}, + instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder, isa::*}, +}; +use anyhow::{Context, Result, bail, ensure}; +use serde_json::Value; +use std::collections::HashMap; +use std::convert::TryFrom; +use std::sync::LazyLock; + +const MAGIC: &[u8; 4] = b"PIMB"; +const VERSION: u32 = 1; +const HEADER_SIZE: usize = 12; +const RECORD_SIZE: usize = 20; + +macro_rules! add_name { + ($storage:ident, $opcode:literal, $name:literal) => { + $storage.insert($opcode, $name); + }; +} + +static INSTRUCTIONS: LazyLock> = LazyLock::new(|| { + let mut hash = HashMap::new(); + add_name!(hash, 0, "nop"); + add_name!(hash, 1, "sldi"); + add_name!(hash, 2, "sld"); + add_name!(hash, 3, "sadd"); + add_name!(hash, 4, "ssub"); + add_name!(hash, 5, "smul"); + add_name!(hash, 6, "saddi"); + add_name!(hash, 7, "smuli"); + add_name!(hash, 8, "setbw"); + add_name!(hash, 9, "mvmul"); + add_name!(hash, 10, "vvadd"); + add_name!(hash, 11, "vvsub"); + add_name!(hash, 12, "vvmul"); + add_name!(hash, 13, "vvdmul"); + add_name!(hash, 14, "vvmax"); + add_name!(hash, 15, "vvsll"); + add_name!(hash, 16, "vvsra"); + add_name!(hash, 17, "vavg"); + add_name!(hash, 18, "vrelu"); + add_name!(hash, 19, "vtanh"); + add_name!(hash, 20, "vsigm"); + add_name!(hash, 21, "vsoftmax"); + add_name!(hash, 22, "vmv"); + add_name!(hash, 23, "vrsu"); + add_name!(hash, 24, "vrsl"); + add_name!(hash, 25, "ld"); + add_name!(hash, 26, "st"); + add_name!(hash, 27, "lldi"); + add_name!(hash, 28, "lmv"); + add_name!(hash, 29, "send"); + add_name!(hash, 30, "recv"); + add_name!(hash, 31, "wait"); + add_name!(hash, 32, "sync"); + hash +}); + +#[derive(Clone, Copy, Debug, Default)] +struct InstructionRecord { + opcode: u8, + rd: u8, + r1: u8, + r2_or_imm: i32, + generic1: i32, + generic2: i32, + generic3: i32, + flags: u8, +} + +fn read_u32_le(bytes: &[u8], offset: usize) -> u32 { + u32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap()) +} + +fn read_i32_le(bytes: &[u8], offset: usize) -> i32 { + i32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap()) +} + +fn parse_binary_records(bytes: &[u8]) -> Result> { + ensure!(bytes.len() >= HEADER_SIZE, "binary core file too small"); + ensure!(&bytes[0..4] == MAGIC, "invalid PIM binary magic"); + + let version = read_u32_le(bytes, 4); + ensure!( + version == VERSION, + "unsupported PIM binary version {version}" + ); + + let instruction_count = read_u32_le(bytes, 8) as usize; + let expected_len = HEADER_SIZE + instruction_count * RECORD_SIZE; + ensure!( + bytes.len() == expected_len, + "PIM binary size mismatch: expected {expected_len} bytes, got {}", + bytes.len() + ); + + let mut records = Vec::with_capacity(instruction_count); + for index in 0..instruction_count { + let base = HEADER_SIZE + index * RECORD_SIZE; + records.push(InstructionRecord { + opcode: bytes[base], + rd: bytes[base + 1], + r1: bytes[base + 2], + flags: bytes[base + 3], + r2_or_imm: read_i32_le(bytes, base + 4), + generic1: read_i32_le(bytes, base + 8), + generic2: read_i32_le(bytes, base + 12), + generic3: read_i32_le(bytes, base + 16), + }); + } + + Ok(records) +} + +fn append_record( + inst_builder: &mut InstructionsBuilder, + inst_data_builder: &mut InstructionDataBuilder, + record: InstructionRecord, +) -> Result<()> { + let InstructionRecord { + opcode, + rd, + r1, + r2_or_imm, + generic1, + generic2, + generic3, + flags: _, + } = record; + + match opcode { + 0 => {} + 1 => { + inst_data_builder.set_rd_u8(rd).set_imm(r2_or_imm); + inst_builder.make_inst(sldi, inst_data_builder.build()); + } + 2 => { + inst_data_builder + .set_rd_u8(rd) + .set_r1_u8(r1) + .set_offset_select(generic1) + .set_offset_value(generic2); + inst_builder.make_inst(sld, inst_data_builder.build()); + } + 3 => { + inst_data_builder.set_rdr1r2_u8(rd, r1, r2_or_imm); + inst_builder.make_inst(sadd, inst_data_builder.build()); + } + 4 => { + inst_data_builder.set_rdr1r2_u8(rd, r1, r2_or_imm); + inst_builder.make_inst(ssub, inst_data_builder.build()); + } + 5 => { + inst_data_builder.set_rdr1r2_u8(rd, r1, r2_or_imm); + inst_builder.make_inst(smul, inst_data_builder.build()); + } + 6 => { + inst_data_builder.set_rdr1imm_u8(rd, r1, r2_or_imm); + inst_builder.make_inst(saddi, inst_data_builder.build()); + } + 7 => { + inst_data_builder.set_rdr1imm_u8(rd, r1, r2_or_imm); + inst_builder.make_inst(smuli, inst_data_builder.build()); + } + 8 => { + inst_data_builder.set_ibiw_obiw(generic1, generic2); + inst_builder.make_inst(setbw, inst_data_builder.build()); + } + 9 => { + inst_data_builder + .set_rd_u8(rd) + .set_r1_u8(r1) + .set_mbiw_immrelu_immgroup(r2_or_imm, generic1, generic2); + inst_builder.make_inst(mvmul, inst_data_builder.build()); + } + 10 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvadd, inst_data_builder.build()); + } + 11 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvsub, inst_data_builder.build()); + } + 12 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvmul, inst_data_builder.build()); + } + 13 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvdmul, inst_data_builder.build()); + } + 14 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvmax, inst_data_builder.build()); + } + 15 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvsll, inst_data_builder.build()); + } + 16 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vvsra, inst_data_builder.build()); + } + 17 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vavg, inst_data_builder.build()); + } + 18 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vrelu, inst_data_builder.build()); + } + 19 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vtanh, inst_data_builder.build()); + } + 20 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vsigm, inst_data_builder.build()); + } + 21 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vsoftmax, inst_data_builder.build()); + } + 22 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vmv, inst_data_builder.build()); + } + 23 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vrsu, inst_data_builder.build()); + } + 24 => { + inst_data_builder + .set_rdr1r2_u8(rd, r1, r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(vrsl, inst_data_builder.build()); + } + 25 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(ld, inst_data_builder.build()); + } + 26 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(st, inst_data_builder.build()); + } + 27 => { + inst_data_builder + .set_rd_u8(rd) + .set_imm(r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(lldi, inst_data_builder.build()); + } + 28 => { + inst_data_builder + .set_rdr1_u8(rd, r1) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(lmv, inst_data_builder.build()); + } + 29 => { + inst_data_builder + .set_rd_u8(rd) + .set_imm_core(r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(send, inst_data_builder.build()); + } + 30 => { + inst_data_builder + .set_rd_u8(rd) + .set_imm_core(r2_or_imm) + .set_imm_len(generic3) + .set_offset_select_value(generic1, generic2); + inst_builder.make_inst(recv, inst_data_builder.build()); + } + 31 => { + inst_builder.make_inst(wait, inst_data_builder.build()); + } + 32 => { + inst_builder.make_inst(sync, inst_data_builder.build()); + } + _ => bail!("unsupported PIM binary opcode {opcode}"), + } + Ok(()) +} + +fn binary_to_instructions( + core_bytes: &[u8], + core_index: i32, +) -> Result> { + let records = parse_binary_records(core_bytes)?; + let mut insts_builder = InstructionsBuilder::new(); + let mut inst_data_builder = InstructionDataBuilder::new(); + inst_data_builder + .set_core_indx_u16(u16::try_from(core_index).expect("core index does not fit in u16")) + .fix_core_indx(); + + for record in records { + let opcode = record.opcode; + let name = INSTRUCTIONS + .get(&(opcode as usize)) + .copied() + .unwrap_or(""); + + append_record(&mut insts_builder, &mut inst_data_builder, record).with_context(|| { + format!( + "while decoding binary instruction for core {core_index}: opcode {opcode} ({name})" + ) + })?; + } + + Ok(insts_builder.build()) +} + +pub fn binary_to_executor<'a, 'b>( + config: Value, + mut cores: impl Iterator>, + crossbars: Vec>, +) -> Result> { + let core_cnt = config + .get("core_cnt") + .context("missing core_cnt in config")? + .as_i64() + .context("core_cnt is not an integer")? as i32 + - 1; + + let cpu = CPU::new(core_cnt, crossbars); + let mut core_insts_builder = CoreInstructionsBuilder::new(core_cnt as usize); + cores.next(); + for core_indx in 1..=core_cnt { + let core_bytes = cores + .next() + .unwrap_or_else(|| panic!("cores files less than {}", core_indx)); + let instructions = binary_to_instructions(core_bytes, core_indx)?; + core_insts_builder.set_core(core_indx, instructions); + } + + Ok(Executable::new(cpu, core_insts_builder.build())) +} + +#[cfg(test)] +mod tests { + use super::{ + HEADER_SIZE, InstructionRecord, MAGIC, RECORD_SIZE, VERSION, binary_to_instructions, + }; + use crate::{ + instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder}, + json_to_instruction::json_isa::json_to_instruction, + }; + + fn encode_record(record: InstructionRecord, dst: &mut Vec) { + dst.push(record.opcode); + dst.push(record.rd); + dst.push(record.r1); + dst.push(record.flags); + dst.extend_from_slice(&record.r2_or_imm.to_le_bytes()); + dst.extend_from_slice(&record.generic1.to_le_bytes()); + dst.extend_from_slice(&record.generic2.to_le_bytes()); + dst.extend_from_slice(&record.generic3.to_le_bytes()); + } + + fn binary_blob(records: &[InstructionRecord]) -> Vec { + let mut blob = Vec::with_capacity(HEADER_SIZE + records.len() * RECORD_SIZE); + blob.extend_from_slice(MAGIC); + blob.extend_from_slice(&VERSION.to_le_bytes()); + blob.extend_from_slice(&(records.len() as u32).to_le_bytes()); + for &record in records { + encode_record(record, &mut blob); + } + blob + } + + #[test] + fn json_and_binary_decoders_match_for_representative_ops() { + let json_program = [ + r#"{"imm":64,"op":"sldi","rd":0}"#, + r#"{"imm":128,"op":"sldi","rd":1}"#, + r#"{"len":16,"offset":{"offset_select":0,"offset_value":0},"op":"lmv","rd":0,"rs1":1}"#, + r#"{"group":3,"mbiw":8,"op":"mvmul","rd":0,"relu":0,"rs1":1}"#, + r#"{"len":16,"offset":{"offset_select":0,"offset_value":0},"op":"vvadd","rd":0,"rs1":1,"rs2":2}"#, + r#"{"core":2,"offset":{"offset_select":0,"offset_value":0},"op":"send","rd":0,"size":16}"#, + ]; + + let binary_program = binary_blob(&[ + InstructionRecord { + opcode: 1, + rd: 0, + r2_or_imm: 64, + ..Default::default() + }, + InstructionRecord { + opcode: 1, + rd: 1, + r2_or_imm: 128, + ..Default::default() + }, + InstructionRecord { + opcode: 28, + rd: 0, + r1: 1, + generic3: 16, + ..Default::default() + }, + InstructionRecord { + opcode: 9, + rd: 0, + r1: 1, + r2_or_imm: 8, + generic2: 3, + ..Default::default() + }, + InstructionRecord { + opcode: 10, + rd: 0, + r1: 1, + r2_or_imm: 2, + generic3: 16, + ..Default::default() + }, + InstructionRecord { + opcode: 29, + rd: 0, + r2_or_imm: 2, + generic3: 16, + ..Default::default() + }, + ]); + + let mut json_builder = InstructionsBuilder::new(); + let mut json_data_builder = InstructionDataBuilder::new(); + json_data_builder.set_core_indx(1).fix_core_indx(); + for inst in json_program { + let value = serde_json::from_str(inst).unwrap(); + json_to_instruction(&mut json_builder, &mut json_data_builder, &value); + } + let json_instructions = json_builder.build(); + let binary_instructions = binary_to_instructions(&binary_program, 1).unwrap(); + + assert_eq!(json_instructions.len(), binary_instructions.len()); + for (json_inst, binary_inst) in json_instructions.iter().zip(binary_instructions.iter()) { + assert_eq!(json_inst.functor_name(), binary_inst.functor_name()); + assert_eq!(json_inst.data, binary_inst.data); + } + } +} diff --git a/backend-simulators/pim/pim-simulator/src/lib/instruction_set/instruction_data.rs b/backend-simulators/pim/pim-simulator/src/lib/instruction_set/instruction_data.rs index 02d4409..df94357 100644 --- a/backend-simulators/pim/pim-simulator/src/lib/instruction_set/instruction_data.rs +++ b/backend-simulators/pim/pim-simulator/src/lib/instruction_set/instruction_data.rs @@ -1,10 +1,11 @@ use paste::paste; +use std::convert::TryFrom; -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub struct InstructionData { - core_indx: i32, - rd: i32, - r1: i32, + core_indx: u16, + rd: u8, + r1: u8, //r2 imm mbiw imm_core r2_or_imm: i32, //offset_select imm_relu ibiw @@ -16,18 +17,30 @@ pub struct InstructionData { } impl InstructionData { - pub fn core_indx(&self) -> i32 { + pub fn core_indx_u16(&self) -> u16 { self.core_indx } - pub fn rd(&self) -> i32 { + pub fn core_indx(&self) -> i32 { + i32::from(self.core_indx) + } + + pub fn rd_u8(&self) -> u8 { self.rd } - pub fn r1(&self) -> i32 { + pub fn rd(&self) -> i32 { + i32::from(self.rd) + } + + pub fn r1_u8(&self) -> u8 { self.r1 } + pub fn r1(&self) -> i32 { + i32::from(self.r1) + } + pub fn r2(&self) -> i32 { self.r2_or_imm } @@ -49,26 +62,26 @@ impl InstructionData { } pub fn get_core_rd_r1(&self) -> (i32, i32, i32) { - (self.core_indx, self.rd, self.r1) + (self.core_indx(), self.rd(), self.r1()) } pub fn get_core_rd_r1_r2(&self) -> (i32, i32, i32, i32) { - (self.core_indx, self.rd, self.r1, self.r2_or_imm) + (self.core_indx(), self.rd(), self.r1(), self.r2_or_imm) } pub fn get_core_rd_imm(&self) -> (i32, i32, i32) { - (self.core_indx, self.rd, self.r2_or_imm) + (self.core_indx(), self.rd(), self.r2_or_imm) } pub fn get_core_rd_r1_imm(&self) -> (i32, i32, i32, i32) { - (self.core_indx, self.rd, self.r1, self.r2_or_imm) + (self.core_indx(), self.rd(), self.r1(), self.r2_or_imm) } pub fn get_core_rd_r1_r2_immlen_offset(&self) -> (i32, i32, i32, i32, i32, i32, i32) { ( - self.core_indx, - self.rd, - self.r1, + self.core_indx(), + self.rd(), + self.r1(), self.r2_or_imm, self.generic3, self.generic1, @@ -78,9 +91,9 @@ impl InstructionData { pub fn get_core_rd_r1_mbiw_immrelu_immgroup(&self) -> (i32, i32, i32, i32, i32, i32) { ( - self.core_indx, - self.rd, - self.r1, + self.core_indx(), + self.rd(), + self.r1(), self.r2_or_imm, self.generic1, self.generic2, @@ -100,7 +113,7 @@ impl InstructionData { } pub(crate) fn get_core_immcore(&self) -> (i32, i32) { - (self.core_indx, self.r2_or_imm) + (self.core_indx(), self.r2_or_imm) } } @@ -216,6 +229,18 @@ impl InstructionDataBuilder { common_getter_setter![imm_group]; common_getter_setter![imm_core]; + pub fn set_core_indx_u16(&mut self, val: u16) -> &mut Self { + self.set_core_indx(i32::from(val)) + } + + pub fn set_rd_u8(&mut self, val: u8) -> &mut Self { + self.set_rd(i32::from(val)) + } + + pub fn set_r1_u8(&mut self, val: u8) -> &mut Self { + self.set_r1(i32::from(val)) + } + pub fn new() -> Self { Self { core_indx: Fixer::Edit(0), @@ -254,20 +279,16 @@ impl InstructionDataBuilder { fn check_sanity(&self) { assert!(!(self.get_r2() != 0 && self.get_imm() != 0 && self.get_mbiw() != 0 && self.get_imm_core() != 0)); - assert!( - !(self.get_ibiw() != 0 && self.get_offset_select() != 0 && self.get_imm_relu() != 0) - ); - assert!( - !(self.get_obiw() != 0 && self.get_offset_value() != 0 && self.get_imm_group() != 0) - ); + assert!(!(self.get_ibiw() != 0 && self.get_offset_select() != 0 && self.get_imm_relu() != 0)); + assert!(!(self.get_obiw() != 0 && self.get_offset_value() != 0 && self.get_imm_group() != 0)); } pub fn build(&mut self) -> InstructionData { self.check_sanity(); let inst_data = InstructionData { - core_indx: self.get_core_indx(), - rd: self.get_rd(), - r1: self.get_r1(), + core_indx: u16::try_from(self.get_core_indx()).expect("core index does not fit in u16"), + rd: u8::try_from(self.get_rd()).expect("rd does not fit in u8"), + r1: u8::try_from(self.get_r1()).expect("r1 does not fit in u8"), r2_or_imm: self.get_r2() + self.get_imm() + self.get_mbiw() + self.get_imm_core(), generic1: self.get_offset_select() + self.get_ibiw() + self.get_imm_relu(), generic2: self.get_offset_value() + self.get_obiw() + self.get_imm_group(), @@ -281,6 +302,10 @@ impl InstructionDataBuilder { self.set_rd(rd).set_r1(r1).set_r2(r2) } + pub fn set_rdr1r2_u8(&mut self, rd: u8, r1: u8, r2: i32) -> &mut Self { + self.set_rd_u8(rd).set_r1_u8(r1).set_r2(r2) + } + pub fn set_offset_select_value(&mut self, offset_select: i32, offset_value: i32) -> &mut Self { self.set_offset_select(offset_select) .set_offset_value(offset_value) @@ -290,14 +315,26 @@ impl InstructionDataBuilder { self.set_rd(rd).set_r1(r1).set_imm(imm) } + pub fn set_rdr1imm_u8(&mut self, rd: u8, r1: u8, imm: i32) -> &mut Self { + self.set_rd_u8(rd).set_r1_u8(r1).set_imm(imm) + } + pub fn set_rdr1(&mut self, rd: i32, r1: i32) -> &mut Self { self.set_rd(rd).set_r1(r1) } + pub fn set_rdr1_u8(&mut self, rd: u8, r1: u8) -> &mut Self { + self.set_rd_u8(rd).set_r1_u8(r1) + } + pub fn set_rdimm(&mut self, rd: i32, imm: i32) -> &mut Self { self.set_rd(rd).set_imm(imm) } + pub fn set_rdimm_u8(&mut self, rd: u8, imm: i32) -> &mut Self { + self.set_rd_u8(rd).set_imm(imm) + } + pub fn set_ibiw_obiw(&mut self, ibiw: i32, obiw: i32) -> &mut Self { self.set_ibiw(ibiw).set_obiw(obiw) } diff --git a/backend-simulators/pim/pim-simulator/src/lib/pimcore.rs b/backend-simulators/pim/pim-simulator/src/lib/pimcore.rs index 129c8c2..b716d6e 100644 --- a/backend-simulators/pim/pim-simulator/src/lib/pimcore.rs +++ b/backend-simulators/pim/pim-simulator/src/lib/pimcore.rs @@ -10,6 +10,7 @@ use crate::{ tracing::TRACER, }; pub mod cpu; +pub mod binary_to_instruction; pub mod instruction_set; pub mod json_to_instruction; pub mod memory_manager; diff --git a/src/PIM/Compiler/PimArtifactWriter.cpp b/src/PIM/Compiler/PimArtifactWriter.cpp index 909abc0..c30c77b 100644 --- a/src/PIM/Compiler/PimArtifactWriter.cpp +++ b/src/PIM/Compiler/PimArtifactWriter.cpp @@ -11,6 +11,7 @@ #include "src/Accelerators/PIM/Common/IR/WeightUtils.hpp" #include "src/Accelerators/PIM/Compiler/PimArtifactWriter.hpp" +#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp" #include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp" #include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp" @@ -19,18 +20,35 @@ using namespace mlir; namespace onnx_mlir { -OnnxMlirCompilerErrorCodes writeHostCoreJson(StringRef outputDirPath) { +OnnxMlirCompilerErrorCodes writeHostCoreArtifacts(StringRef outputDirPath) { std::error_code errorCode; - std::string outputHostCorePath = outputDirPath.str() + "/core_0.json"; - raw_fd_ostream hostFileStream(outputHostCorePath, errorCode); + std::string outputHostCorePath = outputDirPath.str() + "/core_0.pim"; + raw_fd_ostream hostFileStream(outputHostCorePath, errorCode, sys::fs::OF_None); if (errorCode) { errs() << "Error while opening host core file `" << outputHostCorePath << "`: " << errorCode.message() << '\n'; return InvalidOutputFileAccess; } - // The host core json contains two no-op-like instructions to satisfy pimsim-nn. - hostFileStream << "[{\"imm\":0,\"op\":\"sldi\",\"rd\":0},{\"imm\":0,\"op\":\"sldi\",\"rd\":0}]"; + pim_binary::writeHeader(hostFileStream); + pim_binary::InstructionRecord noop; + noop.opcode = pim_binary::Opcode::sldi; + pim_binary::writeInstructionRecord(hostFileStream, noop); + pim_binary::writeInstructionRecord(hostFileStream, noop); + pim_binary::patchInstructionCount(hostFileStream, 2); hostFileStream.close(); + + if (pimEmitJson.getValue()) { + std::string outputHostJsonPath = outputDirPath.str() + "/core_0.json"; + raw_fd_ostream hostJsonStream(outputHostJsonPath, errorCode); + if (errorCode) { + errs() << "Error while opening host core json file `" << outputHostJsonPath << "`: " << errorCode.message() + << '\n'; + return InvalidOutputFileAccess; + } + // The host core json contains two no-op-like instructions to satisfy pimsim-nn + hostJsonStream << "[{\"imm\":0,\"op\":\"sldi\",\"rd\":0},{\"imm\":0,\"op\":\"sldi\",\"rd\":0}]"; + hostJsonStream.close(); + } return CompilerSuccess; } diff --git a/src/PIM/Compiler/PimArtifactWriter.hpp b/src/PIM/Compiler/PimArtifactWriter.hpp index 346bdab..fdd0d12 100644 --- a/src/PIM/Compiler/PimArtifactWriter.hpp +++ b/src/PIM/Compiler/PimArtifactWriter.hpp @@ -12,7 +12,7 @@ namespace onnx_mlir { class PimAcceleratorMemory; -OnnxMlirCompilerErrorCodes writeHostCoreJson(llvm::StringRef outputDirPath); +OnnxMlirCompilerErrorCodes writeHostCoreArtifacts(llvm::StringRef outputDirPath); OnnxMlirCompilerErrorCodes writeMemoryBinary(mlir::ModuleOp moduleOp, mlir::func::FuncOp funcOp, PimAcceleratorMemory& memory, diff --git a/src/PIM/Compiler/PimBinaryFormat.hpp b/src/PIM/Compiler/PimBinaryFormat.hpp new file mode 100644 index 0000000..216a947 --- /dev/null +++ b/src/PIM/Compiler/PimBinaryFormat.hpp @@ -0,0 +1,381 @@ +#pragma once + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include + +namespace onnx_mlir::pim_binary { + +inline constexpr char kMagic[4] = {'P', 'I', 'M', 'B'}; +inline constexpr uint32_t kVersion = 1; +inline constexpr uint64_t kCountOffset = 8; +inline constexpr size_t kHeaderSize = 12; +inline constexpr size_t kRecordSize = 20; + +enum class Opcode : uint32_t { + nop = 0, + sldi = 1, + sld = 2, + sadd = 3, + ssub = 4, + smul = 5, + saddi = 6, + smuli = 7, + setbw = 8, + mvmul = 9, + vvadd = 10, + vvsub = 11, + vvmul = 12, + vvdmul = 13, + vvmax = 14, + vvsll = 15, + vvsra = 16, + vavg = 17, + vrelu = 18, + vtanh = 19, + vsigm = 20, + vsoftmax = 21, + vmv = 22, + vrsu = 23, + vrsl = 24, + ld = 25, + st = 26, + lldi = 27, + lmv = 28, + send = 29, + recv = 30, + wait = 31, + sync = 32, +}; + +struct InstructionRecord { + Opcode opcode = Opcode::nop; + uint8_t rd = 0; + uint8_t r1 = 0; + int32_t r2OrImm = 0; + int32_t generic1 = 0; + int32_t generic2 = 0; + int32_t generic3 = 0; + uint8_t flags = 0; +}; + +inline void writeUint32LE(llvm::raw_ostream& os, uint32_t value) { + std::array bytes; + llvm::support::endian::write32le(bytes.data(), value); + os.write(bytes.data(), bytes.size()); +} + +inline void writeInt32LE(llvm::raw_ostream& os, int32_t value) { + writeUint32LE(os, static_cast(value)); +} + +inline void writeHeader(llvm::raw_ostream& os) { + os.write(kMagic, sizeof(kMagic)); + writeUint32LE(os, kVersion); + writeUint32LE(os, 0); +} + +inline void patchInstructionCount(llvm::raw_pwrite_stream& os, uint32_t instructionCount) { + std::array bytes; + llvm::support::endian::write32le(bytes.data(), instructionCount); + os.pwrite(bytes.data(), bytes.size(), kCountOffset); +} + +inline void writeInstructionRecord(llvm::raw_ostream& os, const InstructionRecord& record) { + os << static_cast(static_cast(record.opcode)); + os << static_cast(record.rd); + os << static_cast(record.r1); + os << static_cast(record.flags); + writeInt32LE(os, record.r2OrImm); + writeInt32LE(os, record.generic1); + writeInt32LE(os, record.generic2); + writeInt32LE(os, record.generic3); +} + +inline int32_t toI32(int64_t value) { + assert(value >= std::numeric_limits::min() && value <= std::numeric_limits::max() + && "PIM binary field out of int32 range"); + return static_cast(value); +} + +inline uint8_t toU8(int64_t value) { + assert(value >= 0 && value <= std::numeric_limits::max() && "PIM binary field out of uint8 range"); + return static_cast(value); +} + +inline int32_t getOptionalInt(const llvm::json::Object& object, llvm::StringRef key, int32_t defaultValue = 0) { + if (std::optional value = object.getInteger(key)) + return toI32(*value); + return defaultValue; +} + +inline Opcode opcodeFromString(llvm::StringRef opName) { + if (opName == "nop") + return Opcode::nop; + if (opName == "sldi") + return Opcode::sldi; + if (opName == "sld") + return Opcode::sld; + if (opName == "sadd") + return Opcode::sadd; + if (opName == "ssub") + return Opcode::ssub; + if (opName == "smul") + return Opcode::smul; + if (opName == "saddi") + return Opcode::saddi; + if (opName == "smuli") + return Opcode::smuli; + if (opName == "setbw") + return Opcode::setbw; + if (opName == "mvmul") + return Opcode::mvmul; + if (opName == "vvadd") + return Opcode::vvadd; + if (opName == "vvsub") + return Opcode::vvsub; + if (opName == "vvmul") + return Opcode::vvmul; + if (opName == "vvdmul") + return Opcode::vvdmul; + if (opName == "vvmax") + return Opcode::vvmax; + if (opName == "vvsll") + return Opcode::vvsll; + if (opName == "vvsra") + return Opcode::vvsra; + if (opName == "vavg") + return Opcode::vavg; + if (opName == "vrelu") + return Opcode::vrelu; + if (opName == "vtanh") + return Opcode::vtanh; + if (opName == "vsigm") + return Opcode::vsigm; + if (opName == "vsoftmax") + return Opcode::vsoftmax; + if (opName == "vmv") + return Opcode::vmv; + if (opName == "vrsu") + return Opcode::vrsu; + if (opName == "vrsl") + return Opcode::vrsl; + if (opName == "ld") + return Opcode::ld; + if (opName == "st") + return Opcode::st; + if (opName == "lldi") + return Opcode::lldi; + if (opName == "lmv") + return Opcode::lmv; + if (opName == "send") + return Opcode::send; + if (opName == "recv") + return Opcode::recv; + if (opName == "wait") + return Opcode::wait; + if (opName == "sync") + return Opcode::sync; + llvm_unreachable("Unsupported PIM binary opcode"); +} + +inline llvm::StringRef opcodeToString(Opcode opcode) { + switch (opcode) { + case Opcode::nop: return "nop"; + case Opcode::sldi: return "sldi"; + case Opcode::sld: return "sld"; + case Opcode::sadd: return "sadd"; + case Opcode::ssub: return "ssub"; + case Opcode::smul: return "smul"; + case Opcode::saddi: return "saddi"; + case Opcode::smuli: return "smuli"; + case Opcode::setbw: return "setbw"; + case Opcode::mvmul: return "mvmul"; + case Opcode::vvadd: return "vvadd"; + case Opcode::vvsub: return "vvsub"; + case Opcode::vvmul: return "vvmul"; + case Opcode::vvdmul: return "vvdmul"; + case Opcode::vvmax: return "vvmax"; + case Opcode::vvsll: return "vvsll"; + case Opcode::vvsra: return "vvsra"; + case Opcode::vavg: return "vavg"; + case Opcode::vrelu: return "vrelu"; + case Opcode::vtanh: return "vtanh"; + case Opcode::vsigm: return "vsigm"; + case Opcode::vsoftmax: return "vsoftmax"; + case Opcode::vmv: return "vmv"; + case Opcode::vrsu: return "vrsu"; + case Opcode::vrsl: return "vrsl"; + case Opcode::ld: return "ld"; + case Opcode::st: return "st"; + case Opcode::lldi: return "lldi"; + case Opcode::lmv: return "lmv"; + case Opcode::send: return "send"; + case Opcode::recv: return "recv"; + case Opcode::wait: return "wait"; + case Opcode::sync: return "sync"; + } + llvm_unreachable("Unsupported PIM binary opcode"); +} + +inline InstructionRecord makeInstructionRecord(const llvm::json::Object& instruction) { + InstructionRecord record; + std::optional opName = instruction.getString("op"); + assert(opName && "Missing op field in PIM instruction"); + record.opcode = opcodeFromString(*opName); + record.rd = toU8(getOptionalInt(instruction, "rd")); + record.r1 = toU8(getOptionalInt(instruction, "rs1")); + + switch (record.opcode) { + case Opcode::sldi: + case Opcode::saddi: + case Opcode::smuli: + case Opcode::lldi: + record.r2OrImm = getOptionalInt(instruction, "imm"); + break; + case Opcode::mvmul: + record.r2OrImm = getOptionalInt(instruction, "mbiw"); + record.generic1 = getOptionalInt(instruction, "relu"); + record.generic2 = getOptionalInt(instruction, "group"); + break; + case Opcode::setbw: + record.generic1 = getOptionalInt(instruction, "ibiw"); + record.generic2 = getOptionalInt(instruction, "obiw"); + break; + case Opcode::send: + case Opcode::recv: + record.r2OrImm = getOptionalInt(instruction, "core"); + record.generic3 = getOptionalInt(instruction, "size"); + break; + default: + record.r2OrImm = getOptionalInt(instruction, "rs2"); + break; + } + + if (record.opcode != Opcode::mvmul && record.opcode != Opcode::setbw) { + if (auto* offsetValue = instruction.getObject("offset")) { + record.generic1 = getOptionalInt(*offsetValue, "offset_select"); + record.generic2 = getOptionalInt(*offsetValue, "offset_value"); + } + } + + if (instruction.get("len")) + record.generic3 = getOptionalInt(instruction, "len"); + else if (instruction.get("size") && record.opcode != Opcode::send && record.opcode != Opcode::recv) + record.generic3 = getOptionalInt(instruction, "size"); + + return record; +} + +inline llvm::json::Object makeInstructionJson(const InstructionRecord& record) { + llvm::json::Object instruction; + instruction["op"] = opcodeToString(record.opcode).str(); + + auto addOffset = [&](int32_t offsetSelect, int32_t offsetValue) { + llvm::json::Object offset; + offset["offset_select"] = offsetSelect; + offset["offset_value"] = offsetValue; + instruction["offset"] = std::move(offset); + }; + + switch (record.opcode) { + case Opcode::sldi: + instruction["rd"] = static_cast(record.rd); + instruction["imm"] = record.r2OrImm; + break; + case Opcode::sld: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + addOffset(record.generic1, record.generic2); + break; + case Opcode::sadd: + case Opcode::ssub: + case Opcode::smul: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + instruction["rs2"] = record.r2OrImm; + break; + case Opcode::saddi: + case Opcode::smuli: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + instruction["imm"] = record.r2OrImm; + break; + case Opcode::setbw: + instruction["ibiw"] = record.generic1; + instruction["obiw"] = record.generic2; + break; + case Opcode::mvmul: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + instruction["mbiw"] = record.r2OrImm; + instruction["relu"] = record.generic1; + instruction["group"] = record.generic2; + break; + case Opcode::vvadd: + case Opcode::vvsub: + case Opcode::vvmul: + case Opcode::vvdmul: + case Opcode::vvmax: + case Opcode::vvsll: + case Opcode::vvsra: + case Opcode::vavg: + case Opcode::vmv: + case Opcode::vrsu: + case Opcode::vrsl: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + instruction["rs2"] = record.r2OrImm; + addOffset(record.generic1, record.generic2); + instruction["len"] = record.generic3; + break; + case Opcode::vrelu: + case Opcode::vtanh: + case Opcode::vsigm: + case Opcode::vsoftmax: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + addOffset(record.generic1, record.generic2); + instruction["len"] = record.generic3; + break; + case Opcode::ld: + case Opcode::st: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + addOffset(record.generic1, record.generic2); + instruction["size"] = record.generic3; + break; + case Opcode::lldi: + instruction["rd"] = static_cast(record.rd); + instruction["imm"] = record.r2OrImm; + addOffset(record.generic1, record.generic2); + instruction["len"] = record.generic3; + break; + case Opcode::lmv: + instruction["rd"] = static_cast(record.rd); + instruction["rs1"] = static_cast(record.r1); + addOffset(record.generic1, record.generic2); + instruction["len"] = record.generic3; + break; + case Opcode::send: + case Opcode::recv: + instruction["rd"] = static_cast(record.rd); + instruction["core"] = record.r2OrImm; + addOffset(record.generic1, record.generic2); + instruction["size"] = record.generic3; + break; + case Opcode::wait: + case Opcode::sync: + case Opcode::nop: + break; + } + + return instruction; +} + +} // namespace onnx_mlir::pim_binary diff --git a/src/PIM/Compiler/PimCodeGen.cpp b/src/PIM/Compiler/PimCodeGen.cpp index 2e28e4f..8e029f3 100644 --- a/src/PIM/Compiler/PimCodeGen.cpp +++ b/src/PIM/Compiler/PimCodeGen.cpp @@ -30,6 +30,7 @@ #include "Conversion/ONNXToSpatial/Common/Common.hpp" #include "src/Accelerators/PIM/Compiler/PimArtifactWriter.hpp" #include "src/Accelerators/PIM/Compiler/PimBatchEmission.hpp" +#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp" #include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp" #include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp" #include "src/Accelerators/PIM/Compiler/PimWeightEmitter.hpp" @@ -116,25 +117,29 @@ void PimMemory::allocateCore(Operation* op) { static void printHostMemoryReportRow(raw_ostream& os, const MemoryReportRow& row) { llvm::SmallVector fields = { - {"Number of globals", std::to_string(row.numGlobal)}, - {"Global memory", formatReportMemory(row.sizeGlobal)}}; + {"Number of globals", std::to_string(row.numGlobal) }, + {"Global memory", formatReportMemory(row.sizeGlobal)} + }; printReportFlatFields(os, fields); } static void printCoreMemoryReportRow(raw_ostream& os, const MemoryReportEntry& entry) { llvm::SmallVector fields = { - {"Number of allocas", std::to_string(entry.row.numAlloca)}, - {"Allocated memory", formatReportMemory(entry.row.sizeAlloca)}}; + {"Number of allocas", std::to_string(entry.row.numAlloca) }, + {"Allocated memory", formatReportMemory(entry.row.sizeAlloca)} + }; printReportFlatFields(os, fields); } static void printBatchMemoryReportRow(raw_ostream& os, const MemoryReportEntry& entry) { llvm::SmallVector perCoreFields = { - {"Number of allocas", std::to_string(entry.row.numAlloca)}, - {"Allocated memory", formatReportMemory(entry.row.sizeAlloca)}}; + {"Number of allocas", std::to_string(entry.row.numAlloca) }, + {"Allocated memory", formatReportMemory(entry.row.sizeAlloca)} + }; llvm::SmallVector totalFields = { - {"Number of allocas", std::to_string(entry.totalAllocaCount)}, - {"Batch memory", formatReportMemory(entry.totalAllocaBytes)}}; + {"Number of allocas", std::to_string(entry.totalAllocaCount) }, + {"Batch memory", formatReportMemory(entry.totalAllocaBytes)} + }; printReportPerCoreAndTotalFields(os, perCoreFields, totalFields); } @@ -215,12 +220,8 @@ size_t PimAcceleratorMemory::getValueAddress(mlir::Value value, const StaticValu void PimAcceleratorMemory::reportHost() { hostReportRow = hostMem.getReportRow(); } void PimAcceleratorMemory::recordCoreReport(size_t coreId, const MemoryReportRow& row) { - reportEntries.push_back({MemoryReportEntry::Kind::Core, - coreId, - {static_cast(coreId)}, - row, - row.numAlloca, - row.sizeAlloca}); + reportEntries.push_back( + {MemoryReportEntry::Kind::Core, coreId, {static_cast(coreId)}, row, row.numAlloca, row.sizeAlloca}); } void PimAcceleratorMemory::recordBatchReport(uint64_t batchId, @@ -250,7 +251,8 @@ void PimAcceleratorMemory::flushReport() { llvm::SmallVector totalFields = { {"Global memory", formatReportMemory(totalGlobalMemory)}, - {"Cores memory", formatReportMemory(totalCoresMemory)}}; + {"Cores memory", formatReportMemory(totalCoresMemory) } + }; printReportTotalsBlock(os, totalFields); if (hostReportRow.has_value()) { @@ -312,36 +314,25 @@ void PimAcceleratorMemory::clean(mlir::Operation* op) { } } -json::Object PimCodeGen::createEmptyOffset() { - json::Object offset; - offset["offset_select"] = 0; - offset["offset_value"] = 0; - return offset; -} - size_t PimCodeGen::remapCoreId(size_t coreId) const { auto it = emittedCoreIds.find(coreId); assert(it != emittedCoreIds.end() && "Missing emitted core id remapping"); return it->second; } -static json::Object createRs1OnlyOffset() { - json::Object offset; - offset["offset_select"] = 1; - offset["offset_value"] = 0; - return offset; -} - -void PimCodeGen::emitInstruction(json::Object instruction) const { - coreFileStream << json::Value(std::move(instruction)) << ','; +void PimCodeGen::emitInstruction(const pim_binary::InstructionRecord& instruction) const { + pim_binary::writeInstructionRecord(coreBinaryStream, instruction); + ++emittedInstructionCount; + if (coreJsonStream) + *coreJsonStream << json::Value(pim_binary::makeInstructionJson(instruction)) << ','; } void PimCodeGen::genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const { - json::Object json; - json["op"] = "sldi"; - json["rd"] = registerNumber; - json["imm"] = immediate; - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::sldi; + instruction.rd = static_cast(registerNumber); + instruction.r2OrImm = static_cast(immediate); + emitInstruction(instruction); } void PimCodeGen::setupRd(size_t rdAddress, size_t rdOffset) const { @@ -369,38 +360,41 @@ void PimCodeGen::emitMemCopyOp(StringRef opName, StringRef sizeFieldName) const { setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset); - json::Object json; - json["op"] = opName; - json["rd"] = 0; - json["rs1"] = 1; - json[sizeFieldName] = size; - json["offset"] = createEmptyOffset(); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::opcodeFromString(opName); + instruction.rd = 0; + instruction.r1 = 1; + instruction.generic1 = 0; + instruction.generic2 = 0; + instruction.generic3 = static_cast(size); + (void)sizeFieldName; + emitInstruction(instruction); } void PimCodeGen::emitCommunicationOp(StringRef opName, size_t bufferAddr, size_t coreId, size_t size) const { setupRd(bufferAddr, 0); - json::Object json; - json["op"] = opName; - json["rd"] = 0; - json["core"] = remapCoreId(coreId); - json["size"] = size; - json["offset"] = createEmptyOffset(); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::opcodeFromString(opName); + instruction.rd = 0; + instruction.r2OrImm = static_cast(remapCoreId(coreId)); + instruction.generic1 = 0; + instruction.generic2 = 0; + instruction.generic3 = static_cast(size); + emitInstruction(instruction); } void PimCodeGen::emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const { setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset); - json::Object json; - json["op"] = "mvmul"; - json["rd"] = 0; - json["rs1"] = 1; - json["group"] = groupId; - json["relu"] = 0; - json["mbiw"] = 8; - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::mvmul; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 8; + instruction.generic1 = 0; + instruction.generic2 = static_cast(groupId); + emitInstruction(instruction); } void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const { @@ -508,14 +502,13 @@ void PimCodeGen::codeGenVVAddOp(pim::PimVVAddOp vvaddOp, const StaticValueKnowle auto rhsAddr = addressOf(vvaddOp.getRhs(), knowledge); setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0); - json::Object json; - json["op"] = "vvadd"; - json["rd"] = 0; - json["rs1"] = 1; - json["rs2"] = 2; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vvaddOp.getLhs()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vvadd; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 2; + instruction.generic3 = static_cast(getValueSizeInBytes(vvaddOp.getLhs())); + emitInstruction(instruction); } void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp, const StaticValueKnowledge& knowledge) const { @@ -524,14 +517,13 @@ void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp, const StaticValueKnowle auto rhsAddr = addressOf(vvsubOp.getRhs(), knowledge); setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0); - json::Object json; - json["op"] = "vvsub"; - json["rd"] = 0; - json["rs1"] = 1; - json["rs2"] = 2; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vvsubOp.getLhs()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vvsub; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 2; + instruction.generic3 = static_cast(getValueSizeInBytes(vvsubOp.getLhs())); + emitInstruction(instruction); } void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp, const StaticValueKnowledge& knowledge) const { @@ -540,14 +532,13 @@ void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp, const StaticValueKnowle auto rhsAddr = addressOf(vvmulOp.getRhs(), knowledge); setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0); - json::Object json; - json["op"] = "vvmul"; - json["rd"] = 0; - json["rs1"] = 1; - json["rs2"] = 2; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vvmulOp.getLhs()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vvmul; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 2; + instruction.generic3 = static_cast(getValueSizeInBytes(vvmulOp.getLhs())); + emitInstruction(instruction); } void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp, const StaticValueKnowledge& knowledge) const { @@ -556,14 +547,13 @@ void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp, const StaticValueKnowle auto rhsAddr = addressOf(vvmaxOp.getRhs(), knowledge); setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0); - json::Object json; - json["op"] = "vvmax"; - json["rd"] = 0; - json["rs1"] = 1; - json["rs2"] = 2; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vvmaxOp.getLhs()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vvmax; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 2; + instruction.generic3 = static_cast(getValueSizeInBytes(vvmaxOp.getLhs())); + emitInstruction(instruction); } void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp, const StaticValueKnowledge& knowledge) const { @@ -572,14 +562,13 @@ void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp, const StaticValueKno auto rhsAddr = addressOf(vvdmulOp.getRhs(), knowledge); setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0); - json::Object json; - json["op"] = "vvdmul"; - json["rd"] = 0; - json["rs1"] = 1; - json["rs2"] = 2; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vvdmulOp.getLhs()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vvdmul; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 2; + instruction.generic3 = static_cast(getValueSizeInBytes(vvdmulOp.getLhs())); + emitInstruction(instruction); } void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp, const StaticValueKnowledge& knowledge) const { @@ -587,14 +576,14 @@ void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp, const StaticValueKnowledge auto inputAddr = addressOf(vavgOp.getInput(), knowledge); setupRdRs1(outputBufferAddr, 0, inputAddr, 0); - json::Object json; - json["op"] = "vavg"; - json["rd"] = 0; - json["rs1"] = 1; - json["rs2"] = 1; - json["offset"] = createRs1OnlyOffset(); - json["len"] = getValueSizeInBytes(vavgOp.getInput()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vavg; + instruction.rd = 0; + instruction.r1 = 1; + instruction.r2OrImm = 1; + instruction.generic1 = 1; + instruction.generic3 = static_cast(getValueSizeInBytes(vavgOp.getInput())); + emitInstruction(instruction); } void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp, const StaticValueKnowledge& knowledge) const { @@ -602,13 +591,12 @@ void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp, const StaticValueKnowle auto inputAddr = addressOf(vreluOp.getInput(), knowledge); setupRdRs1(outputBufferAddr, 0, inputAddr, 0); - json::Object json; - json["op"] = "vrelu"; - json["rd"] = 0; - json["rs1"] = 1; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vreluOp.getInput()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vrelu; + instruction.rd = 0; + instruction.r1 = 1; + instruction.generic3 = static_cast(getValueSizeInBytes(vreluOp.getInput())); + emitInstruction(instruction); } void PimCodeGen::codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowledge& knowledge) const { @@ -616,13 +604,12 @@ void PimCodeGen::codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowle auto inputAddr = addressOf(vtanhOp.getInput(), knowledge); setupRdRs1(outputBufferAddr, 0, inputAddr, 0); - json::Object json; - json["op"] = "vtanh"; - json["rd"] = 0; - json["rs1"] = 1; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vtanhOp.getInput()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vtanh; + instruction.rd = 0; + instruction.r1 = 1; + instruction.generic3 = static_cast(getValueSizeInBytes(vtanhOp.getInput())); + emitInstruction(instruction); } void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowledge& knowledge) const { @@ -630,13 +617,12 @@ void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowle auto inputAddr = addressOf(vsigmOp.getInput(), knowledge); setupRdRs1(outputBufferAddr, 0, inputAddr, 0); - json::Object json; - json["op"] = "vsigm"; - json["rd"] = 0; - json["rs1"] = 1; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vsigmOp.getInput()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vsigm; + instruction.rd = 0; + instruction.r1 = 1; + instruction.generic3 = static_cast(getValueSizeInBytes(vsigmOp.getInput())); + emitInstruction(instruction); } void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticValueKnowledge& knowledge) const { @@ -644,13 +630,12 @@ void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticVa auto inputAddr = addressOf(vsoftmaxOp.getInput(), knowledge); setupRdRs1(outputBufferAddr, 0, inputAddr, 0); - json::Object json; - json["op"] = "vsoftmax"; - json["rd"] = 0; - json["rs1"] = 1; - json["offset"] = createEmptyOffset(); - json["len"] = getValueSizeInBytes(vsoftmaxOp.getInput()); - emitInstruction(std::move(json)); + pim_binary::InstructionRecord instruction; + instruction.opcode = pim_binary::Opcode::vsoftmax; + instruction.rd = 0; + instruction.r1 = 1; + instruction.generic3 = static_cast(getValueSizeInBytes(vsoftmaxOp.getInput())); + emitInstruction(instruction); } void PimCodeGen::codeGetGlobalOp(memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const {} @@ -682,6 +667,30 @@ void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp, const Stati dstStrides[i] = dstStrides[i + 1] * dstShape[i + 1]; } + bool storagePreserving = true; + for (size_t srcFlat = 0; srcFlat < totalElements; srcFlat++) { + SmallVector srcIdx(rank); + size_t remaining = srcFlat; + for (size_t d = 0; d < rank; d++) { + srcIdx[d] = remaining / srcStrides[d]; + remaining %= srcStrides[d]; + } + + size_t dstFlat = 0; + for (size_t d = 0; d < rank; d++) + dstFlat += srcIdx[perm[d]] * dstStrides[d]; + + if (dstFlat != srcFlat) { + storagePreserving = false; + break; + } + } + + if (storagePreserving) { + emitMemCopyOp("lmv", dstAddr, 0, srcAddr, 0, totalElements * elementSize, "len"); + return; + } + // Emit element-by-element copy with transposed addressing for (size_t srcFlat = 0; srcFlat < totalElements; srcFlat++) { // Decompose flat source index into multi-dimensional index @@ -747,9 +756,25 @@ static SmallVector collectTopLevelCoreLikeOps(func::FuncOp funcOp) { return coreLikeOps; } +static SmallDenseMap +collectMaterializedHostGlobals(ModuleOp moduleOp, func::FuncOp funcOp, const PimAcceleratorMemory& memory) { + SmallDenseMap materializedHostGlobals; + funcOp.walk([&](memref::GetGlobalOp getGlobalOp) { + if (hasWeightAlways(getGlobalOp)) + return; + auto targetGlobal = lookupGlobalForGetGlobal(moduleOp, getGlobalOp); + if (!targetGlobal || materializedHostGlobals.contains(targetGlobal)) + return; + auto it = memory.memEntriesMap.find(getGlobalOp.getResult()); + if (it != memory.memEntriesMap.end()) + materializedHostGlobals[targetGlobal] = it->second; + }); + return materializedHostGlobals; +} + static void aliasMaterializedHostGlobals(ModuleOp moduleOp, - func::FuncOp funcOp, pim::PimCoreOp coreOp, + const SmallDenseMap& materializedHostGlobals, PimAcceleratorMemory& memory) { coreOp.walk([&](memref::GetGlobalOp getGlobalOp) { if (hasWeightAlways(getGlobalOp) || memory.memEntriesMap.contains(getGlobalOp.getResult())) @@ -759,16 +784,9 @@ static void aliasMaterializedHostGlobals(ModuleOp moduleOp, if (!targetGlobal) return; - mlir::Value aliasedValue; - funcOp.walk([&](memref::GetGlobalOp candidate) { - if (aliasedValue || candidate == getGlobalOp || !memory.memEntriesMap.contains(candidate.getResult())) - return; - if (lookupGlobalForGetGlobal(moduleOp, candidate) == targetGlobal) - aliasedValue = candidate.getResult(); - }); - - if (aliasedValue) - memory.memEntriesMap[getGlobalOp.getResult()] = memory.memEntriesMap[aliasedValue]; + auto it = materializedHostGlobals.find(targetGlobal); + if (it != materializedHostGlobals.end()) + memory.memEntriesMap[getGlobalOp.getResult()] = it->second; }); } @@ -837,7 +855,7 @@ static int64_t codeGenCoreOps(Block& block, PimCodeGen& coreCodeGen) { return failed(result) ? -1 : static_cast(processedOperations); } -OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::string& outputDirPath) { +OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std::string& outputDirPath) { if (!outputDirPath.empty()) { if (auto error = sys::fs::create_directory(outputDirPath)) { errs() << "Error creating output directory: " << outputDirPath << ": " << error.message() << '\n'; @@ -857,7 +875,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std:: if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath)) return err; - if (auto err = writeHostCoreJson(outputDirPath)) + if (auto err = writeHostCoreArtifacts(outputDirPath)) return err; // For each core, specify the number of crossbar per array group. @@ -870,6 +888,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std:: auto mapCoreWeightToFileName = createAndPopulateWeightFolder(funcOp, outputDirPath); SmallVector coreLikeOps = collectTopLevelCoreLikeOps(funcOp); + SmallDenseMap materializedHostGlobals = + collectMaterializedHostGlobals(moduleOp, funcOp, memory); llvm::DenseMap emittedCoreIds; size_t nextEmittedCoreId = 1; @@ -899,16 +919,30 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std:: maxCoreId = std::max(maxCoreId, coreId); std::error_code errorCode; - auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".json"; - raw_fd_ostream coreFileStream(outputCorePath, errorCode); + auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".pim"; + raw_fd_ostream coreBinaryStream(outputCorePath, errorCode, sys::fs::OF_None); if (errorCode) { errs() << "Error while opening core file `" << outputCorePath << "`: " << errorCode.message() << '\n'; return InvalidOutputFileAccess; } - coreFileStream << '['; - PimCodeGen coreCodeGen(memory, coreFileStream, emittedCoreIds); - aliasMaterializedHostGlobals(moduleOp, funcOp, coreOp, memory); + std::unique_ptr coreJsonStream; + if (pimEmitJson.getValue()) { + std::string outputCoreJsonPath = outputDirPath + "/core_" + std::to_string(coreId) + ".json"; + errorCode = std::error_code(); + coreJsonStream = std::make_unique(outputCoreJsonPath, errorCode); + if (errorCode) { + errs() << "Error while opening core json file `" << outputCoreJsonPath << "`: " << errorCode.message() + << '\n'; + return InvalidOutputFileAccess; + } + *coreJsonStream << '['; + } + + pim_binary::writeHeader(coreBinaryStream); + + PimCodeGen coreCodeGen(memory, coreBinaryStream, coreJsonStream.get(), emittedCoreIds); + aliasMaterializedHostGlobals(moduleOp, coreOp, materializedHostGlobals, memory); auto& deviceMemory = memory.getOrCreateDeviceMem(coreId); deviceMemory.allocateCore(coreOp); @@ -920,9 +954,14 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std:: if (reportRow) *reportRow = deviceMemory.getReportRow(); - coreFileStream.seek(coreFileStream.tell() - 1); - coreFileStream << ']'; - coreFileStream.close(); + pim_binary::patchInstructionCount(coreBinaryStream, coreCodeGen.getEmittedInstructionCount()); + coreBinaryStream.close(); + + if (coreJsonStream) { + coreJsonStream->seek(coreJsonStream->tell() - 1); + *coreJsonStream << ']'; + coreJsonStream->close(); + } auto coreWeightsDirPath = outputDirPath + "/core_" + std::to_string(coreId); if (auto error = sys::fs::create_directory(coreWeightsDirPath)) { diff --git a/src/PIM/Compiler/PimCodeGen.hpp b/src/PIM/Compiler/PimCodeGen.hpp index 895f9de..b792487 100644 --- a/src/PIM/Compiler/PimCodeGen.hpp +++ b/src/PIM/Compiler/PimCodeGen.hpp @@ -13,6 +13,7 @@ #include "onnx-mlir/Compiler/OMCompilerTypes.h" #include "src/Accelerators/PIM/Common/PimCommon.hpp" #include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp" +#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp" #include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp" namespace onnx_mlir { @@ -104,16 +105,17 @@ public: class PimCodeGen { PimAcceleratorMemory& memory; - llvm::raw_fd_ostream& coreFileStream; + llvm::raw_fd_ostream& coreBinaryStream; + llvm::raw_fd_ostream* coreJsonStream; const llvm::DenseMap& emittedCoreIds; + mutable uint32_t emittedInstructionCount = 0; size_t addressOf(mlir::Value value, const StaticValueKnowledge& knowledge) const { return memory.getValueAddress(value, knowledge); } size_t remapCoreId(size_t coreId) const; - static llvm::json::Object createEmptyOffset(); - void emitInstruction(llvm::json::Object instruction) const; + void emitInstruction(const pim_binary::InstructionRecord& instruction) const; void genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const; void setupRd(size_t rdAddress, size_t rdOffset) const; @@ -133,9 +135,12 @@ class PimCodeGen { public: PimCodeGen(PimAcceleratorMemory& memory, - llvm::raw_fd_ostream& coreJson, + llvm::raw_fd_ostream& coreBinary, + llvm::raw_fd_ostream* coreJson, const llvm::DenseMap& emittedCoreIds) - : memory(memory), coreFileStream(coreJson), emittedCoreIds(emittedCoreIds) {} + : memory(memory), coreBinaryStream(coreBinary), coreJsonStream(coreJson), emittedCoreIds(emittedCoreIds) {} + + uint32_t getEmittedInstructionCount() const { return emittedInstructionCount; } void codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const; void codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp, const StaticValueKnowledge& knowledge) const; @@ -164,6 +169,6 @@ public: void codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const; }; -OnnxMlirCompilerErrorCodes compileToPimJson(mlir::ModuleOp& moduleOpRef, std::string& outputDirName); +OnnxMlirCompilerErrorCodes compileToPimCode(mlir::ModuleOp& moduleOpRef, std::string& outputDirName); } // namespace onnx_mlir diff --git a/src/PIM/Compiler/PimCompilerOptions.cpp b/src/PIM/Compiler/PimCompilerOptions.cpp index ad9bc05..03fd678 100644 --- a/src/PIM/Compiler/PimCompilerOptions.cpp +++ b/src/PIM/Compiler/PimCompilerOptions.cpp @@ -24,6 +24,11 @@ llvm::cl::opt useExperimentalConvImpl("use-experimental-conv-impl", llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions)); +llvm::cl::opt pimEmitJson("pim-emit-json", + llvm::cl::desc("Also emit per-core JSON instruction files alongside binary .pim files"), + llvm::cl::init(false), + llvm::cl::cat(OnnxMlirOptions)); + llvm::cl::opt crossbarSize("crossbar-size", llvm::cl::desc("Width and heigth of a single crossbar"), llvm::cl::init(2)); diff --git a/src/PIM/Compiler/PimCompilerOptions.hpp b/src/PIM/Compiler/PimCompilerOptions.hpp index e1f6c6a..8fb1467 100644 --- a/src/PIM/Compiler/PimCompilerOptions.hpp +++ b/src/PIM/Compiler/PimCompilerOptions.hpp @@ -25,6 +25,7 @@ extern llvm::cl::opt pimEmissionTarget; extern llvm::cl::opt pimOnlyCodegen; extern llvm::cl::opt useExperimentalConvImpl; +extern llvm::cl::opt pimEmitJson; extern llvm::cl::opt crossbarSize; extern llvm::cl::opt crossbarCountInCore; diff --git a/src/PIM/Compiler/PimCompilerUtils.cpp b/src/PIM/Compiler/PimCompilerUtils.cpp index 73ce20c..1e1ed1e 100644 --- a/src/PIM/Compiler/PimCompilerUtils.cpp +++ b/src/PIM/Compiler/PimCompilerUtils.cpp @@ -52,9 +52,9 @@ void addPassesPim(OwningOpRef& module, pm.addPass(createPimMaterializeHostConstantsPass()); pm.addPass(createPimVerificationPass()); pm.addPass(createMessagePass("Pim verified")); - pm.addPass(createEmitPimJsonPass()); + pm.addPass(createEmitPimCodePass()); // pm.addPass(createCountInstructionPass()); - pm.addPass(createMessagePass("Pim json code emitted")); + pm.addPass(createMessagePass("Pim code emitted")); } } diff --git a/src/PIM/Pass/CMakeLists.txt b/src/PIM/Pass/CMakeLists.txt index e5291c0..95edf64 100644 --- a/src/PIM/Pass/CMakeLists.txt +++ b/src/PIM/Pass/CMakeLists.txt @@ -7,7 +7,7 @@ add_pim_library(OMPimPasses PimCodegen/HostConstantFolding/Patterns/Subview.cpp PimCodegen/MaterializeHostConstantsPass.cpp PimCodegen/VerificationPass.cpp - PimCodegen/EmitPimJsonPass.cpp + PimCodegen/EmitPimCodePass.cpp EXCLUDE_FROM_OM_LIBS diff --git a/src/PIM/Pass/PIMPasses.h b/src/PIM/Pass/PIMPasses.h index ceb9379..9f24bd1 100644 --- a/src/PIM/Pass/PIMPasses.h +++ b/src/PIM/Pass/PIMPasses.h @@ -25,7 +25,7 @@ std::unique_ptr createPimMaterializeHostConstantsPass(); std::unique_ptr createPimVerificationPass(); -std::unique_ptr createEmitPimJsonPass(); +std::unique_ptr createEmitPimCodePass(); std::unique_ptr createMessagePass(std::string message); diff --git a/src/PIM/Pass/PimCodegen/EmitPimCodePass.cpp b/src/PIM/Pass/PimCodegen/EmitPimCodePass.cpp new file mode 100644 index 0000000..ea7cdf8 --- /dev/null +++ b/src/PIM/Pass/PimCodegen/EmitPimCodePass.cpp @@ -0,0 +1,36 @@ +#include "mlir/Pass/Pass.h" + +#include "Common/PimCommon.hpp" +#include "Compiler/PimCodeGen.hpp" + +using namespace mlir; + +namespace onnx_mlir { + +namespace { + +struct EmitPimCodePass : PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(EmitPimCodePass); + StringRef getArgument() const override { return "emit-pim-code-pass"; } + StringRef getDescription() const override { return "Emit PIM simulator code artifacts"; } + + EmitPimCodePass() {} + EmitPimCodePass(const EmitPimCodePass& pass) {} + + void runOnOperation() override { + ModuleOp moduleOp = getOperation(); + + std::string pimDir = getOutputDir() + "/pim"; + createDirectory(pimDir); + + int compiler_error_code = compileToPimCode(moduleOp, pimDir); + if (compiler_error_code != CompilerSuccess) + signalPassFailure(); + } +}; + +} // namespace + +std::unique_ptr createEmitPimCodePass() { return std::make_unique(); } + +} // namespace onnx_mlir diff --git a/src/PIM/Pass/PimCodegen/EmitPimJsonPass.cpp b/src/PIM/Pass/PimCodegen/EmitPimJsonPass.cpp deleted file mode 100644 index baa72d2..0000000 --- a/src/PIM/Pass/PimCodegen/EmitPimJsonPass.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "mlir/Pass/Pass.h" - -#include "Common/PimCommon.hpp" -#include "Compiler/PimCodeGen.hpp" - -using namespace mlir; - -namespace onnx_mlir { - -namespace { - -struct EmitPimJsonPass : PassWrapper> { - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(EmitPimJsonPass); - StringRef getArgument() const override { return "emit-pim-json-pass"; } - StringRef getDescription() const override { return "Emit json code for the pim simulators"; } - - EmitPimJsonPass() {} - EmitPimJsonPass(const EmitPimJsonPass& pass) {} - - void runOnOperation() override { - ModuleOp moduleOp = getOperation(); - - std::string pimDir = getOutputDir() + "/pim"; - createDirectory(pimDir); - - int compiler_error_code = compileToPimJson(moduleOp, pimDir); - if (compiler_error_code != CompilerSuccess) - signalPassFailure(); - } -}; - -} // namespace - -std::unique_ptr createEmitPimJsonPass() { return std::make_unique(); } - -} // namespace onnx_mlir diff --git a/src/PIM/PimAccelerator.cpp b/src/PIM/PimAccelerator.cpp index 74642cb..76836a7 100644 --- a/src/PIM/PimAccelerator.cpp +++ b/src/PIM/PimAccelerator.cpp @@ -80,7 +80,7 @@ void PimAccelerator::registerPasses(int optLevel) const { registerPass(createPimHostConstantFoldingPass); registerPass(createPimMaterializeHostConstantsPass); registerPass(createPimVerificationPass); - registerPass(createEmitPimJsonPass); + registerPass(createEmitPimCodePass); } void PimAccelerator::configurePasses() const { diff --git a/validation/raptor.py b/validation/raptor.py index 9d9faa3..b1f7eae 100644 --- a/validation/raptor.py +++ b/validation/raptor.py @@ -13,7 +13,7 @@ PIM_PASS_LABELS = ( ("HostConstantFoldingPass", "Fold Host Constants"), ("MaterializeHostConstantsPass", "Materialize Host Constants"), ("VerificationPass", "Verify PIM"), - ("EmitPimJsonPass", "Emit PIM JSON"), + ("EmitPimCodePass", "Emit PIM Code"), ) PIM_PASS_LABEL_BY_SUFFIX = dict(PIM_PASS_LABELS) TIMING_LINE_RE = re.compile(r"^\s*([0-9]+\.[0-9]+)\s+\(\s*[0-9.]+%\)\s+(.+?)\s*$")