binary pim code for reduced memory usage
Validate Operations / validate-operations (push) Has been cancelled
Validate Operations / validate-operations (push) Has been cancelled
fast pim code emission
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
use anyhow::{Context, Result, bail};
|
||||
use clap::Parser;
|
||||
use glob::glob;
|
||||
use pimcore::binary_to_instruction::binary_to_executor;
|
||||
use pimcore::cpu::crossbar::Crossbar;
|
||||
use pimcore::json_to_instruction::json_to_executor;
|
||||
use pimcore::memory_manager::CoreMemory;
|
||||
@@ -44,12 +45,14 @@ fn main() -> Result<()> {
|
||||
let args = Args::parse();
|
||||
|
||||
let config_json = retrive_config(&args)?;
|
||||
let core_jsons = retrive_cores(&args)?;
|
||||
let core_inputs = retrive_cores(&args)?;
|
||||
let memory = retrive_memory(&args)?;
|
||||
let global_crossbars = get_crossbars(&config_json, &args).unwrap();
|
||||
let crossbars = map_crossbars_to_cores(&config_json, &args, &global_crossbars);
|
||||
let mut executor =
|
||||
json_to_executor::json_to_executor(config_json, core_jsons.iter(), crossbars);
|
||||
let mut executor = match &core_inputs {
|
||||
CoreInputs::Json(core_jsons) => json_to_executor::json_to_executor(config_json, core_jsons.iter(), crossbars),
|
||||
CoreInputs::Binary(core_bins) => binary_to_executor(config_json, core_bins.iter(), crossbars)?,
|
||||
};
|
||||
set_memory(&mut executor, memory);
|
||||
TRACER
|
||||
.lock()
|
||||
@@ -214,9 +217,29 @@ fn retrive_memory(args: &Args) -> Result<Vec<u8>> {
|
||||
Ok(memory_vector)
|
||||
}
|
||||
|
||||
fn retrive_cores(args: &Args) -> Result<Vec<Value>, anyhow::Error> {
|
||||
let mut core_jsons: Vec<Value> = Vec::new();
|
||||
enum CoreInputs {
|
||||
Json(Vec<Value>),
|
||||
Binary(Vec<Vec<u8>>),
|
||||
}
|
||||
|
||||
fn retrive_cores(args: &Args) -> Result<CoreInputs, anyhow::Error> {
|
||||
if let Some(cores_override) = &args.cores {
|
||||
let first_extension = cores_override
|
||||
.first()
|
||||
.and_then(|path| path.extension())
|
||||
.and_then(|ext| ext.to_str())
|
||||
.unwrap_or_default();
|
||||
if first_extension == "pim" {
|
||||
let mut core_bins = Vec::with_capacity(cores_override.len());
|
||||
for core in cores_override {
|
||||
core_bins.push(
|
||||
fs::read(core)
|
||||
.with_context(|| format!("Failed to read binary core file: {:?}", core))?,
|
||||
);
|
||||
}
|
||||
return Ok(CoreInputs::Binary(core_bins));
|
||||
}
|
||||
let mut core_jsons: Vec<Value> = Vec::with_capacity(cores_override.len());
|
||||
for core in cores_override {
|
||||
let content = fs::read_to_string(core)
|
||||
.with_context(|| format!("Failed to read core file: {:?}", cores_override))?;
|
||||
@@ -224,35 +247,56 @@ fn retrive_cores(args: &Args) -> Result<Vec<Value>, anyhow::Error> {
|
||||
serde_json::from_str(&content).context("Failed to parse core json override")?;
|
||||
core_jsons.push(json);
|
||||
}
|
||||
} else if let Some(folder) = args.folder.as_ref() {
|
||||
let pattern = folder.join("core*.json");
|
||||
let pattern_str = pattern.to_str().context("Invalid path encoding")?;
|
||||
let mut paths: Vec<_> = glob(pattern_str)?.map(|x| x.unwrap()).collect();
|
||||
paths.sort_by_cached_key(|x| {
|
||||
let mut x = x
|
||||
.file_stem()
|
||||
.expect("Extracting the stem")
|
||||
.to_str()
|
||||
.expect("File not utf-8");
|
||||
x = &x[5..];
|
||||
x.parse::<i32>().unwrap()
|
||||
});
|
||||
return Ok(CoreInputs::Json(core_jsons));
|
||||
}
|
||||
|
||||
if paths.is_empty() {
|
||||
bail!("No core*.json files found in {:?}", folder);
|
||||
if let Some(folder) = args.folder.as_ref() {
|
||||
let binary_pattern = folder.join("core*.pim");
|
||||
let binary_pattern_str = binary_pattern.to_str().context("Invalid path encoding")?;
|
||||
let mut binary_paths: Vec<_> = glob(binary_pattern_str)?.map(|x| x.unwrap()).collect();
|
||||
binary_paths.sort_by_cached_key(core_sort_key);
|
||||
if !binary_paths.is_empty() {
|
||||
let mut core_bins = Vec::with_capacity(binary_paths.len());
|
||||
for path in binary_paths {
|
||||
core_bins.push(
|
||||
fs::read(&path)
|
||||
.with_context(|| format!("Failed to read core file: {:?}", path))?,
|
||||
);
|
||||
}
|
||||
return Ok(CoreInputs::Binary(core_bins));
|
||||
}
|
||||
for entry in paths {
|
||||
let path = entry;
|
||||
|
||||
let json_pattern = folder.join("core*.json");
|
||||
let json_pattern_str = json_pattern.to_str().context("Invalid path encoding")?;
|
||||
let mut json_paths: Vec<_> = glob(json_pattern_str)?.map(|x| x.unwrap()).collect();
|
||||
json_paths.sort_by_cached_key(core_sort_key);
|
||||
|
||||
if json_paths.is_empty() {
|
||||
bail!("No core*.pim or core*.json files found in {:?}", folder);
|
||||
}
|
||||
|
||||
let mut core_jsons: Vec<Value> = Vec::with_capacity(json_paths.len());
|
||||
for path in json_paths {
|
||||
let content = fs::read_to_string(&path)
|
||||
.with_context(|| format!("Failed to read core file: {:?}", path))?;
|
||||
let json: Value = serde_json::from_str(&content)
|
||||
.with_context(|| format!("Failed to parse JSON in {:?}", path))?;
|
||||
core_jsons.push(json);
|
||||
}
|
||||
} else {
|
||||
bail!("Either --core or --folder must be provided to find core definitions.");
|
||||
return Ok(CoreInputs::Json(core_jsons));
|
||||
}
|
||||
Ok(core_jsons)
|
||||
|
||||
bail!("Either --core or --folder must be provided to find core definitions.");
|
||||
}
|
||||
|
||||
/// Numeric sort key for a core file named like `core_<index>.<ext>`.
///
/// The first five bytes of the file stem are skipped and the remainder is parsed
/// as an `i32`, so `core_10` sorts after `core_9` instead of lexicographically.
///
/// The parameter stays `&PathBuf` (rather than `&Path`) because this function is
/// passed directly to `sort_by_cached_key` on a `Vec<PathBuf>`.
///
/// # Panics
///
/// Panics, naming the offending path, when the path has no UTF-8 file stem or
/// when the stem is not followed by a parsable index (for example `core.pim`).
fn core_sort_key(path: &PathBuf) -> i32 {
    let stem = path
        .file_stem()
        .and_then(|stem| stem.to_str())
        .unwrap_or_else(|| panic!("core file {:?} has no UTF-8 file stem", path));

    // `get` instead of slicing: short stems and non-char-boundary offsets end up in
    // the descriptive panic below rather than in an anonymous index panic.
    stem.get(5..)
        .and_then(|index| index.parse::<i32>().ok())
        .unwrap_or_else(|| {
            panic!(
                "core file {:?} is not named like core_<index>.<ext>",
                path
            )
        })
}
|
||||
|
||||
fn retrive_config(args: &Args) -> Result<Value, anyhow::Error> {
|
||||
|
||||
@@ -0,0 +1,497 @@
|
||||
use crate::{
|
||||
CoreInstructionsBuilder, Executable,
|
||||
cpu::{CPU, crossbar::Crossbar},
|
||||
instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder, isa::*},
|
||||
};
|
||||
use anyhow::{Context, Result, bail, ensure};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
const MAGIC: &[u8; 4] = b"PIMB";
|
||||
const VERSION: u32 = 1;
|
||||
const HEADER_SIZE: usize = 12;
|
||||
const RECORD_SIZE: usize = 20;
|
||||
|
||||
/// Mnemonic of every opcode the binary decoder knows, keyed by opcode value.
///
/// `binary_to_instructions` looks names up here to label decoding errors.
static INSTRUCTIONS: LazyLock<HashMap<usize, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        (0, "nop"),
        (1, "sldi"),
        (2, "sld"),
        (3, "sadd"),
        (4, "ssub"),
        (5, "smul"),
        (6, "saddi"),
        (7, "smuli"),
        (8, "setbw"),
        (9, "mvmul"),
        (10, "vvadd"),
        (11, "vvsub"),
        (12, "vvmul"),
        (13, "vvdmul"),
        (14, "vvmax"),
        (15, "vvsll"),
        (16, "vvsra"),
        (17, "vavg"),
        (18, "vrelu"),
        (19, "vtanh"),
        (20, "vsigm"),
        (21, "vsoftmax"),
        (22, "vmv"),
        (23, "vrsu"),
        (24, "vrsl"),
        (25, "ld"),
        (26, "st"),
        (27, "lldi"),
        (28, "lmv"),
        (29, "send"),
        (30, "recv"),
        (31, "wait"),
        (32, "sync"),
    ])
});
|
||||
|
||||
/// One decoded 20-byte instruction record of a PIM binary core file.
///
/// On-disk layout, as read by `parse_binary_records` (multi-byte fields are
/// little-endian): byte 0 `opcode`, byte 1 `rd`, byte 2 `r1`, byte 3 `flags`,
/// bytes 4..8 `r2_or_imm`, bytes 8..12 `generic1`, bytes 12..16 `generic2`,
/// bytes 16..20 `generic3`.
///
/// `PartialEq`/`Eq` are derived so records can be compared directly.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
struct InstructionRecord {
    /// Opcode selecting the instruction (0 = `nop` … 32 = `sync`).
    opcode: u8,
    /// The `rd` register operand.
    rd: u8,
    /// The `r1` register operand.
    r1: u8,
    /// Shared slot: `r2`, `imm`, `mbiw` or `imm_core`, depending on the opcode.
    r2_or_imm: i32,
    /// Shared slot: `offset_select`, `ibiw` or `imm_relu`, depending on the opcode.
    generic1: i32,
    /// Shared slot: `offset_value`, `obiw` or `imm_group`, depending on the opcode.
    generic2: i32,
    /// The `imm_len` operand of the instructions that carry a length.
    generic3: i32,
    /// Flag byte stored in the record; the decoder currently ignores it.
    flags: u8,
}
|
||||
|
||||
/// Reads the little-endian `u32` stored at `bytes[offset..offset + 4]`.
///
/// # Panics
///
/// Panics if that range is out of bounds for `bytes`.
fn read_u32_le(bytes: &[u8], offset: usize) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&bytes[offset..offset + 4]);
    u32::from_le_bytes(word)
}
|
||||
|
||||
/// Reads the little-endian `i32` stored at `bytes[offset..offset + 4]`.
///
/// # Panics
///
/// Panics if that range is out of bounds for `bytes`.
fn read_i32_le(bytes: &[u8], offset: usize) -> i32 {
    let window = &bytes[offset..offset + 4];
    i32::from_le_bytes([window[0], window[1], window[2], window[3]])
}
|
||||
|
||||
fn parse_binary_records(bytes: &[u8]) -> Result<Vec<InstructionRecord>> {
|
||||
ensure!(bytes.len() >= HEADER_SIZE, "binary core file too small");
|
||||
ensure!(&bytes[0..4] == MAGIC, "invalid PIM binary magic");
|
||||
|
||||
let version = read_u32_le(bytes, 4);
|
||||
ensure!(
|
||||
version == VERSION,
|
||||
"unsupported PIM binary version {version}"
|
||||
);
|
||||
|
||||
let instruction_count = read_u32_le(bytes, 8) as usize;
|
||||
let expected_len = HEADER_SIZE + instruction_count * RECORD_SIZE;
|
||||
ensure!(
|
||||
bytes.len() == expected_len,
|
||||
"PIM binary size mismatch: expected {expected_len} bytes, got {}",
|
||||
bytes.len()
|
||||
);
|
||||
|
||||
let mut records = Vec::with_capacity(instruction_count);
|
||||
for index in 0..instruction_count {
|
||||
let base = HEADER_SIZE + index * RECORD_SIZE;
|
||||
records.push(InstructionRecord {
|
||||
opcode: bytes[base],
|
||||
rd: bytes[base + 1],
|
||||
r1: bytes[base + 2],
|
||||
flags: bytes[base + 3],
|
||||
r2_or_imm: read_i32_le(bytes, base + 4),
|
||||
generic1: read_i32_le(bytes, base + 8),
|
||||
generic2: read_i32_le(bytes, base + 12),
|
||||
generic3: read_i32_le(bytes, base + 16),
|
||||
});
|
||||
}
|
||||
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
fn append_record(
|
||||
inst_builder: &mut InstructionsBuilder,
|
||||
inst_data_builder: &mut InstructionDataBuilder,
|
||||
record: InstructionRecord,
|
||||
) -> Result<()> {
|
||||
let InstructionRecord {
|
||||
opcode,
|
||||
rd,
|
||||
r1,
|
||||
r2_or_imm,
|
||||
generic1,
|
||||
generic2,
|
||||
generic3,
|
||||
flags: _,
|
||||
} = record;
|
||||
|
||||
match opcode {
|
||||
0 => {}
|
||||
1 => {
|
||||
inst_data_builder.set_rd_u8(rd).set_imm(r2_or_imm);
|
||||
inst_builder.make_inst(sldi, inst_data_builder.build());
|
||||
}
|
||||
2 => {
|
||||
inst_data_builder
|
||||
.set_rd_u8(rd)
|
||||
.set_r1_u8(r1)
|
||||
.set_offset_select(generic1)
|
||||
.set_offset_value(generic2);
|
||||
inst_builder.make_inst(sld, inst_data_builder.build());
|
||||
}
|
||||
3 => {
|
||||
inst_data_builder.set_rdr1r2_u8(rd, r1, r2_or_imm);
|
||||
inst_builder.make_inst(sadd, inst_data_builder.build());
|
||||
}
|
||||
4 => {
|
||||
inst_data_builder.set_rdr1r2_u8(rd, r1, r2_or_imm);
|
||||
inst_builder.make_inst(ssub, inst_data_builder.build());
|
||||
}
|
||||
5 => {
|
||||
inst_data_builder.set_rdr1r2_u8(rd, r1, r2_or_imm);
|
||||
inst_builder.make_inst(smul, inst_data_builder.build());
|
||||
}
|
||||
6 => {
|
||||
inst_data_builder.set_rdr1imm_u8(rd, r1, r2_or_imm);
|
||||
inst_builder.make_inst(saddi, inst_data_builder.build());
|
||||
}
|
||||
7 => {
|
||||
inst_data_builder.set_rdr1imm_u8(rd, r1, r2_or_imm);
|
||||
inst_builder.make_inst(smuli, inst_data_builder.build());
|
||||
}
|
||||
8 => {
|
||||
inst_data_builder.set_ibiw_obiw(generic1, generic2);
|
||||
inst_builder.make_inst(setbw, inst_data_builder.build());
|
||||
}
|
||||
9 => {
|
||||
inst_data_builder
|
||||
.set_rd_u8(rd)
|
||||
.set_r1_u8(r1)
|
||||
.set_mbiw_immrelu_immgroup(r2_or_imm, generic1, generic2);
|
||||
inst_builder.make_inst(mvmul, inst_data_builder.build());
|
||||
}
|
||||
10 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvadd, inst_data_builder.build());
|
||||
}
|
||||
11 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvsub, inst_data_builder.build());
|
||||
}
|
||||
12 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvmul, inst_data_builder.build());
|
||||
}
|
||||
13 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvdmul, inst_data_builder.build());
|
||||
}
|
||||
14 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvmax, inst_data_builder.build());
|
||||
}
|
||||
15 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvsll, inst_data_builder.build());
|
||||
}
|
||||
16 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vvsra, inst_data_builder.build());
|
||||
}
|
||||
17 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vavg, inst_data_builder.build());
|
||||
}
|
||||
18 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vrelu, inst_data_builder.build());
|
||||
}
|
||||
19 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vtanh, inst_data_builder.build());
|
||||
}
|
||||
20 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vsigm, inst_data_builder.build());
|
||||
}
|
||||
21 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vsoftmax, inst_data_builder.build());
|
||||
}
|
||||
22 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vmv, inst_data_builder.build());
|
||||
}
|
||||
23 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vrsu, inst_data_builder.build());
|
||||
}
|
||||
24 => {
|
||||
inst_data_builder
|
||||
.set_rdr1r2_u8(rd, r1, r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(vrsl, inst_data_builder.build());
|
||||
}
|
||||
25 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(ld, inst_data_builder.build());
|
||||
}
|
||||
26 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(st, inst_data_builder.build());
|
||||
}
|
||||
27 => {
|
||||
inst_data_builder
|
||||
.set_rd_u8(rd)
|
||||
.set_imm(r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(lldi, inst_data_builder.build());
|
||||
}
|
||||
28 => {
|
||||
inst_data_builder
|
||||
.set_rdr1_u8(rd, r1)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(lmv, inst_data_builder.build());
|
||||
}
|
||||
29 => {
|
||||
inst_data_builder
|
||||
.set_rd_u8(rd)
|
||||
.set_imm_core(r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(send, inst_data_builder.build());
|
||||
}
|
||||
30 => {
|
||||
inst_data_builder
|
||||
.set_rd_u8(rd)
|
||||
.set_imm_core(r2_or_imm)
|
||||
.set_imm_len(generic3)
|
||||
.set_offset_select_value(generic1, generic2);
|
||||
inst_builder.make_inst(recv, inst_data_builder.build());
|
||||
}
|
||||
31 => {
|
||||
inst_builder.make_inst(wait, inst_data_builder.build());
|
||||
}
|
||||
32 => {
|
||||
inst_builder.make_inst(sync, inst_data_builder.build());
|
||||
}
|
||||
_ => bail!("unsupported PIM binary opcode {opcode}"),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn binary_to_instructions(
|
||||
core_bytes: &[u8],
|
||||
core_index: i32,
|
||||
) -> Result<Vec<crate::instruction_set::Instruction>> {
|
||||
let records = parse_binary_records(core_bytes)?;
|
||||
let mut insts_builder = InstructionsBuilder::new();
|
||||
let mut inst_data_builder = InstructionDataBuilder::new();
|
||||
inst_data_builder
|
||||
.set_core_indx_u16(u16::try_from(core_index).expect("core index does not fit in u16"))
|
||||
.fix_core_indx();
|
||||
|
||||
for record in records {
|
||||
let opcode = record.opcode;
|
||||
let name = INSTRUCTIONS
|
||||
.get(&(opcode as usize))
|
||||
.copied()
|
||||
.unwrap_or("<unknown>");
|
||||
|
||||
append_record(&mut insts_builder, &mut inst_data_builder, record).with_context(|| {
|
||||
format!(
|
||||
"while decoding binary instruction for core {core_index}: opcode {opcode} ({name})"
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(insts_builder.build())
|
||||
}
|
||||
|
||||
pub fn binary_to_executor<'a, 'b>(
|
||||
config: Value,
|
||||
mut cores: impl Iterator<Item = &'b Vec<u8>>,
|
||||
crossbars: Vec<Vec<&'a Crossbar>>,
|
||||
) -> Result<Executable<'a>> {
|
||||
let core_cnt = config
|
||||
.get("core_cnt")
|
||||
.context("missing core_cnt in config")?
|
||||
.as_i64()
|
||||
.context("core_cnt is not an integer")? as i32
|
||||
- 1;
|
||||
|
||||
let cpu = CPU::new(core_cnt, crossbars);
|
||||
let mut core_insts_builder = CoreInstructionsBuilder::new(core_cnt as usize);
|
||||
cores.next();
|
||||
for core_indx in 1..=core_cnt {
|
||||
let core_bytes = cores
|
||||
.next()
|
||||
.unwrap_or_else(|| panic!("cores files less than {}", core_indx));
|
||||
let instructions = binary_to_instructions(core_bytes, core_indx)?;
|
||||
core_insts_builder.set_core(core_indx, instructions);
|
||||
}
|
||||
|
||||
Ok(Executable::new(cpu, core_insts_builder.build()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{
        HEADER_SIZE, InstructionRecord, MAGIC, RECORD_SIZE, VERSION, binary_to_instructions,
    };
    use crate::{
        instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder},
        json_to_instruction::json_isa::json_to_instruction,
    };

    /// Appends `record` to `dst` in the on-disk layout: opcode, rd, r1, flags,
    /// then the four 32-bit slots in little-endian order.
    fn encode_record(record: InstructionRecord, dst: &mut Vec<u8>) {
        dst.extend_from_slice(&[record.opcode, record.rd, record.r1, record.flags]);
        for word in [
            record.r2_or_imm,
            record.generic1,
            record.generic2,
            record.generic3,
        ] {
            dst.extend_from_slice(&word.to_le_bytes());
        }
    }

    /// Builds a complete binary core file (header plus records) in memory.
    fn binary_blob(records: &[InstructionRecord]) -> Vec<u8> {
        let mut blob = Vec::with_capacity(HEADER_SIZE + records.len() * RECORD_SIZE);
        blob.extend_from_slice(MAGIC);
        blob.extend_from_slice(&VERSION.to_le_bytes());
        blob.extend_from_slice(&(records.len() as u32).to_le_bytes());
        records
            .iter()
            .for_each(|&record| encode_record(record, &mut blob));
        blob
    }

    /// The same small program, written once as JSON and once as binary records,
    /// must decode to identical functors and instruction data.
    #[test]
    fn json_and_binary_decoders_match_for_representative_ops() {
        let json_program = [
            r#"{"imm":64,"op":"sldi","rd":0}"#,
            r#"{"imm":128,"op":"sldi","rd":1}"#,
            r#"{"len":16,"offset":{"offset_select":0,"offset_value":0},"op":"lmv","rd":0,"rs1":1}"#,
            r#"{"group":3,"mbiw":8,"op":"mvmul","rd":0,"relu":0,"rs1":1}"#,
            r#"{"len":16,"offset":{"offset_select":0,"offset_value":0},"op":"vvadd","rd":0,"rs1":1,"rs2":2}"#,
            r#"{"core":2,"offset":{"offset_select":0,"offset_value":0},"op":"send","rd":0,"size":16}"#,
        ];

        // Binary counterparts, in the same order as the JSON program above.
        let sldi_64 = InstructionRecord {
            opcode: 1,
            rd: 0,
            r2_or_imm: 64,
            ..Default::default()
        };
        let sldi_128 = InstructionRecord {
            opcode: 1,
            rd: 1,
            r2_or_imm: 128,
            ..Default::default()
        };
        let lmv_16 = InstructionRecord {
            opcode: 28,
            rd: 0,
            r1: 1,
            generic3: 16,
            ..Default::default()
        };
        let mvmul_group_3 = InstructionRecord {
            opcode: 9,
            rd: 0,
            r1: 1,
            r2_or_imm: 8,
            generic2: 3,
            ..Default::default()
        };
        let vvadd_16 = InstructionRecord {
            opcode: 10,
            rd: 0,
            r1: 1,
            r2_or_imm: 2,
            generic3: 16,
            ..Default::default()
        };
        let send_to_core_2 = InstructionRecord {
            opcode: 29,
            rd: 0,
            r2_or_imm: 2,
            generic3: 16,
            ..Default::default()
        };
        let binary_program = binary_blob(&[
            sldi_64,
            sldi_128,
            lmv_16,
            mvmul_group_3,
            vvadd_16,
            send_to_core_2,
        ]);

        let mut json_builder = InstructionsBuilder::new();
        let mut json_data_builder = InstructionDataBuilder::new();
        json_data_builder.set_core_indx(1).fix_core_indx();
        for inst in json_program {
            let value = serde_json::from_str(inst).unwrap();
            json_to_instruction(&mut json_builder, &mut json_data_builder, &value);
        }
        let json_instructions = json_builder.build();
        let binary_instructions = binary_to_instructions(&binary_program, 1).unwrap();

        assert_eq!(json_instructions.len(), binary_instructions.len());
        let pairs = json_instructions.iter().zip(binary_instructions.iter());
        for (json_inst, binary_inst) in pairs {
            assert_eq!(json_inst.functor_name(), binary_inst.functor_name());
            assert_eq!(json_inst.data, binary_inst.data);
        }
    }
}
|
||||
@@ -1,10 +1,11 @@
|
||||
use paste::paste;
|
||||
use std::convert::TryFrom;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
|
||||
pub struct InstructionData {
|
||||
core_indx: i32,
|
||||
rd: i32,
|
||||
r1: i32,
|
||||
core_indx: u16,
|
||||
rd: u8,
|
||||
r1: u8,
|
||||
//r2 imm mbiw imm_core
|
||||
r2_or_imm: i32,
|
||||
//offset_select imm_relu ibiw
|
||||
@@ -16,18 +17,30 @@ pub struct InstructionData {
|
||||
}
|
||||
|
||||
impl InstructionData {
|
||||
pub fn core_indx(&self) -> i32 {
|
||||
pub fn core_indx_u16(&self) -> u16 {
|
||||
self.core_indx
|
||||
}
|
||||
|
||||
pub fn rd(&self) -> i32 {
|
||||
pub fn core_indx(&self) -> i32 {
|
||||
i32::from(self.core_indx)
|
||||
}
|
||||
|
||||
pub fn rd_u8(&self) -> u8 {
|
||||
self.rd
|
||||
}
|
||||
|
||||
pub fn r1(&self) -> i32 {
|
||||
pub fn rd(&self) -> i32 {
|
||||
i32::from(self.rd)
|
||||
}
|
||||
|
||||
pub fn r1_u8(&self) -> u8 {
|
||||
self.r1
|
||||
}
|
||||
|
||||
pub fn r1(&self) -> i32 {
|
||||
i32::from(self.r1)
|
||||
}
|
||||
|
||||
pub fn r2(&self) -> i32 {
|
||||
self.r2_or_imm
|
||||
}
|
||||
@@ -49,26 +62,26 @@ impl InstructionData {
|
||||
}
|
||||
|
||||
pub fn get_core_rd_r1(&self) -> (i32, i32, i32) {
|
||||
(self.core_indx, self.rd, self.r1)
|
||||
(self.core_indx(), self.rd(), self.r1())
|
||||
}
|
||||
|
||||
pub fn get_core_rd_r1_r2(&self) -> (i32, i32, i32, i32) {
|
||||
(self.core_indx, self.rd, self.r1, self.r2_or_imm)
|
||||
(self.core_indx(), self.rd(), self.r1(), self.r2_or_imm)
|
||||
}
|
||||
|
||||
pub fn get_core_rd_imm(&self) -> (i32, i32, i32) {
|
||||
(self.core_indx, self.rd, self.r2_or_imm)
|
||||
(self.core_indx(), self.rd(), self.r2_or_imm)
|
||||
}
|
||||
|
||||
pub fn get_core_rd_r1_imm(&self) -> (i32, i32, i32, i32) {
|
||||
(self.core_indx, self.rd, self.r1, self.r2_or_imm)
|
||||
(self.core_indx(), self.rd(), self.r1(), self.r2_or_imm)
|
||||
}
|
||||
|
||||
pub fn get_core_rd_r1_r2_immlen_offset(&self) -> (i32, i32, i32, i32, i32, i32, i32) {
|
||||
(
|
||||
self.core_indx,
|
||||
self.rd,
|
||||
self.r1,
|
||||
self.core_indx(),
|
||||
self.rd(),
|
||||
self.r1(),
|
||||
self.r2_or_imm,
|
||||
self.generic3,
|
||||
self.generic1,
|
||||
@@ -78,9 +91,9 @@ impl InstructionData {
|
||||
|
||||
pub fn get_core_rd_r1_mbiw_immrelu_immgroup(&self) -> (i32, i32, i32, i32, i32, i32) {
|
||||
(
|
||||
self.core_indx,
|
||||
self.rd,
|
||||
self.r1,
|
||||
self.core_indx(),
|
||||
self.rd(),
|
||||
self.r1(),
|
||||
self.r2_or_imm,
|
||||
self.generic1,
|
||||
self.generic2,
|
||||
@@ -100,7 +113,7 @@ impl InstructionData {
|
||||
}
|
||||
|
||||
pub(crate) fn get_core_immcore(&self) -> (i32, i32) {
|
||||
(self.core_indx, self.r2_or_imm)
|
||||
(self.core_indx(), self.r2_or_imm)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,6 +229,18 @@ impl InstructionDataBuilder {
|
||||
common_getter_setter![imm_group];
|
||||
common_getter_setter![imm_core];
|
||||
|
||||
pub fn set_core_indx_u16(&mut self, val: u16) -> &mut Self {
|
||||
self.set_core_indx(i32::from(val))
|
||||
}
|
||||
|
||||
pub fn set_rd_u8(&mut self, val: u8) -> &mut Self {
|
||||
self.set_rd(i32::from(val))
|
||||
}
|
||||
|
||||
pub fn set_r1_u8(&mut self, val: u8) -> &mut Self {
|
||||
self.set_r1(i32::from(val))
|
||||
}
|
||||
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
core_indx: Fixer::Edit(0),
|
||||
@@ -254,20 +279,16 @@ impl InstructionDataBuilder {
|
||||
|
||||
fn check_sanity(&self) {
|
||||
assert!(!(self.get_r2() != 0 && self.get_imm() != 0 && self.get_mbiw() != 0 && self.get_imm_core() != 0));
|
||||
assert!(
|
||||
!(self.get_ibiw() != 0 && self.get_offset_select() != 0 && self.get_imm_relu() != 0)
|
||||
);
|
||||
assert!(
|
||||
!(self.get_obiw() != 0 && self.get_offset_value() != 0 && self.get_imm_group() != 0)
|
||||
);
|
||||
assert!(!(self.get_ibiw() != 0 && self.get_offset_select() != 0 && self.get_imm_relu() != 0));
|
||||
assert!(!(self.get_obiw() != 0 && self.get_offset_value() != 0 && self.get_imm_group() != 0));
|
||||
}
|
||||
|
||||
pub fn build(&mut self) -> InstructionData {
|
||||
self.check_sanity();
|
||||
let inst_data = InstructionData {
|
||||
core_indx: self.get_core_indx(),
|
||||
rd: self.get_rd(),
|
||||
r1: self.get_r1(),
|
||||
core_indx: u16::try_from(self.get_core_indx()).expect("core index does not fit in u16"),
|
||||
rd: u8::try_from(self.get_rd()).expect("rd does not fit in u8"),
|
||||
r1: u8::try_from(self.get_r1()).expect("r1 does not fit in u8"),
|
||||
r2_or_imm: self.get_r2() + self.get_imm() + self.get_mbiw() + self.get_imm_core(),
|
||||
generic1: self.get_offset_select() + self.get_ibiw() + self.get_imm_relu(),
|
||||
generic2: self.get_offset_value() + self.get_obiw() + self.get_imm_group(),
|
||||
@@ -281,6 +302,10 @@ impl InstructionDataBuilder {
|
||||
self.set_rd(rd).set_r1(r1).set_r2(r2)
|
||||
}
|
||||
|
||||
pub fn set_rdr1r2_u8(&mut self, rd: u8, r1: u8, r2: i32) -> &mut Self {
|
||||
self.set_rd_u8(rd).set_r1_u8(r1).set_r2(r2)
|
||||
}
|
||||
|
||||
pub fn set_offset_select_value(&mut self, offset_select: i32, offset_value: i32) -> &mut Self {
|
||||
self.set_offset_select(offset_select)
|
||||
.set_offset_value(offset_value)
|
||||
@@ -290,14 +315,26 @@ impl InstructionDataBuilder {
|
||||
self.set_rd(rd).set_r1(r1).set_imm(imm)
|
||||
}
|
||||
|
||||
pub fn set_rdr1imm_u8(&mut self, rd: u8, r1: u8, imm: i32) -> &mut Self {
|
||||
self.set_rd_u8(rd).set_r1_u8(r1).set_imm(imm)
|
||||
}
|
||||
|
||||
pub fn set_rdr1(&mut self, rd: i32, r1: i32) -> &mut Self {
|
||||
self.set_rd(rd).set_r1(r1)
|
||||
}
|
||||
|
||||
pub fn set_rdr1_u8(&mut self, rd: u8, r1: u8) -> &mut Self {
|
||||
self.set_rd_u8(rd).set_r1_u8(r1)
|
||||
}
|
||||
|
||||
pub fn set_rdimm(&mut self, rd: i32, imm: i32) -> &mut Self {
|
||||
self.set_rd(rd).set_imm(imm)
|
||||
}
|
||||
|
||||
pub fn set_rdimm_u8(&mut self, rd: u8, imm: i32) -> &mut Self {
|
||||
self.set_rd_u8(rd).set_imm(imm)
|
||||
}
|
||||
|
||||
pub fn set_ibiw_obiw(&mut self, ibiw: i32, obiw: i32) -> &mut Self {
|
||||
self.set_ibiw(ibiw).set_obiw(obiw)
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ use crate::{
|
||||
tracing::TRACER,
|
||||
};
|
||||
pub mod cpu;
|
||||
pub mod binary_to_instruction;
|
||||
pub mod instruction_set;
|
||||
pub mod json_to_instruction;
|
||||
pub mod memory_manager;
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
#include "src/Accelerators/PIM/Common/IR/WeightUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimArtifactWriter.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
|
||||
@@ -19,18 +20,35 @@ using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
|
||||
OnnxMlirCompilerErrorCodes writeHostCoreJson(StringRef outputDirPath) {
|
||||
OnnxMlirCompilerErrorCodes writeHostCoreArtifacts(StringRef outputDirPath) {
|
||||
std::error_code errorCode;
|
||||
std::string outputHostCorePath = outputDirPath.str() + "/core_0.json";
|
||||
raw_fd_ostream hostFileStream(outputHostCorePath, errorCode);
|
||||
std::string outputHostCorePath = outputDirPath.str() + "/core_0.pim";
|
||||
raw_fd_ostream hostFileStream(outputHostCorePath, errorCode, sys::fs::OF_None);
|
||||
if (errorCode) {
|
||||
errs() << "Error while opening host core file `" << outputHostCorePath << "`: " << errorCode.message() << '\n';
|
||||
return InvalidOutputFileAccess;
|
||||
}
|
||||
|
||||
// The host core json contains two no-op-like instructions to satisfy pimsim-nn.
|
||||
hostFileStream << "[{\"imm\":0,\"op\":\"sldi\",\"rd\":0},{\"imm\":0,\"op\":\"sldi\",\"rd\":0}]";
|
||||
pim_binary::writeHeader(hostFileStream);
|
||||
pim_binary::InstructionRecord noop;
|
||||
noop.opcode = pim_binary::Opcode::sldi;
|
||||
pim_binary::writeInstructionRecord(hostFileStream, noop);
|
||||
pim_binary::writeInstructionRecord(hostFileStream, noop);
|
||||
pim_binary::patchInstructionCount(hostFileStream, 2);
|
||||
hostFileStream.close();
|
||||
|
||||
if (pimEmitJson.getValue()) {
|
||||
std::string outputHostJsonPath = outputDirPath.str() + "/core_0.json";
|
||||
raw_fd_ostream hostJsonStream(outputHostJsonPath, errorCode);
|
||||
if (errorCode) {
|
||||
errs() << "Error while opening host core json file `" << outputHostJsonPath << "`: " << errorCode.message()
|
||||
<< '\n';
|
||||
return InvalidOutputFileAccess;
|
||||
}
|
||||
// The host core json contains two no-op-like instructions to satisfy pimsim-nn
|
||||
hostJsonStream << "[{\"imm\":0,\"op\":\"sldi\",\"rd\":0},{\"imm\":0,\"op\":\"sldi\",\"rd\":0}]";
|
||||
hostJsonStream.close();
|
||||
}
|
||||
return CompilerSuccess;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ namespace onnx_mlir {
|
||||
|
||||
class PimAcceleratorMemory;
|
||||
|
||||
OnnxMlirCompilerErrorCodes writeHostCoreJson(llvm::StringRef outputDirPath);
|
||||
OnnxMlirCompilerErrorCodes writeHostCoreArtifacts(llvm::StringRef outputDirPath);
|
||||
OnnxMlirCompilerErrorCodes writeMemoryBinary(mlir::ModuleOp moduleOp,
|
||||
mlir::func::FuncOp funcOp,
|
||||
PimAcceleratorMemory& memory,
|
||||
|
||||
@@ -0,0 +1,381 @@
|
||||
#pragma once
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Support/Endian.h"
|
||||
#include "llvm/Support/JSON.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
namespace onnx_mlir::pim_binary {
|
||||
|
||||
inline constexpr char kMagic[4] = {'P', 'I', 'M', 'B'};
|
||||
inline constexpr uint32_t kVersion = 1;
|
||||
inline constexpr uint64_t kCountOffset = 8;
|
||||
inline constexpr size_t kHeaderSize = 12;
|
||||
inline constexpr size_t kRecordSize = 20;
|
||||
|
||||
// Numeric identifiers of the PIM instructions.
//
// writeInstructionRecord() stores the opcode as a single byte, so the values
// are spelled as byte-sized hex literals. They are part of the file format:
// never renumber an existing entry, only append new ones.
enum class Opcode : uint32_t {
  nop = 0x00,
  sldi = 0x01,
  sld = 0x02,
  sadd = 0x03,
  ssub = 0x04,
  smul = 0x05,
  saddi = 0x06,
  smuli = 0x07,
  setbw = 0x08,
  mvmul = 0x09,
  vvadd = 0x0A,
  vvsub = 0x0B,
  vvmul = 0x0C,
  vvdmul = 0x0D,
  vvmax = 0x0E,
  vvsll = 0x0F,
  vvsra = 0x10,
  vavg = 0x11,
  vrelu = 0x12,
  vtanh = 0x13,
  vsigm = 0x14,
  vsoftmax = 0x15,
  vmv = 0x16,
  vrsu = 0x17,
  vrsl = 0x18,
  ld = 0x19,
  st = 0x1A,
  lldi = 0x1B,
  lmv = 0x1C,
  send = 0x1D,
  recv = 0x1E,
  wait = 0x1F,
  sync = 0x20,
};
|
||||
|
||||
struct InstructionRecord {
|
||||
Opcode opcode = Opcode::nop;
|
||||
uint8_t rd = 0;
|
||||
uint8_t r1 = 0;
|
||||
int32_t r2OrImm = 0;
|
||||
int32_t generic1 = 0;
|
||||
int32_t generic2 = 0;
|
||||
int32_t generic3 = 0;
|
||||
uint8_t flags = 0;
|
||||
};
|
||||
|
||||
// Appends `value` to `os` as four bytes in little-endian order, regardless of
// the host byte order.
inline void writeUint32LE(llvm::raw_ostream& os, uint32_t value) {
  char encoded[sizeof(uint32_t)];
  llvm::support::endian::write32le(encoded, value);
  os.write(encoded, sizeof(encoded));
}
|
||||
|
||||
// Appends a signed 32-bit value to `os` in little-endian order by writing its
// bit pattern reinterpreted as unsigned.
inline void writeInt32LE(llvm::raw_ostream& os, int32_t value) {
  const auto bitPattern = static_cast<uint32_t>(value);
  writeUint32LE(os, bitPattern);
}
|
||||
|
||||
// Writes the file header: signature, format version and an instruction count
// of zero. The count is a placeholder meant to be overwritten later through
// patchInstructionCount() once the number of emitted records is known.
inline void writeHeader(llvm::raw_ostream& os) {
  constexpr uint32_t kPlaceholderCount = 0;

  os.write(kMagic, sizeof(kMagic));
  writeUint32LE(os, kVersion);
  writeUint32LE(os, kPlaceholderCount);
}
|
||||
|
||||
// Overwrites the instruction-count field of an already written header with
// `instructionCount` (little-endian), without moving the stream's current
// write position. Requires a stream that supports positional writes.
inline void patchInstructionCount(llvm::raw_pwrite_stream& os, uint32_t instructionCount) {
  char encoded[sizeof(uint32_t)];
  llvm::support::endian::write32le(encoded, instructionCount);
  os.pwrite(encoded, sizeof(encoded), kCountOffset);
}
|
||||
|
||||
// Serializes one instruction to `os`.
//
// Layout: opcode, rd, r1 and flags as one byte each, followed by r2OrImm,
// generic1, generic2 and generic3 as little-endian 32-bit words.
inline void writeInstructionRecord(llvm::raw_ostream& os, const InstructionRecord& record) {
  // Single-byte fields; the opcode is narrowed to its low byte.
  const char byteFields[4] = {
      static_cast<char>(static_cast<uint8_t>(record.opcode)),
      static_cast<char>(record.rd),
      static_cast<char>(record.r1),
      static_cast<char>(record.flags),
  };
  os.write(byteFields, sizeof(byteFields));

  const std::array<int32_t, 4> words = {record.r2OrImm, record.generic1, record.generic2, record.generic3};
  for (int32_t word : words)
    writeInt32LE(os, word);
}
|
||||
|
||||
// Narrows `value` to int32_t. In builds with assertions enabled, a value
// outside the int32 range aborts; otherwise the conversion is unchecked.
inline int32_t toI32(int64_t value) {
  constexpr int64_t kLowest = std::numeric_limits<int32_t>::min();
  constexpr int64_t kHighest = std::numeric_limits<int32_t>::max();
  assert(kLowest <= value && value <= kHighest && "PIM binary field out of int32 range");
  return static_cast<int32_t>(value);
}
|
||||
|
||||
// Narrows `value` to uint8_t. In builds with assertions enabled, a negative
// value or one above 255 aborts; otherwise the conversion is unchecked.
inline uint8_t toU8(int64_t value) {
  constexpr int64_t kHighest = std::numeric_limits<uint8_t>::max();
  assert(0 <= value && value <= kHighest && "PIM binary field out of uint8 range");
  return static_cast<uint8_t>(value);
}
|
||||
|
||||
// Reads the integer stored under `key` in `object`, narrowed through toI32().
// Returns `defaultValue` when the key is absent or does not hold an integer.
inline int32_t getOptionalInt(const llvm::json::Object& object, llvm::StringRef key, int32_t defaultValue = 0) {
  const std::optional<int64_t> found = object.getInteger(key);
  return found ? toI32(*found) : defaultValue;
}
|
||||
|
||||
// Maps an instruction mnemonic (e.g. "sldi", "mvmul") to its Opcode.
//
// The mnemonic is runtime data: it comes from the "op" field of an
// instruction JSON object or from a caller-supplied name. An unknown mnemonic
// is therefore reported through llvm::report_fatal_error, which terminates
// the process in every build configuration. llvm_unreachable would be
// undefined behaviour in builds without assertions.
inline Opcode opcodeFromString(llvm::StringRef opName) {
  struct NamedOpcode {
    const char* name;
    Opcode opcode;
  };
  // One entry per enumerator of Opcode, in declaration order.
  static constexpr NamedOpcode kNamedOpcodes[] = {
      {"nop",      Opcode::nop     },
      {"sldi",     Opcode::sldi    },
      {"sld",      Opcode::sld     },
      {"sadd",     Opcode::sadd    },
      {"ssub",     Opcode::ssub    },
      {"smul",     Opcode::smul    },
      {"saddi",    Opcode::saddi   },
      {"smuli",    Opcode::smuli   },
      {"setbw",    Opcode::setbw   },
      {"mvmul",    Opcode::mvmul   },
      {"vvadd",    Opcode::vvadd   },
      {"vvsub",    Opcode::vvsub   },
      {"vvmul",    Opcode::vvmul   },
      {"vvdmul",   Opcode::vvdmul  },
      {"vvmax",    Opcode::vvmax   },
      {"vvsll",    Opcode::vvsll   },
      {"vvsra",    Opcode::vvsra   },
      {"vavg",     Opcode::vavg    },
      {"vrelu",    Opcode::vrelu   },
      {"vtanh",    Opcode::vtanh   },
      {"vsigm",    Opcode::vsigm   },
      {"vsoftmax", Opcode::vsoftmax},
      {"vmv",      Opcode::vmv     },
      {"vrsu",     Opcode::vrsu    },
      {"vrsl",     Opcode::vrsl    },
      {"ld",       Opcode::ld      },
      {"st",       Opcode::st      },
      {"lldi",     Opcode::lldi    },
      {"lmv",      Opcode::lmv     },
      {"send",     Opcode::send    },
      {"recv",     Opcode::recv    },
      {"wait",     Opcode::wait    },
      {"sync",     Opcode::sync    },
  };

  for (const NamedOpcode& entry : kNamedOpcodes)
    if (opName == entry.name)
      return entry.opcode;

  // Name the offending mnemonic before aborting so the failure is actionable.
  llvm::errs() << "Unsupported PIM binary opcode `" << opName << "`\n";
  llvm::report_fatal_error("Unsupported PIM binary opcode");
}
|
||||
|
||||
// Returns the mnemonic of `opcode`; the inverse of opcodeFromString().
// The switch intentionally has no default label so that adding an enumerator
// to Opcode without a mnemonic triggers a compiler warning. A value that is
// not a declared enumerator is treated as unreachable.
inline llvm::StringRef opcodeToString(Opcode opcode) {
  const char* mnemonic = nullptr;
  switch (opcode) {
  case Opcode::nop: mnemonic = "nop"; break;
  case Opcode::sldi: mnemonic = "sldi"; break;
  case Opcode::sld: mnemonic = "sld"; break;
  case Opcode::sadd: mnemonic = "sadd"; break;
  case Opcode::ssub: mnemonic = "ssub"; break;
  case Opcode::smul: mnemonic = "smul"; break;
  case Opcode::saddi: mnemonic = "saddi"; break;
  case Opcode::smuli: mnemonic = "smuli"; break;
  case Opcode::setbw: mnemonic = "setbw"; break;
  case Opcode::mvmul: mnemonic = "mvmul"; break;
  case Opcode::vvadd: mnemonic = "vvadd"; break;
  case Opcode::vvsub: mnemonic = "vvsub"; break;
  case Opcode::vvmul: mnemonic = "vvmul"; break;
  case Opcode::vvdmul: mnemonic = "vvdmul"; break;
  case Opcode::vvmax: mnemonic = "vvmax"; break;
  case Opcode::vvsll: mnemonic = "vvsll"; break;
  case Opcode::vvsra: mnemonic = "vvsra"; break;
  case Opcode::vavg: mnemonic = "vavg"; break;
  case Opcode::vrelu: mnemonic = "vrelu"; break;
  case Opcode::vtanh: mnemonic = "vtanh"; break;
  case Opcode::vsigm: mnemonic = "vsigm"; break;
  case Opcode::vsoftmax: mnemonic = "vsoftmax"; break;
  case Opcode::vmv: mnemonic = "vmv"; break;
  case Opcode::vrsu: mnemonic = "vrsu"; break;
  case Opcode::vrsl: mnemonic = "vrsl"; break;
  case Opcode::ld: mnemonic = "ld"; break;
  case Opcode::st: mnemonic = "st"; break;
  case Opcode::lldi: mnemonic = "lldi"; break;
  case Opcode::lmv: mnemonic = "lmv"; break;
  case Opcode::send: mnemonic = "send"; break;
  case Opcode::recv: mnemonic = "recv"; break;
  case Opcode::wait: mnemonic = "wait"; break;
  case Opcode::sync: mnemonic = "sync"; break;
  }
  if (mnemonic == nullptr)
    llvm_unreachable("Unsupported PIM binary opcode");
  return mnemonic;
}
|
||||
|
||||
// Converts the JSON description of one instruction into an InstructionRecord.
//
// Field mapping:
//   "op"                     -> opcode (mandatory)
//   "rd", "rs1"              -> rd, r1 (default 0 when absent)
//   "imm" / "mbiw" / "core" / "rs2" (by opcode) -> r2OrImm
//   mvmul: "relu", "group"   -> generic1, generic2
//   setbw: "ibiw", "obiw"    -> generic1, generic2
//   others: "offset".{"offset_select","offset_value"} -> generic1, generic2
//   "len", otherwise "size"  -> generic3
//
// A missing "op" field terminates the process through
// llvm::report_fatal_error in every build configuration. An assert alone
// would let builds without assertions dereference an empty optional.
inline InstructionRecord makeInstructionRecord(const llvm::json::Object& instruction) {
  std::optional<llvm::StringRef> opName = instruction.getString("op");
  if (!opName)
    llvm::report_fatal_error("Missing op field in PIM instruction");

  InstructionRecord record;
  record.opcode = opcodeFromString(*opName);
  record.rd = toU8(getOptionalInt(instruction, "rd"));
  record.r1 = toU8(getOptionalInt(instruction, "rs1"));

  // Opcode-specific operands.
  switch (record.opcode) {
  case Opcode::sldi:
  case Opcode::saddi:
  case Opcode::smuli:
  case Opcode::lldi:
    record.r2OrImm = getOptionalInt(instruction, "imm");
    break;
  case Opcode::mvmul:
    record.r2OrImm = getOptionalInt(instruction, "mbiw");
    record.generic1 = getOptionalInt(instruction, "relu");
    record.generic2 = getOptionalInt(instruction, "group");
    break;
  case Opcode::setbw:
    record.generic1 = getOptionalInt(instruction, "ibiw");
    record.generic2 = getOptionalInt(instruction, "obiw");
    break;
  case Opcode::send:
  case Opcode::recv:
    record.r2OrImm = getOptionalInt(instruction, "core");
    record.generic3 = getOptionalInt(instruction, "size");
    break;
  default:
    record.r2OrImm = getOptionalInt(instruction, "rs2");
    break;
  }

  // mvmul and setbw already use generic1/generic2 for their own operands, so
  // the offset object is only consulted for the remaining opcodes.
  if (record.opcode != Opcode::mvmul && record.opcode != Opcode::setbw) {
    if (auto* offsetValue = instruction.getObject("offset")) {
      record.generic1 = getOptionalInt(*offsetValue, "offset_select");
      record.generic2 = getOptionalInt(*offsetValue, "offset_value");
    }
  }

  // "len" takes precedence over "size"; send/recv read "size" in the switch
  // above, so it is not read a second time here.
  if (instruction.get("len"))
    record.generic3 = getOptionalInt(instruction, "len");
  else if (instruction.get("size") && record.opcode != Opcode::send && record.opcode != Opcode::recv)
    record.generic3 = getOptionalInt(instruction, "size");

  return record;
}
|
||||
|
||||
// Builds the JSON object describing `record`; the counterpart of
// makeInstructionRecord(). Every object carries "op"; the remaining keys
// depend on the opcode and are taken from the record slots as noted on each
// case below. wait, sync and nop carry no operands.
inline llvm::json::Object makeInstructionJson(const InstructionRecord& record) {
  llvm::json::Object result;
  result["op"] = opcodeToString(record.opcode).str();

  // Small writers for the fields shared by several opcodes.
  const auto putRd = [&] { result["rd"] = static_cast<int64_t>(record.rd); };
  const auto putRdRs1 = [&] {
    putRd();
    result["rs1"] = static_cast<int64_t>(record.r1);
  };
  // Emits the nested "offset" object from generic1/generic2.
  const auto putOffset = [&] {
    llvm::json::Object offset;
    offset["offset_select"] = record.generic1;
    offset["offset_value"] = record.generic2;
    result["offset"] = std::move(offset);
  };

  switch (record.opcode) {
  // rd + immediate.
  case Opcode::sldi:
    putRd();
    result["imm"] = record.r2OrImm;
    break;

  // rd, rs1 + offset.
  case Opcode::sld:
    putRdRs1();
    putOffset();
    break;

  // rd, rs1, rs2.
  case Opcode::sadd:
  case Opcode::ssub:
  case Opcode::smul:
    putRdRs1();
    result["rs2"] = record.r2OrImm;
    break;

  // rd, rs1 + immediate.
  case Opcode::saddi:
  case Opcode::smuli:
    putRdRs1();
    result["imm"] = record.r2OrImm;
    break;

  // Bit-width pair only.
  case Opcode::setbw:
    result["ibiw"] = record.generic1;
    result["obiw"] = record.generic2;
    break;

  // rd, rs1 + mbiw/relu/group.
  case Opcode::mvmul:
    putRdRs1();
    result["mbiw"] = record.r2OrImm;
    result["relu"] = record.generic1;
    result["group"] = record.generic2;
    break;

  // rd, rs1, rs2 + offset + len.
  case Opcode::vvadd:
  case Opcode::vvsub:
  case Opcode::vvmul:
  case Opcode::vvdmul:
  case Opcode::vvmax:
  case Opcode::vvsll:
  case Opcode::vvsra:
  case Opcode::vavg:
  case Opcode::vmv:
  case Opcode::vrsu:
  case Opcode::vrsl:
    putRdRs1();
    result["rs2"] = record.r2OrImm;
    putOffset();
    result["len"] = record.generic3;
    break;

  // rd, rs1 + offset + len.
  case Opcode::vrelu:
  case Opcode::vtanh:
  case Opcode::vsigm:
  case Opcode::vsoftmax:
  case Opcode::lmv:
    putRdRs1();
    putOffset();
    result["len"] = record.generic3;
    break;

  // rd, rs1 + offset + size.
  case Opcode::ld:
  case Opcode::st:
    putRdRs1();
    putOffset();
    result["size"] = record.generic3;
    break;

  // rd + immediate + offset + len.
  case Opcode::lldi:
    putRd();
    result["imm"] = record.r2OrImm;
    putOffset();
    result["len"] = record.generic3;
    break;

  // rd + core + offset + size.
  case Opcode::send:
  case Opcode::recv:
    putRd();
    result["core"] = record.r2OrImm;
    putOffset();
    result["size"] = record.generic3;
    break;

  // No operands.
  case Opcode::wait:
  case Opcode::sync:
  case Opcode::nop:
    break;
  }

  return result;
}
|
||||
|
||||
} // namespace onnx_mlir::pim_binary
|
||||
+194
-155
@@ -30,6 +30,7 @@
|
||||
#include "Conversion/ONNXToSpatial/Common/Common.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimArtifactWriter.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimBatchEmission.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCodeGen.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimCompilerOptions.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimWeightEmitter.hpp"
|
||||
@@ -116,25 +117,29 @@ void PimMemory::allocateCore(Operation* op) {
|
||||
|
||||
static void printHostMemoryReportRow(raw_ostream& os, const MemoryReportRow& row) {
|
||||
llvm::SmallVector<ReportField, 2> fields = {
|
||||
{"Number of globals", std::to_string(row.numGlobal)},
|
||||
{"Global memory", formatReportMemory(row.sizeGlobal)}};
|
||||
{"Number of globals", std::to_string(row.numGlobal) },
|
||||
{"Global memory", formatReportMemory(row.sizeGlobal)}
|
||||
};
|
||||
printReportFlatFields(os, fields);
|
||||
}
|
||||
|
||||
static void printCoreMemoryReportRow(raw_ostream& os, const MemoryReportEntry& entry) {
|
||||
llvm::SmallVector<ReportField, 2> fields = {
|
||||
{"Number of allocas", std::to_string(entry.row.numAlloca)},
|
||||
{"Allocated memory", formatReportMemory(entry.row.sizeAlloca)}};
|
||||
{"Number of allocas", std::to_string(entry.row.numAlloca) },
|
||||
{"Allocated memory", formatReportMemory(entry.row.sizeAlloca)}
|
||||
};
|
||||
printReportFlatFields(os, fields);
|
||||
}
|
||||
|
||||
static void printBatchMemoryReportRow(raw_ostream& os, const MemoryReportEntry& entry) {
|
||||
llvm::SmallVector<ReportField, 2> perCoreFields = {
|
||||
{"Number of allocas", std::to_string(entry.row.numAlloca)},
|
||||
{"Allocated memory", formatReportMemory(entry.row.sizeAlloca)}};
|
||||
{"Number of allocas", std::to_string(entry.row.numAlloca) },
|
||||
{"Allocated memory", formatReportMemory(entry.row.sizeAlloca)}
|
||||
};
|
||||
llvm::SmallVector<ReportField, 2> totalFields = {
|
||||
{"Number of allocas", std::to_string(entry.totalAllocaCount)},
|
||||
{"Batch memory", formatReportMemory(entry.totalAllocaBytes)}};
|
||||
{"Number of allocas", std::to_string(entry.totalAllocaCount) },
|
||||
{"Batch memory", formatReportMemory(entry.totalAllocaBytes)}
|
||||
};
|
||||
printReportPerCoreAndTotalFields(os, perCoreFields, totalFields);
|
||||
}
|
||||
|
||||
@@ -215,12 +220,8 @@ size_t PimAcceleratorMemory::getValueAddress(mlir::Value value, const StaticValu
|
||||
void PimAcceleratorMemory::reportHost() { hostReportRow = hostMem.getReportRow(); }
|
||||
|
||||
void PimAcceleratorMemory::recordCoreReport(size_t coreId, const MemoryReportRow& row) {
|
||||
reportEntries.push_back({MemoryReportEntry::Kind::Core,
|
||||
coreId,
|
||||
{static_cast<int32_t>(coreId)},
|
||||
row,
|
||||
row.numAlloca,
|
||||
row.sizeAlloca});
|
||||
reportEntries.push_back(
|
||||
{MemoryReportEntry::Kind::Core, coreId, {static_cast<int32_t>(coreId)}, row, row.numAlloca, row.sizeAlloca});
|
||||
}
|
||||
|
||||
void PimAcceleratorMemory::recordBatchReport(uint64_t batchId,
|
||||
@@ -250,7 +251,8 @@ void PimAcceleratorMemory::flushReport() {
|
||||
|
||||
llvm::SmallVector<ReportField, 2> totalFields = {
|
||||
{"Global memory", formatReportMemory(totalGlobalMemory)},
|
||||
{"Cores memory", formatReportMemory(totalCoresMemory)}};
|
||||
{"Cores memory", formatReportMemory(totalCoresMemory) }
|
||||
};
|
||||
printReportTotalsBlock(os, totalFields);
|
||||
|
||||
if (hostReportRow.has_value()) {
|
||||
@@ -312,36 +314,25 @@ void PimAcceleratorMemory::clean(mlir::Operation* op) {
|
||||
}
|
||||
}
|
||||
|
||||
json::Object PimCodeGen::createEmptyOffset() {
|
||||
json::Object offset;
|
||||
offset["offset_select"] = 0;
|
||||
offset["offset_value"] = 0;
|
||||
return offset;
|
||||
}
|
||||
|
||||
size_t PimCodeGen::remapCoreId(size_t coreId) const {
|
||||
auto it = emittedCoreIds.find(coreId);
|
||||
assert(it != emittedCoreIds.end() && "Missing emitted core id remapping");
|
||||
return it->second;
|
||||
}
|
||||
|
||||
static json::Object createRs1OnlyOffset() {
|
||||
json::Object offset;
|
||||
offset["offset_select"] = 1;
|
||||
offset["offset_value"] = 0;
|
||||
return offset;
|
||||
}
|
||||
|
||||
void PimCodeGen::emitInstruction(json::Object instruction) const {
|
||||
coreFileStream << json::Value(std::move(instruction)) << ',';
|
||||
void PimCodeGen::emitInstruction(const pim_binary::InstructionRecord& instruction) const {
|
||||
pim_binary::writeInstructionRecord(coreBinaryStream, instruction);
|
||||
++emittedInstructionCount;
|
||||
if (coreJsonStream)
|
||||
*coreJsonStream << json::Value(pim_binary::makeInstructionJson(instruction)) << ',';
|
||||
}
|
||||
|
||||
void PimCodeGen::genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const {
|
||||
json::Object json;
|
||||
json["op"] = "sldi";
|
||||
json["rd"] = registerNumber;
|
||||
json["imm"] = immediate;
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::sldi;
|
||||
instruction.rd = static_cast<uint8_t>(registerNumber);
|
||||
instruction.r2OrImm = static_cast<int32_t>(immediate);
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::setupRd(size_t rdAddress, size_t rdOffset) const {
|
||||
@@ -369,38 +360,41 @@ void PimCodeGen::emitMemCopyOp(StringRef opName,
|
||||
StringRef sizeFieldName) const {
|
||||
setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = opName;
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json[sizeFieldName] = size;
|
||||
json["offset"] = createEmptyOffset();
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::opcodeFromString(opName);
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.generic1 = 0;
|
||||
instruction.generic2 = 0;
|
||||
instruction.generic3 = static_cast<int32_t>(size);
|
||||
(void)sizeFieldName;
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::emitCommunicationOp(StringRef opName, size_t bufferAddr, size_t coreId, size_t size) const {
|
||||
setupRd(bufferAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = opName;
|
||||
json["rd"] = 0;
|
||||
json["core"] = remapCoreId(coreId);
|
||||
json["size"] = size;
|
||||
json["offset"] = createEmptyOffset();
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::opcodeFromString(opName);
|
||||
instruction.rd = 0;
|
||||
instruction.r2OrImm = static_cast<int32_t>(remapCoreId(coreId));
|
||||
instruction.generic1 = 0;
|
||||
instruction.generic2 = 0;
|
||||
instruction.generic3 = static_cast<int32_t>(size);
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::emitMvmOp(size_t groupId, size_t rdAddr, size_t rdOffset, size_t rs1Addr, size_t rs1Offset) const {
|
||||
setupRdRs1(rdAddr, rdOffset, rs1Addr, rs1Offset);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "mvmul";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["group"] = groupId;
|
||||
json["relu"] = 0;
|
||||
json["mbiw"] = 8;
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::mvmul;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 8;
|
||||
instruction.generic1 = 0;
|
||||
instruction.generic2 = static_cast<int32_t>(groupId);
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -508,14 +502,13 @@ void PimCodeGen::codeGenVVAddOp(pim::PimVVAddOp vvaddOp, const StaticValueKnowle
|
||||
auto rhsAddr = addressOf(vvaddOp.getRhs(), knowledge);
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvadd";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvaddOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vvadd;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 2;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vvaddOp.getLhs()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -524,14 +517,13 @@ void PimCodeGen::codeGenVVSubOp(pim::PimVVSubOp vvsubOp, const StaticValueKnowle
|
||||
auto rhsAddr = addressOf(vvsubOp.getRhs(), knowledge);
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvsub";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvsubOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vvsub;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 2;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vvsubOp.getLhs()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -540,14 +532,13 @@ void PimCodeGen::codeGenVVMulOp(pim::PimVVMulOp vvmulOp, const StaticValueKnowle
|
||||
auto rhsAddr = addressOf(vvmulOp.getRhs(), knowledge);
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvmul";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvmulOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vvmul;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 2;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vvmulOp.getLhs()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -556,14 +547,13 @@ void PimCodeGen::codeGenVVMaxOp(pim::PimVVMaxOp vvmaxOp, const StaticValueKnowle
|
||||
auto rhsAddr = addressOf(vvmaxOp.getRhs(), knowledge);
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvmax";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvmaxOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vvmax;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 2;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vvmaxOp.getLhs()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -572,14 +562,13 @@ void PimCodeGen::codeGenVVDMulOp(pim::PimVVDMulOp vvdmulOp, const StaticValueKno
|
||||
auto rhsAddr = addressOf(vvdmulOp.getRhs(), knowledge);
|
||||
setupRdRs1Rs2(outputBufferAddr, 0, lhsAddr, 0, rhsAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vvdmul";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 2;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vvdmulOp.getLhs());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vvdmul;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 2;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vvdmulOp.getLhs()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -587,14 +576,14 @@ void PimCodeGen::codeGenVAvgOp(pim::PimVAvgOp vavgOp, const StaticValueKnowledge
|
||||
auto inputAddr = addressOf(vavgOp.getInput(), knowledge);
|
||||
setupRdRs1(outputBufferAddr, 0, inputAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vavg";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["rs2"] = 1;
|
||||
json["offset"] = createRs1OnlyOffset();
|
||||
json["len"] = getValueSizeInBytes(vavgOp.getInput());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vavg;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.r2OrImm = 1;
|
||||
instruction.generic1 = 1;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vavgOp.getInput()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -602,13 +591,12 @@ void PimCodeGen::codeGenVReluOp(pim::PimVReluOp vreluOp, const StaticValueKnowle
|
||||
auto inputAddr = addressOf(vreluOp.getInput(), knowledge);
|
||||
setupRdRs1(outputBufferAddr, 0, inputAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vrelu";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vreluOp.getInput());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vrelu;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vreluOp.getInput()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -616,13 +604,12 @@ void PimCodeGen::codeGenVTanhOp(pim::PimVTanhOp vtanhOp, const StaticValueKnowle
|
||||
auto inputAddr = addressOf(vtanhOp.getInput(), knowledge);
|
||||
setupRdRs1(outputBufferAddr, 0, inputAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vtanh";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vtanhOp.getInput());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vtanh;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vtanhOp.getInput()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -630,13 +617,12 @@ void PimCodeGen::codeGenVSigmOp(pim::PimVSigmOp vsigmOp, const StaticValueKnowle
|
||||
auto inputAddr = addressOf(vsigmOp.getInput(), knowledge);
|
||||
setupRdRs1(outputBufferAddr, 0, inputAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vsigm";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vsigmOp.getInput());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vsigm;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vsigmOp.getInput()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticValueKnowledge& knowledge) const {
|
||||
@@ -644,13 +630,12 @@ void PimCodeGen::codeGenVSoftmaxOp(pim::PimVSoftmaxOp vsoftmaxOp, const StaticVa
|
||||
auto inputAddr = addressOf(vsoftmaxOp.getInput(), knowledge);
|
||||
setupRdRs1(outputBufferAddr, 0, inputAddr, 0);
|
||||
|
||||
json::Object json;
|
||||
json["op"] = "vsoftmax";
|
||||
json["rd"] = 0;
|
||||
json["rs1"] = 1;
|
||||
json["offset"] = createEmptyOffset();
|
||||
json["len"] = getValueSizeInBytes(vsoftmaxOp.getInput());
|
||||
emitInstruction(std::move(json));
|
||||
pim_binary::InstructionRecord instruction;
|
||||
instruction.opcode = pim_binary::Opcode::vsoftmax;
|
||||
instruction.rd = 0;
|
||||
instruction.r1 = 1;
|
||||
instruction.generic3 = static_cast<int32_t>(getValueSizeInBytes(vsoftmaxOp.getInput()));
|
||||
emitInstruction(instruction);
|
||||
}
|
||||
|
||||
void PimCodeGen::codeGetGlobalOp(memref::GetGlobalOp getGlobalOp, const StaticValueKnowledge& knowledge) const {}
|
||||
@@ -682,6 +667,30 @@ void PimCodeGen::codeGenTransposeOp(pim::PimTransposeOp transposeOp, const Stati
|
||||
dstStrides[i] = dstStrides[i + 1] * dstShape[i + 1];
|
||||
}
|
||||
|
||||
bool storagePreserving = true;
|
||||
for (size_t srcFlat = 0; srcFlat < totalElements; srcFlat++) {
|
||||
SmallVector<size_t> srcIdx(rank);
|
||||
size_t remaining = srcFlat;
|
||||
for (size_t d = 0; d < rank; d++) {
|
||||
srcIdx[d] = remaining / srcStrides[d];
|
||||
remaining %= srcStrides[d];
|
||||
}
|
||||
|
||||
size_t dstFlat = 0;
|
||||
for (size_t d = 0; d < rank; d++)
|
||||
dstFlat += srcIdx[perm[d]] * dstStrides[d];
|
||||
|
||||
if (dstFlat != srcFlat) {
|
||||
storagePreserving = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (storagePreserving) {
|
||||
emitMemCopyOp("lmv", dstAddr, 0, srcAddr, 0, totalElements * elementSize, "len");
|
||||
return;
|
||||
}
|
||||
|
||||
// Emit element-by-element copy with transposed addressing
|
||||
for (size_t srcFlat = 0; srcFlat < totalElements; srcFlat++) {
|
||||
// Decompose flat source index into multi-dimensional index
|
||||
@@ -747,9 +756,25 @@ static SmallVector<Operation*> collectTopLevelCoreLikeOps(func::FuncOp funcOp) {
|
||||
return coreLikeOps;
|
||||
}
|
||||
|
||||
static SmallDenseMap<memref::GlobalOp, MemEntry, 16>
|
||||
collectMaterializedHostGlobals(ModuleOp moduleOp, func::FuncOp funcOp, const PimAcceleratorMemory& memory) {
|
||||
SmallDenseMap<memref::GlobalOp, MemEntry, 16> materializedHostGlobals;
|
||||
funcOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
||||
if (hasWeightAlways(getGlobalOp))
|
||||
return;
|
||||
auto targetGlobal = lookupGlobalForGetGlobal(moduleOp, getGlobalOp);
|
||||
if (!targetGlobal || materializedHostGlobals.contains(targetGlobal))
|
||||
return;
|
||||
auto it = memory.memEntriesMap.find(getGlobalOp.getResult());
|
||||
if (it != memory.memEntriesMap.end())
|
||||
materializedHostGlobals[targetGlobal] = it->second;
|
||||
});
|
||||
return materializedHostGlobals;
|
||||
}
|
||||
|
||||
static void aliasMaterializedHostGlobals(ModuleOp moduleOp,
|
||||
func::FuncOp funcOp,
|
||||
pim::PimCoreOp coreOp,
|
||||
const SmallDenseMap<memref::GlobalOp, MemEntry, 16>& materializedHostGlobals,
|
||||
PimAcceleratorMemory& memory) {
|
||||
coreOp.walk([&](memref::GetGlobalOp getGlobalOp) {
|
||||
if (hasWeightAlways(getGlobalOp) || memory.memEntriesMap.contains(getGlobalOp.getResult()))
|
||||
@@ -759,16 +784,9 @@ static void aliasMaterializedHostGlobals(ModuleOp moduleOp,
|
||||
if (!targetGlobal)
|
||||
return;
|
||||
|
||||
mlir::Value aliasedValue;
|
||||
funcOp.walk([&](memref::GetGlobalOp candidate) {
|
||||
if (aliasedValue || candidate == getGlobalOp || !memory.memEntriesMap.contains(candidate.getResult()))
|
||||
return;
|
||||
if (lookupGlobalForGetGlobal(moduleOp, candidate) == targetGlobal)
|
||||
aliasedValue = candidate.getResult();
|
||||
});
|
||||
|
||||
if (aliasedValue)
|
||||
memory.memEntriesMap[getGlobalOp.getResult()] = memory.memEntriesMap[aliasedValue];
|
||||
auto it = materializedHostGlobals.find(targetGlobal);
|
||||
if (it != materializedHostGlobals.end())
|
||||
memory.memEntriesMap[getGlobalOp.getResult()] = it->second;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -837,7 +855,7 @@ static int64_t codeGenCoreOps(Block& block, PimCodeGen& coreCodeGen) {
|
||||
return failed(result) ? -1 : static_cast<int64_t>(processedOperations);
|
||||
}
|
||||
|
||||
OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::string& outputDirPath) {
|
||||
OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimCode(ModuleOp& moduleOp, std::string& outputDirPath) {
|
||||
if (!outputDirPath.empty()) {
|
||||
if (auto error = sys::fs::create_directory(outputDirPath)) {
|
||||
errs() << "Error creating output directory: " << outputDirPath << ": " << error.message() << '\n';
|
||||
@@ -857,7 +875,7 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
if (auto err = writeMemoryBinary(moduleOp, funcOp, memory, outputDirPath))
|
||||
return err;
|
||||
|
||||
if (auto err = writeHostCoreJson(outputDirPath))
|
||||
if (auto err = writeHostCoreArtifacts(outputDirPath))
|
||||
return err;
|
||||
|
||||
// For each core, specify the number of crossbar per array group.
|
||||
@@ -870,6 +888,8 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
auto mapCoreWeightToFileName = createAndPopulateWeightFolder(funcOp, outputDirPath);
|
||||
|
||||
SmallVector<Operation*> coreLikeOps = collectTopLevelCoreLikeOps(funcOp);
|
||||
SmallDenseMap<memref::GlobalOp, MemEntry, 16> materializedHostGlobals =
|
||||
collectMaterializedHostGlobals(moduleOp, funcOp, memory);
|
||||
llvm::DenseMap<size_t, size_t> emittedCoreIds;
|
||||
size_t nextEmittedCoreId = 1;
|
||||
|
||||
@@ -899,16 +919,30 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
maxCoreId = std::max(maxCoreId, coreId);
|
||||
|
||||
std::error_code errorCode;
|
||||
auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".json";
|
||||
raw_fd_ostream coreFileStream(outputCorePath, errorCode);
|
||||
auto outputCorePath = outputDirPath + "/core_" + std::to_string(coreId) + ".pim";
|
||||
raw_fd_ostream coreBinaryStream(outputCorePath, errorCode, sys::fs::OF_None);
|
||||
if (errorCode) {
|
||||
errs() << "Error while opening core file `" << outputCorePath << "`: " << errorCode.message() << '\n';
|
||||
return InvalidOutputFileAccess;
|
||||
}
|
||||
coreFileStream << '[';
|
||||
|
||||
PimCodeGen coreCodeGen(memory, coreFileStream, emittedCoreIds);
|
||||
aliasMaterializedHostGlobals(moduleOp, funcOp, coreOp, memory);
|
||||
std::unique_ptr<raw_fd_ostream> coreJsonStream;
|
||||
if (pimEmitJson.getValue()) {
|
||||
std::string outputCoreJsonPath = outputDirPath + "/core_" + std::to_string(coreId) + ".json";
|
||||
errorCode = std::error_code();
|
||||
coreJsonStream = std::make_unique<raw_fd_ostream>(outputCoreJsonPath, errorCode);
|
||||
if (errorCode) {
|
||||
errs() << "Error while opening core json file `" << outputCoreJsonPath << "`: " << errorCode.message()
|
||||
<< '\n';
|
||||
return InvalidOutputFileAccess;
|
||||
}
|
||||
*coreJsonStream << '[';
|
||||
}
|
||||
|
||||
pim_binary::writeHeader(coreBinaryStream);
|
||||
|
||||
PimCodeGen coreCodeGen(memory, coreBinaryStream, coreJsonStream.get(), emittedCoreIds);
|
||||
aliasMaterializedHostGlobals(moduleOp, coreOp, materializedHostGlobals, memory);
|
||||
auto& deviceMemory = memory.getOrCreateDeviceMem(coreId);
|
||||
deviceMemory.allocateCore(coreOp);
|
||||
|
||||
@@ -920,9 +954,14 @@ OnnxMlirCompilerErrorCodes onnx_mlir::compileToPimJson(ModuleOp& moduleOp, std::
|
||||
if (reportRow)
|
||||
*reportRow = deviceMemory.getReportRow();
|
||||
|
||||
coreFileStream.seek(coreFileStream.tell() - 1);
|
||||
coreFileStream << ']';
|
||||
coreFileStream.close();
|
||||
pim_binary::patchInstructionCount(coreBinaryStream, coreCodeGen.getEmittedInstructionCount());
|
||||
coreBinaryStream.close();
|
||||
|
||||
if (coreJsonStream) {
|
||||
coreJsonStream->seek(coreJsonStream->tell() - 1);
|
||||
*coreJsonStream << ']';
|
||||
coreJsonStream->close();
|
||||
}
|
||||
|
||||
auto coreWeightsDirPath = outputDirPath + "/core_" + std::to_string(coreId);
|
||||
if (auto error = sys::fs::create_directory(coreWeightsDirPath)) {
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "onnx-mlir/Compiler/OMCompilerTypes.h"
|
||||
#include "src/Accelerators/PIM/Common/PimCommon.hpp"
|
||||
#include "src/Accelerators/PIM/Common/Support/ReportUtils.hpp"
|
||||
#include "src/Accelerators/PIM/Compiler/PimBinaryFormat.hpp"
|
||||
#include "src/Accelerators/PIM/Dialect/Pim/PimOps.hpp"
|
||||
|
||||
namespace onnx_mlir {
|
||||
@@ -104,16 +105,17 @@ public:
|
||||
|
||||
class PimCodeGen {
|
||||
PimAcceleratorMemory& memory;
|
||||
llvm::raw_fd_ostream& coreFileStream;
|
||||
llvm::raw_fd_ostream& coreBinaryStream;
|
||||
llvm::raw_fd_ostream* coreJsonStream;
|
||||
const llvm::DenseMap<size_t, size_t>& emittedCoreIds;
|
||||
mutable uint32_t emittedInstructionCount = 0;
|
||||
|
||||
size_t addressOf(mlir::Value value, const StaticValueKnowledge& knowledge) const {
|
||||
return memory.getValueAddress(value, knowledge);
|
||||
}
|
||||
size_t remapCoreId(size_t coreId) const;
|
||||
|
||||
static llvm::json::Object createEmptyOffset();
|
||||
void emitInstruction(llvm::json::Object instruction) const;
|
||||
void emitInstruction(const pim_binary::InstructionRecord& instruction) const;
|
||||
|
||||
void genSetRegisterImmediateUnsigned(size_t registerNumber, size_t immediate) const;
|
||||
void setupRd(size_t rdAddress, size_t rdOffset) const;
|
||||
@@ -133,9 +135,12 @@ class PimCodeGen {
|
||||
|
||||
public:
|
||||
PimCodeGen(PimAcceleratorMemory& memory,
|
||||
llvm::raw_fd_ostream& coreJson,
|
||||
llvm::raw_fd_ostream& coreBinary,
|
||||
llvm::raw_fd_ostream* coreJson,
|
||||
const llvm::DenseMap<size_t, size_t>& emittedCoreIds)
|
||||
: memory(memory), coreFileStream(coreJson), emittedCoreIds(emittedCoreIds) {}
|
||||
: memory(memory), coreBinaryStream(coreBinary), coreJsonStream(coreJson), emittedCoreIds(emittedCoreIds) {}
|
||||
|
||||
uint32_t getEmittedInstructionCount() const { return emittedInstructionCount; }
|
||||
|
||||
void codeGenLoadOp(pim::PimMemCopyHostToDevOp loadOp, const StaticValueKnowledge& knowledge) const;
|
||||
void codeGenStoreOp(pim::PimMemCopyDevToHostOp storeOp, const StaticValueKnowledge& knowledge) const;
|
||||
@@ -164,6 +169,6 @@ public:
|
||||
void codeGenTransposeOp(pim::PimTransposeOp transposeOp, const StaticValueKnowledge& knowledge) const;
|
||||
};
|
||||
|
||||
OnnxMlirCompilerErrorCodes compileToPimJson(mlir::ModuleOp& moduleOpRef, std::string& outputDirName);
|
||||
OnnxMlirCompilerErrorCodes compileToPimCode(mlir::ModuleOp& moduleOpRef, std::string& outputDirName);
|
||||
|
||||
} // namespace onnx_mlir
|
||||
|
||||
@@ -24,6 +24,11 @@ llvm::cl::opt<bool> useExperimentalConvImpl("use-experimental-conv-impl",
|
||||
llvm::cl::init(false),
|
||||
llvm::cl::cat(OnnxMlirOptions));
|
||||
|
||||
llvm::cl::opt<bool> pimEmitJson("pim-emit-json",
|
||||
llvm::cl::desc("Also emit per-core JSON instruction files alongside binary .pim files"),
|
||||
llvm::cl::init(false),
|
||||
llvm::cl::cat(OnnxMlirOptions));
|
||||
|
||||
llvm::cl::opt<size_t>
|
||||
crossbarSize("crossbar-size", llvm::cl::desc("Width and heigth of a single crossbar"), llvm::cl::init(2));
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ extern llvm::cl::opt<PimEmissionTargetType> pimEmissionTarget;
|
||||
|
||||
extern llvm::cl::opt<bool> pimOnlyCodegen;
|
||||
extern llvm::cl::opt<bool> useExperimentalConvImpl;
|
||||
extern llvm::cl::opt<bool> pimEmitJson;
|
||||
|
||||
extern llvm::cl::opt<size_t> crossbarSize;
|
||||
extern llvm::cl::opt<size_t> crossbarCountInCore;
|
||||
|
||||
@@ -52,9 +52,9 @@ void addPassesPim(OwningOpRef<ModuleOp>& module,
|
||||
pm.addPass(createPimMaterializeHostConstantsPass());
|
||||
pm.addPass(createPimVerificationPass());
|
||||
pm.addPass(createMessagePass("Pim verified"));
|
||||
pm.addPass(createEmitPimJsonPass());
|
||||
pm.addPass(createEmitPimCodePass());
|
||||
// pm.addPass(createCountInstructionPass());
|
||||
pm.addPass(createMessagePass("Pim json code emitted"));
|
||||
pm.addPass(createMessagePass("Pim code emitted"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ add_pim_library(OMPimPasses
|
||||
PimCodegen/HostConstantFolding/Patterns/Subview.cpp
|
||||
PimCodegen/MaterializeHostConstantsPass.cpp
|
||||
PimCodegen/VerificationPass.cpp
|
||||
PimCodegen/EmitPimJsonPass.cpp
|
||||
PimCodegen/EmitPimCodePass.cpp
|
||||
|
||||
EXCLUDE_FROM_OM_LIBS
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ std::unique_ptr<mlir::Pass> createPimMaterializeHostConstantsPass();
|
||||
|
||||
std::unique_ptr<mlir::Pass> createPimVerificationPass();
|
||||
|
||||
std::unique_ptr<mlir::Pass> createEmitPimJsonPass();
|
||||
std::unique_ptr<mlir::Pass> createEmitPimCodePass();
|
||||
|
||||
std::unique_ptr<mlir::Pass> createMessagePass(std::string message);
|
||||
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
#include "mlir/Pass/Pass.h"
|
||||
|
||||
#include "Common/PimCommon.hpp"
|
||||
#include "Compiler/PimCodeGen.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
|
||||
namespace {
|
||||
|
||||
struct EmitPimCodePass : PassWrapper<EmitPimCodePass, OperationPass<ModuleOp>> {
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(EmitPimCodePass);
|
||||
StringRef getArgument() const override { return "emit-pim-code-pass"; }
|
||||
StringRef getDescription() const override { return "Emit PIM simulator code artifacts"; }
|
||||
|
||||
EmitPimCodePass() {}
|
||||
EmitPimCodePass(const EmitPimCodePass& pass) {}
|
||||
|
||||
void runOnOperation() override {
|
||||
ModuleOp moduleOp = getOperation();
|
||||
|
||||
std::string pimDir = getOutputDir() + "/pim";
|
||||
createDirectory(pimDir);
|
||||
|
||||
int compiler_error_code = compileToPimCode(moduleOp, pimDir);
|
||||
if (compiler_error_code != CompilerSuccess)
|
||||
signalPassFailure();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<Pass> createEmitPimCodePass() { return std::make_unique<EmitPimCodePass>(); }
|
||||
|
||||
} // namespace onnx_mlir
|
||||
@@ -1,36 +0,0 @@
|
||||
#include "mlir/Pass/Pass.h"
|
||||
|
||||
#include "Common/PimCommon.hpp"
|
||||
#include "Compiler/PimCodeGen.hpp"
|
||||
|
||||
using namespace mlir;
|
||||
|
||||
namespace onnx_mlir {
|
||||
|
||||
namespace {
|
||||
|
||||
struct EmitPimJsonPass : PassWrapper<EmitPimJsonPass, OperationPass<ModuleOp>> {
|
||||
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(EmitPimJsonPass);
|
||||
StringRef getArgument() const override { return "emit-pim-json-pass"; }
|
||||
StringRef getDescription() const override { return "Emit json code for the pim simulators"; }
|
||||
|
||||
EmitPimJsonPass() {}
|
||||
EmitPimJsonPass(const EmitPimJsonPass& pass) {}
|
||||
|
||||
void runOnOperation() override {
|
||||
ModuleOp moduleOp = getOperation();
|
||||
|
||||
std::string pimDir = getOutputDir() + "/pim";
|
||||
createDirectory(pimDir);
|
||||
|
||||
int compiler_error_code = compileToPimJson(moduleOp, pimDir);
|
||||
if (compiler_error_code != CompilerSuccess)
|
||||
signalPassFailure();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<Pass> createEmitPimJsonPass() { return std::make_unique<EmitPimJsonPass>(); }
|
||||
|
||||
} // namespace onnx_mlir
|
||||
@@ -80,7 +80,7 @@ void PimAccelerator::registerPasses(int optLevel) const {
|
||||
registerPass(createPimHostConstantFoldingPass);
|
||||
registerPass(createPimMaterializeHostConstantsPass);
|
||||
registerPass(createPimVerificationPass);
|
||||
registerPass(createEmitPimJsonPass);
|
||||
registerPass(createEmitPimCodePass);
|
||||
}
|
||||
|
||||
void PimAccelerator::configurePasses() const {
|
||||
|
||||
@@ -13,7 +13,7 @@ PIM_PASS_LABELS = (
|
||||
("HostConstantFoldingPass", "Fold Host Constants"),
|
||||
("MaterializeHostConstantsPass", "Materialize Host Constants"),
|
||||
("VerificationPass", "Verify PIM"),
|
||||
("EmitPimJsonPass", "Emit PIM JSON"),
|
||||
("EmitPimCodePass", "Emit PIM Code"),
|
||||
)
|
||||
PIM_PASS_LABEL_BY_SUFFIX = dict(PIM_PASS_LABELS)
|
||||
TIMING_LINE_RE = re.compile(r"^\s*([0-9]+\.[0-9]+)\s+\(\s*[0-9.]+%\)\s+(.+?)\s*$")
|
||||
|
||||
Reference in New Issue
Block a user