add pimsim-nn simulator

This commit is contained in:
NiccoloN
2026-02-23 16:17:52 +01:00
parent 9c716d2582
commit 9ba08a6780
66 changed files with 8 additions and 4 deletions

View File

@@ -0,0 +1,211 @@
use anyhow::{Context, Result, bail};
use clap::Parser;
use glob::glob;
use pimcore::cpu::crossbar;
use pimcore::json_to_instruction::json_to_executor;
use serde_json::Value;
use std::{fs, usize};
use std::io::Write;
use std::path::PathBuf;
/// Program to simulate core execution configuration
// NOTE: the `///` doc comments on the fields below double as clap `--help`
// text, so they are user-visible program output and must not be reworded
// casually.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// The folder containing the configuration files
    // Default source for config.json, core*.json, memory.* and core_<i>/ dirs.
    #[arg(short, long)]
    folder: Option<PathBuf>,
    /// Override path for the config.json file
    #[arg(long)]
    config: Option<PathBuf>,
    /// Override path for the core*.json files
    #[arg(long, value_delimiter = ',', num_args = 1..)]
    cores: Option<Vec<PathBuf>>,
    /// Override path for the memory file (.txt or .bin)
    #[arg(long)]
    memory: Option<PathBuf>,
    /// Path to output file
    #[arg(short, long)]
    output: PathBuf,
    /// Comma separated list of (address,size) for memory output dump
    // Flat list consumed pairwise as (address, size) by dump_memory.
    #[arg(short, long, value_delimiter = ',', num_args = 1.., value_name = "ADDR,SIZE")]
    dump: Vec<i32>,
}
/// Entry point: resolve inputs, build the executor, run the simulation and
/// dump the requested memory regions to the output file.
fn main() -> Result<()> {
    let args = Args::parse();
    // Inputs come either from explicit override flags or from --folder.
    let config_json = retrive_config(&args)?;
    let core_jsons = retrive_cores(&args)?;
    let memory = retrive_memory(&args)?;
    let mut executor = json_to_executor::json_to_executor(config_json, core_jsons.iter());
    // Preload crossbar weights from core_<i>/crossbar_<j>.bin files, if present.
    populate_crossbar(&args, &mut executor);
    // Copy the initial memory image into the host core before running.
    set_memory(&mut executor, memory);
    executor.execute();
    dump_memory(executor, &args)?;
    Ok(())
}
fn populate_crossbar(args: &Args, executor: &mut pimcore::Executable) {
let num_cores = executor.cpu_mut().num_core();
if let Some(folder) = args.folder.as_ref() {
for core_idx in 0..num_cores {
let core_folder = folder.join(format!("core_{}", core_idx));
if !core_folder.is_dir() {
continue;
}
let mut bin_files: Vec<(u32, std::path::PathBuf)> = std::fs::read_dir(&core_folder)
.expect("Failed to read core directory")
.filter_map(|entry| {
let path = entry.ok()?.path();
let file_name = path.file_name()?.to_str()?;
if file_name.starts_with("crossbar_") && file_name.ends_with(".bin") {
let num_str = &file_name[9..file_name.len() - 4];
let num = num_str.parse::<u32>().ok()?;
Some((num, path))
} else {
None
}
})
.collect();
bin_files.sort_by_key(|&(num, _)| num);
let core = executor.cpu_mut().core(core_idx+1);
let (_memory, crossbars) = core.get_memory_crossbar();
for (i, path) in bin_files {
let bytes = std::fs::read(path).expect("Failed to read binary file");
crossbars.get_mut(i as usize)
.unwrap().execute_store(&bytes).unwrap();
}
}
}
}
fn dump_memory(mut executor: pimcore::Executable, args: &Args) -> Result<()> {
let dumps: Vec<(i32, i32)> = args
.dump
.chunks_exact(2)
.map(|chunk| (chunk[0], chunk[1]))
.collect();
let mut out_file = fs::OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(&args.output)
.with_context(|| format!("cannot open file {:?} for writing", args.output))?;
for (address, size) in dumps {
out_file.write_all(executor.cpu_mut().host().load::<u8>(address, size).unwrap()[0])?;
}
Ok(())
}
/// Copies the initial memory image into the host core starting at address 0.
fn set_memory(executor: &mut pimcore::Executable, memory: Vec<u8>) {
    // NOTE(review): a failed store aborts the whole run via unwrap; assumes
    // the host memory is large enough for the image — TODO confirm sizing.
    executor.cpu_mut().host().execute_store(0, &memory).unwrap();
}
fn retrive_memory(args: &Args) -> Result<Vec<u8>> {
let memory_path = if let Some(mem_override) = &args.memory {
mem_override.clone()
} else if let Some(folder) = &args.folder.as_ref() {
let bin_path = folder.join("memory.bin");
let txt_path = folder.join("memory.txt");
if bin_path.exists() {
bin_path
} else if txt_path.exists() {
txt_path
} else {
bail!(
"No memory file (memory.bin or memory.txt) found in {:?}",
folder
);
}
} else {
bail!("Either --memory or --folder must be provided.");
};
let memory_vector: Vec<u8> = if let Some(ext) = memory_path.extension() {
if ext == "bin" {
fs::read(&memory_path).context("Failed to read binary memory file")?
} else {
let content =
fs::read_to_string(&memory_path).context("Failed to read ascii memory file")?;
let clean_hex: String = content.chars().filter(|c| !c.is_whitespace()).collect();
hex::decode(&clean_hex).context("Failed to decode ASCII hex string")?
}
} else {
bail!("Memory file has no extension, cannot determine type");
};
Ok(memory_vector)
}
/// Loads the core definition JSON documents.
///
/// Explicit `--cores` paths take precedence; otherwise every `core*.json`
/// inside `--folder` is loaded in glob order.
fn retrive_cores(args: &Args) -> Result<Vec<Value>, anyhow::Error> {
    let mut core_jsons: Vec<Value> = Vec::new();
    if let Some(cores_override) = &args.cores {
        for core in cores_override {
            // BUG FIX: the error context used to print the whole override
            // list instead of the file that actually failed to read.
            let content = fs::read_to_string(core)
                .with_context(|| format!("Failed to read core file: {:?}", core))?;
            let json: Value =
                serde_json::from_str(&content).context("Failed to parse core json override")?;
            core_jsons.push(json);
        }
    } else if let Some(folder) = args.folder.as_ref() {
        let pattern = folder.join("core*.json");
        let pattern_str = pattern.to_str().context("Invalid path encoding")?;
        let paths: Vec<_> = glob(pattern_str)?.collect();
        if paths.is_empty() {
            bail!("No core*.json files found in {:?}", folder);
        }
        for entry in paths {
            let path = entry?;
            let content = fs::read_to_string(&path)
                .with_context(|| format!("Failed to read core file: {:?}", path))?;
            let json: Value = serde_json::from_str(&content)
                .with_context(|| format!("Failed to parse JSON in {:?}", path))?;
            core_jsons.push(json);
        }
    } else {
        bail!("Either --core or --folder must be provided to find core definitions.");
    }
    Ok(core_jsons)
}
fn retrive_config(args: &Args) -> Result<Value, anyhow::Error> {
let config_path: PathBuf = {
let override_path = args.config.as_ref();
let folder = args.folder.as_ref();
let filename = "config.json";
if let Some(path) = override_path {
path.clone()
} else if let Some(folder) = folder {
let path = folder.join(filename);
if path.exists() {
path
} else {
bail!("File {} not found in folder {:?}", filename, folder);
}
} else {
bail!(
"Cannot resolve {}: no override path and no input folder provided.",
filename
);
}
};
let config_content = fs::read_to_string(&config_path)
.with_context(|| format!("Failed to read config file: {:?}", config_path))?;
let config_json: Value =
serde_json::from_str(&config_content).context("Failed to parse config.json")?;
Ok(config_json)
}

View File

@@ -0,0 +1,53 @@
use crate::memory_manager::{CoreMemory, MemoryStorable};
use anyhow::{Result, bail, ensure};
/// A fixed-size crossbar array backed by a `CoreMemory` buffer.
#[derive(Debug, Clone)]
pub struct Crossbar {
    // Row width — TODO confirm unit (callers pass a byte width).
    max_width: usize,
    max_height: usize,
    // Bytes written by the most recent successful execute_store.
    stored_bytes: usize,
    memory: CoreMemory,
}
impl Crossbar {
    pub fn new(width: usize, height: usize, memory: CoreMemory) -> Self {
        Self {
            max_width: width,
            max_height: height,
            memory,
            stored_bytes: 0,
        }
    }
    pub fn width(&self) -> usize {
        self.max_width
    }
    pub fn height(&self) -> usize {
        self.max_height
    }
    pub fn stored_bytes(&self) -> usize {
        self.stored_bytes
    }
    /// Replaces the crossbar contents with `element`.
    ///
    /// # Errors
    /// Fails when the payload exceeds `max_width * max_height` bytes or the
    /// underlying store fails.
    pub fn execute_store<T>(&mut self, element: &[T]) -> Result<()>
    where
        T: MemoryStorable,
    {
        self.memory.clear();
        let total_size = self.max_width * self.max_height;
        self.memory.set_capacity(total_size);
        let stored_size = std::mem::size_of_val(element);
        ensure!(stored_size <= total_size, "Storing more than crossbar can handle");
        // BUG FIX: stored_bytes was updated before the store ran, so a failed
        // store left it describing data that was never written.
        self.memory.execute_store(0, element)?;
        self.stored_bytes = stored_size;
        Ok(())
    }
    /// Reads the first `size` bytes of the crossbar as `T` slices.
    pub fn load<T>(&mut self, size: usize) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        if self.memory.get_len() < size
        // NOTE(review): the stored_bytes bound below was deliberately
        // disabled; callers currently read the full capacity — confirm.
        //|| self.stored_bytes < size
        {
            bail!(
                "Loading outside crossbar boundary [{} {}] < {}",
                self.stored_bytes,
                self.memory.get_len(),
                size
            );
        }
        self.memory.load(0, size)
    }
}

View File

@@ -0,0 +1,174 @@
use std::{collections::HashMap, fmt::Debug};
use anyhow::{Context, Result};
use crate::{
cpu::crossbar::Crossbar,
instruction_set::Instructions,
memory_manager::{CoreMemory, MemoryStorable, type_traits::TryToUsize},
};
pub mod crossbar;
/// The simulated processor: a host core plus worker cores.
///
/// `cores[0]` is always the host; worker cores occupy indices `1..len`.
#[derive(Debug, Clone)]
pub struct CPU {
    cores: Box<[Core]>,
}
impl CPU {
    /// Creates `num_cores` worker cores plus one extra slot for the host.
    pub fn new(num_cores: impl TryToUsize) -> Self {
        let num_cores = num_cores.try_into().expect("num_cores can not be negative");
        // FIX: dropped the unused `mut` — the Vec is never mutated.
        // +1 for the host core at index 0.
        let cores: Vec<Core> = std::iter::repeat_with(Core::new)
            .take(num_cores + 1)
            .collect();
        Self {
            cores: cores.into(),
        }
    }
    /// Reserves `num_crossbar` crossbars of `byte_width * height` bytes on
    /// every core, host included.
    pub fn reserve_crossbar(
        &mut self,
        num_crossbar: impl TryToUsize,
        byte_width: impl TryToUsize,
        height: impl TryToUsize,
    ) {
        let num_crossbar = num_crossbar
            .try_into()
            .expect("num_crossbar can not be negative");
        let byte_width = byte_width
            .try_into()
            .expect("byte_width can not be negative");
        let height = height.try_into().expect("height can not be negative");
        for core in &mut self.cores {
            core.reserve_crossbar(num_crossbar, byte_width, height);
        }
    }
    /// The host core (index 0).
    pub fn host(&mut self) -> &mut Core {
        &mut self.cores[0]
    }
    /// A core by absolute index; index 0 is the host.
    ///
    /// # Panics
    /// Panics when `index` is negative or out of range.
    pub fn core(&mut self, index: impl TryToUsize) -> &mut Core {
        let index = index.try_into().expect("can not be negative");
        &mut self.cores[index]
    }
    /// Total number of cores INCLUDING the host slot at index 0.
    pub fn num_core(&self) -> usize {
        self.cores.len()
    }
    /// Borrows the host and one worker core simultaneously.
    ///
    /// # Panics
    /// Panics when `core` is 0 (would alias the host) or out of range.
    pub(crate) fn host_and_cores(&mut self, core: impl TryToUsize) -> (&mut Core, &mut Core) {
        let core = core.try_into().expect("core can not be negative");
        assert_ne!(
            core, 0,
            "Retriving a core with the host that will always be at position 0"
        );
        let (host, cores) = self.cores.split_at_mut(1);
        let host = host.get_mut(0).expect("Missing the host??");
        let core = cores
            .get_mut(core - 1)
            .expect("Requested a core not present ");
        (host, core)
    }
    /// Borrows several distinct cores at once.
    ///
    /// # Panics
    /// Panics when indices overlap or are out of bounds.
    pub fn get_multiple_cores<const N: usize>(&mut self, indices: [usize; N]) -> [&mut Core; N] {
        self.cores.get_disjoint_mut(indices).unwrap()
    }
}
/// One simulated core: local memory, crossbar arrays and 32 registers.
#[derive(Debug, Clone)]
pub struct Core {
    crossbars: Vec<Crossbar>,
    memory: CoreMemory,
    registers: [i32; 32],
}
impl Core {
    fn new() -> Self {
        Self {
            crossbars: Vec::new(),
            memory: CoreMemory::new(),
            registers: [0; 32],
        }
    }
    /// Appends `num_crossbar` crossbars, each backed by `width * height`
    /// bytes of capacity.
    pub fn reserve_crossbar(
        &mut self,
        num_crossbar: impl TryToUsize,
        width: impl TryToUsize,
        height: impl TryToUsize,
    ) {
        let num_crossbar = num_crossbar
            .try_into()
            .expect("num_crossbar can not be negative");
        let width = width.try_into().expect("width can not be negative");
        let height = height.try_into().expect("height can not be negative");
        for _ in 0..num_crossbar {
            let mut crossbar = CoreMemory::new();
            crossbar.set_capacity(width * height);
            self.crossbars.push(Crossbar::new(width, height, crossbar));
        }
    }
    /// Executes every load previously queued with [`Core::reserve_load`].
    pub fn execute_load<T>(&mut self) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        self.memory.execute_load()
    }
    /// Stores `element` into core memory at `address`.
    pub fn execute_store<T>(&mut self, address: impl TryToUsize, element: &[T]) -> Result<()>
    where
        T: MemoryStorable,
    {
        let address = address.try_into().context("address can not be negative")?;
        self.memory.execute_store(address, element)
    }
    /// Queues a load of `size` bytes at `address`; further loads may be
    /// chained on the returned memory handle before `execute_load`.
    pub fn reserve_load(
        &mut self,
        address: impl TryToUsize,
        size: impl TryToUsize,
    ) -> Result<&mut CoreMemory> {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        self.memory.reserve_load(address, size)
    }
    /// Writes `value` into register `index`.
    ///
    /// # Panics
    /// Panics on a negative `value`: register contents are later consumed as
    /// usize (see the assert message), so negatives are rejected eagerly.
    pub fn set_register(&mut self, index: impl TryToUsize, value: i32) {
        let index = index.try_into().expect("index can not be negative");
        assert!(
            value >= 0,
            "Register cannot be negative if happens remove this and go check where it's used as usize"
        );
        self.registers[index] = value;
    }
    /// Reads register `index`.
    pub fn register(&mut self, index: impl TryToUsize) -> i32 {
        let index = index.try_into().expect("index can not be negative");
        self.registers[index]
    }
    /// Immediately loads `size` bytes at `address` as `T` slices.
    pub fn load<T>(&mut self, address: impl TryToUsize, size: impl TryToUsize) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        self.memory.load(address, size)
    }
    /// Splits the borrow so memory and crossbars can be used at once.
    pub fn get_memory_crossbar(&mut self) -> (&mut CoreMemory, &mut Vec<Crossbar>) {
        // FIX: use `..` instead of binding `registers`, which was unused and
        // produced an unused-variable warning.
        let Self {
            crossbars, memory, ..
        } = self;
        (memory, crossbars)
    }
    /// Fills `size` bytes starting at `address` with `val`.
    pub fn memset(&mut self, address: impl TryToUsize, size: impl TryToUsize, val: u8) -> Result<()> {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        self.memory.memset(address, size, val)
    }
}

View File

@@ -0,0 +1,272 @@
use std::mem::transmute;
use crate::memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits},
};
/// Reinterprets a slice of 4- or 8-byte float-like `M` values as `f32`/`f64`
/// and sums it with the width-specific kernel.
///
/// # Panics
/// Panics when `size_of::<M>()` is neither 4 nor 8.
#[inline]
pub fn add_all<M>(sum: &[M]) -> M
where
    M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
{
    if size_of::<M>() == 4 {
        // SAFETY: M is exactly 4 bytes, so each element can be viewed as an
        // f32 bit pattern. NOTE(review): soundness additionally assumes M is
        // an f32-like type (the FromFloat bound suggests so) — confirm.
        let (prefix, slice, suffix) = unsafe { sum.align_to::<f32>() };
        // align_to may shave misaligned elements into prefix/suffix; losing
        // them would silently corrupt the sum, so catch it in debug builds.
        debug_assert!(prefix.is_empty() && suffix.is_empty());
        return M::from_f32(add_all_f32_impl(slice));
    } else if size_of::<M>() == 8 {
        // SAFETY: M is exactly 8 bytes; same caveat as the f32 branch.
        let (prefix, slice, suffix) = unsafe { sum.align_to::<f64>() };
        debug_assert!(prefix.is_empty() && suffix.is_empty());
        return M::from_f64(add_all_f64_impl(slice));
    }
    panic!("Size not found");
}
/// Sums a slice of f64 values.
///
/// Slices longer than 64 elements are accumulated in blocks of 64 (AVX-512
/// reductions when compiled with `avx512f`, otherwise four 16-element
/// partial sums) followed by a scalar tail loop. The blocked order may round
/// differently from a strict left-to-right sum.
#[inline]
pub fn add_all_f64_impl(sum: &[f64]) -> f64 {
    let len = sum.len();
    if len <= 64 {
        return sum.iter().sum();
    }
    let mut acc = 0f64;
    let mut y = 0;
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "avx512f"
    ))]
    {
        // FIX: `y + 64 <= len` (was `<`) so an exact multiple of 64 is fully
        // handled by the vector path instead of falling to the tail loop.
        while y + 64 <= len {
            // SAFETY: y + 64 <= len keeps every 8-lane load in bounds.
            // `loadu` replaces the aligned `_mm512_load_pd`: the slice has no
            // 64-byte alignment guarantee, so an aligned load could fault.
            unsafe {
                use std::arch::x86_64::_mm512_loadu_pd;
                use std::arch::x86_64::_mm512_reduce_add_pd;
                let a = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[y]));
                let b = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[8 + y]));
                let c = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[16 + y]));
                let d = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[24 + y]));
                let e = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[32 + y]));
                let f = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[40 + y]));
                let g = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[48 + y]));
                // BUG FIX: this load previously started at offset 54, which
                // double-counted sum[54..56] and skipped sum[62..64].
                let h = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[56 + y]));
                acc = acc + a + b + c + d + e + f + g + h;
            }
            y += 64;
        }
    }
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        not(target_feature = "avx512f")
    ))]
    {
        while y + 64 <= len {
            // Four independent 16-element partial sums keep the dependency
            // chain short, matching the hand-unrolled original.
            let mut block = [0f64; 4];
            for (q, chunk) in sum[y..y + 64].chunks_exact(16).enumerate() {
                block[q] = chunk.iter().sum();
            }
            acc = acc + block[0] + block[1] + block[2] + block[3];
            y += 64;
        }
    }
    // Scalar tail: also covers non-x86 targets, where both blocks above
    // compile out and y stays 0.
    for &v in &sum[y..] {
        acc += v;
    }
    acc
}
/// Sums a slice of f32 values.
///
/// Slices longer than 64 elements are accumulated in blocks of 64 (AVX-512
/// reductions when compiled with `avx512f`, otherwise four 16-element
/// partial sums) followed by a scalar tail loop. The blocked order may round
/// differently from a strict left-to-right sum.
/// (FIX: removed two dead `rem_4` locals — both unused, the second shadowed
/// the first.)
#[inline]
pub fn add_all_f32_impl(sum: &[f32]) -> f32 {
    let len = sum.len();
    if len <= 64 {
        return sum.iter().sum();
    }
    let mut acc = 0f32;
    let mut y = 0;
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "avx512f"
    ))]
    {
        // FIX: `y + 64 <= len` (was `<`) so exact multiples of 64 are fully
        // vectorised.
        while y + 64 <= len {
            // SAFETY: y + 64 <= len keeps all four 16-lane loads in bounds.
            // `loadu` replaces the aligned `_mm512_load_ps`: the slice has no
            // 64-byte alignment guarantee, so an aligned load could fault.
            unsafe {
                use std::arch::x86_64::{_mm512_loadu_ps, _mm512_reduce_add_ps};
                let a = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[y]));
                let b = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[16 + y]));
                let c = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[32 + y]));
                let d = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[48 + y]));
                acc = acc + a + b + c + d;
            }
            y += 64;
        }
    }
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        not(target_feature = "avx512f")
    ))]
    {
        while y + 64 <= len {
            // Four independent 16-element partial sums keep the dependency
            // chain short, matching the hand-unrolled original.
            let mut block = [0f32; 4];
            for (q, chunk) in sum[y..y + 64].chunks_exact(16).enumerate() {
                block[q] = chunk.iter().sum();
            }
            acc = acc + block[0] + block[1] + block[2] + block[3];
            y += 64;
        }
    }
    // Scalar tail: also covers non-x86 targets, where both blocks above
    // compile out and y stays 0.
    for &v in &sum[y..] {
        acc += v;
    }
    acc
}

View File

@@ -0,0 +1,315 @@
use paste::paste;
/// Decoded operand payload shared by every instruction.
///
/// Several logical operands alias one storage slot; the accessor names below
/// document which interpretation a given instruction uses.
#[derive(Clone, Copy, Debug, Default)]
pub struct InstructionData {
    core_indx: i32,
    rd: i32,
    r1: i32,
    // Aliases: r2 / imm / mbiw / imm_core.
    r2_or_imm: i32,
    // Aliases: offset_select / imm_relu / ibiw.
    generic1: i32,
    // Aliases: offset_value / imm_group / obiw.
    generic2: i32,
    // Alias: imm_len.
    generic3: i32,
}
impl InstructionData {
    // --- scalar accessors -------------------------------------------------
    /// Target core index.
    pub fn core_indx(&self) -> i32 {
        self.core_indx
    }
    /// Destination register.
    pub fn rd(&self) -> i32 {
        self.rd
    }
    /// First source register.
    pub fn r1(&self) -> i32 {
        self.r1
    }
    /// Second source register (aliases the imm slot).
    pub fn r2(&self) -> i32 {
        self.r2_or_imm
    }
    /// Immediate value (aliases the r2 slot).
    pub fn imm(&self) -> i32 {
        self.r2_or_imm
    }
    /// Matrix bit width (aliases the r2 slot).
    pub fn mbiw(&self) -> i32 {
        self.r2_or_imm
    }
    /// Core immediate (aliases the r2 slot).
    pub fn imm_core(&self) -> i32 {
        self.r2_or_imm
    }
    pub fn offset_select(&self) -> i32 {
        self.generic1
    }
    pub fn offset_value(&self) -> i32 {
        self.generic2
    }
    pub fn imm_len(&self) -> i32 {
        self.generic3
    }
    // --- tuple accessors, composed from the scalar ones -------------------
    pub fn get_core_rd_r1(&self) -> (i32, i32, i32) {
        (self.core_indx(), self.rd(), self.r1())
    }
    pub fn get_core_rd_r1_r2(&self) -> (i32, i32, i32, i32) {
        (self.core_indx(), self.rd(), self.r1(), self.r2())
    }
    pub fn get_core_rd_imm(&self) -> (i32, i32, i32) {
        (self.core_indx(), self.rd(), self.imm())
    }
    pub fn get_core_rd_r1_imm(&self) -> (i32, i32, i32, i32) {
        (self.core_indx(), self.rd(), self.r1(), self.imm())
    }
    pub fn get_core_rd_r1_r2_immlen_offset(&self) -> (i32, i32, i32, i32, i32, i32, i32) {
        (
            self.core_indx(),
            self.rd(),
            self.r1(),
            self.r2(),
            self.imm_len(),
            self.offset_select(),
            self.offset_value(),
        )
    }
    pub fn get_core_rd_r1_mbiw_immrelu_immgroup(&self) -> (i32, i32, i32, i32, i32, i32) {
        (
            self.core_indx(),
            self.rd(),
            self.r1(),
            self.mbiw(),
            self.generic1,
            self.generic2,
        )
    }
    pub fn get_ibiw_obiw(&self) -> (i32, i32) {
        (self.generic1, self.generic2)
    }
    pub(crate) fn get_core_immcore(&self) -> (i32, i32) {
        (self.core_indx(), self.imm_core())
    }
}
/// Tracks whether a builder field is pinned (`Fix`) or editable (`Edit`).
///
/// `clear` resets `Edit` values to 0 but leaves `Fix`ed values untouched, so
/// a decoder can pin a field across many built instructions.
#[derive(Clone, Copy, Debug)]
enum Fixer {
    Fix(i32),
    Edit(i32),
}
impl Fixer {
    /// Pins the current value so `clear` keeps it.
    fn to_fix(self) -> Self {
        Fixer::Fix(self.get())
    }
    /// Makes the current value editable again.
    fn to_edit(self) -> Self {
        Fixer::Edit(self.get())
    }
    /// Replaces the value, preserving the Fix/Edit state.
    fn set(self, val: i32) -> Self {
        match self {
            Fixer::Fix(_) => Fixer::Fix(val),
            Fixer::Edit(_) => Fixer::Edit(val),
        }
    }
    fn get(&self) -> i32 {
        match *self {
            Fixer::Fix(x) | Fixer::Edit(x) => x,
        }
    }
    /// Resets editable values to 0; fixed values are kept.
    fn clear(self) -> Self {
        match self {
            fixed @ Fixer::Fix(_) => fixed,
            Fixer::Edit(_) => Fixer::Edit(0),
        }
    }
}
impl Default for Fixer {
    fn default() -> Self {
        Fixer::Edit(0)
    }
}
/// Builder for [`InstructionData`].
///
/// Every field is a [`Fixer`]: `Edit` values are reset to 0 by `clear` (and
/// after every `build`), while `Fix`ed values survive, letting a decoder pin
/// a field (e.g. the core index) across many instructions.
#[derive(Clone, Copy, Debug, Default)]
pub struct InstructionDataBuilder {
    core_indx: Fixer,
    rd: Fixer,
    r1: Fixer,
    r2: Fixer,
    imm: Fixer,
    offset_select: Fixer,
    offset_value: Fixer,
    imm_len: Fixer,
    ibiw: Fixer,
    obiw: Fixer,
    mbiw: Fixer,
    imm_relu: Fixer,
    imm_group: Fixer,
    imm_core: Fixer,
}
// Generates fix_<f> / edit_<f> / set_<f> / get_<f> / clear_<f> accessors for
// one builder field via `paste` identifier concatenation.
macro_rules! common_getter_setter {
    ($id:ident) => {
        paste! {
            pub fn [<fix_ $id>](&mut self) -> &mut Self {
                self.[<$id>] = self.[<$id>].to_fix();
                self
            }
            pub fn [<edit_ $id>](&mut self) -> &mut Self {
                self.[<$id>] = self.[<$id>].to_edit();
                self
            }
            pub fn [<set_ $id>](&mut self, val: i32) -> &mut Self{
                self.[<$id>] = self.[<$id>].set(val);
                self
            }
            pub fn [<get_ $id>](&self) -> i32{
                self.[<$id>].get()
            }
            pub fn [<clear_ $id>](&mut self){
                self.[<$id>] = self.[<$id>].clear();
            }
        }
    };
}
impl InstructionDataBuilder {
    common_getter_setter![core_indx];
    common_getter_setter![rd];
    common_getter_setter![r1];
    common_getter_setter![r2];
    common_getter_setter![imm];
    common_getter_setter![offset_select];
    common_getter_setter![offset_value];
    common_getter_setter![imm_len];
    common_getter_setter![ibiw];
    common_getter_setter![obiw];
    common_getter_setter![mbiw];
    common_getter_setter![imm_relu];
    common_getter_setter![imm_group];
    common_getter_setter![imm_core];
    /// Fresh builder with every field editable and zeroed.
    pub fn new() -> Self {
        Self {
            core_indx: Fixer::Edit(0),
            rd: Fixer::Edit(0),
            r1: Fixer::Edit(0),
            r2: Fixer::Edit(0),
            imm: Fixer::Edit(0),
            offset_select: Fixer::Edit(0),
            offset_value: Fixer::Edit(0),
            imm_len: Fixer::Edit(0),
            ibiw: Fixer::Edit(0),
            obiw: Fixer::Edit(0),
            mbiw: Fixer::Edit(0),
            imm_relu: Fixer::Edit(0),
            imm_group: Fixer::Edit(0),
            imm_core: Fixer::Edit(0),
        }
    }
    /// Clears every field; `Fix`ed fields keep their value (see [`Fixer`]).
    pub fn clear(&mut self) {
        self.clear_core_indx();
        self.clear_rd();
        self.clear_r1();
        self.clear_r2();
        self.clear_imm();
        self.clear_offset_value();
        self.clear_offset_select();
        self.clear_imm_len();
        self.clear_ibiw();
        self.clear_obiw();
        self.clear_mbiw();
        self.clear_imm_relu();
        self.clear_imm_group();
        self.clear_imm_core();
    }
    // NOTE(review): each assert only fires when ALL fields of an aliasing
    // group are nonzero at once. If the intent is "at most one member of the
    // group may be set" (build() sums them), the `&&` chains look too weak —
    // TODO confirm against the instruction encoding.
    fn check_sanity(&self) {
        assert!(!(self.get_r2() != 0 && self.get_imm() != 0 && self.get_mbiw() != 0 && self.get_imm_core() != 0));
        assert!(
            !(self.get_ibiw() != 0 && self.get_offset_select() != 0 && self.get_imm_relu() != 0)
        );
        assert!(
            !(self.get_obiw() != 0 && self.get_offset_value() != 0 && self.get_imm_group() != 0)
        );
    }
    /// Builds the packed [`InstructionData`]. Aliased fields are combined by
    /// summation (valid only if at most one of each group is nonzero) and
    /// the builder is cleared afterwards.
    pub fn build(&mut self) -> InstructionData {
        self.check_sanity();
        let inst_data = InstructionData {
            core_indx: self.get_core_indx(),
            rd: self.get_rd(),
            r1: self.get_r1(),
            r2_or_imm: self.get_r2() + self.get_imm() + self.get_mbiw() + self.get_imm_core(),
            generic1: self.get_offset_select() + self.get_ibiw() + self.get_imm_relu(),
            generic2: self.get_offset_value() + self.get_obiw() + self.get_imm_group(),
            generic3: self.get_imm_len(),
        };
        self.clear();
        inst_data
    }
    // Convenience multi-field setters used by the decoders.
    pub fn set_rdr1r2(&mut self, rd: i32, r1: i32, r2: i32) -> &mut Self {
        self.set_rd(rd).set_r1(r1).set_r2(r2)
    }
    pub fn set_offset_select_value(&mut self, offset_select: i32, offset_value: i32) -> &mut Self {
        self.set_offset_select(offset_select)
            .set_offset_value(offset_value)
    }
    pub fn set_rdr1imm(&mut self, rd: i32, r1: i32, imm: i32) -> &mut Self {
        self.set_rd(rd).set_r1(r1).set_imm(imm)
    }
    pub fn set_rdr1(&mut self, rd: i32, r1: i32) -> &mut Self {
        self.set_rd(rd).set_r1(r1)
    }
    pub fn set_rdimm(&mut self, rd: i32, imm: i32) -> &mut Self {
        self.set_rd(rd).set_imm(imm)
    }
    pub fn set_ibiw_obiw(&mut self, ibiw: i32, obiw: i32) -> &mut Self {
        self.set_ibiw(ibiw).set_obiw(obiw)
    }
    pub fn set_mbiw_immrelu_immgroup(
        &mut self,
        mbiw: i32,
        imm_relu: i32,
        imm_group: i32,
    ) -> &mut Self {
        self.set_mbiw(mbiw)
            .set_imm_relu(imm_relu)
            .set_imm_group(imm_group)
    }
}

View File

@@ -0,0 +1,722 @@
use crate::{
cpu::{CPU, crossbar}, instruction_set::{
Instruction, InstructionData, InstructionStatus, InstructionType, VectorBitWith,
helper::add_all,
}, memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
}, tracing::TRACER, utility::{add_offset_r1, add_offset_r2, add_offset_rd}
};
use aligned_vec::{AVec, ConstAlign};
use anyhow::{Context, Result, ensure};
use paste::paste;
use std::{borrow::Cow, cell::OnceCell, collections::HashMap};
use std::{collections::HashSet, sync::LazyLock};
// Registers `$id`'s function-pointer address (as a usize key) with its
// stringified name; consumed by `functor_to_name` for tracing output.
macro_rules! add_name {
    ($storage:ident, $id:ident) => {
        $storage.insert($id as *const () as usize, stringify!($id));
    };
}
// Like `add_name!`, but registers all four (f32/f64 in, f32/f64 out)
// monomorphisations of `$id`'s generic `_impl` kernel.
macro_rules! add_name_simd {
    ($storage:ident, $id:ident) => {
        paste! {
            $storage.insert([<$id _impl>]::<f32,f32> as *const () as usize, concat!(stringify!($id), "::<f32, f32>"));
            $storage.insert([<$id _impl>]::<f64,f64> as *const () as usize, concat!(stringify!($id), "::<f64, f64>"));
            $storage.insert([<$id _impl>]::<f32,f64> as *const () as usize, concat!(stringify!($id), "::<f32, f64>"));
            $storage.insert([<$id _impl>]::<f64,f32> as *const () as usize, concat!(stringify!($id), "::<f64, f32>"));
        }
    };
}
/// Maps every instruction function pointer (and every monomorphised SIMD
/// kernel) to a human-readable name; looked up by `functor_to_name`.
static NAMES: LazyLock<HashMap<usize, &'static str>> = LazyLock::new(|| {
    let mut hash = HashMap::new();
    add_name!(hash, sldi);
    add_name!(hash, sld);
    add_name!(hash, sadd);
    add_name!(hash, ssub);
    add_name!(hash, smul);
    add_name!(hash, saddi);
    add_name!(hash, smuli);
    add_name!(hash, setbw);
    add_name_simd!(hash, mvmul);
    add_name_simd!(hash, vvadd);
    add_name_simd!(hash, vvsub);
    add_name_simd!(hash, vvmul);
    add_name_simd!(hash, vvdmul);
    add_name_simd!(hash, vvmax);
    add_name!(hash, vvsll);
    add_name!(hash, vvsra);
    add_name_simd!(hash, vavg);
    add_name_simd!(hash, vrelu);
    add_name_simd!(hash, vtanh);
    add_name_simd!(hash, vsigm);
    add_name!(hash, vmv);
    add_name!(hash, vrsu);
    add_name!(hash, vrsl);
    add_name!(hash, ld);
    add_name!(hash, st);
    add_name!(hash, lldi);
    add_name!(hash, lmv);
    add_name!(hash, send);
    add_name!(hash, recv);
    add_name!(hash, wait);
    add_name!(hash, sync);
    hash
});
/// Resolves an instruction function-pointer address to its registered name.
///
/// # Panics
/// Panics when `functor` was never registered in `NAMES`.
pub fn functor_to_name(functor: usize) -> &'static str {
    NAMES
        .get(&functor)
        // FIX: include the unknown address so a missing registration can be
        // diagnosed; the old message carried no information.
        .unwrap_or_else(|| panic!("Function not found: {:#x}", functor))
}
///////////////////////////////////////////////////////////////
/////////////////Scalar/register Instructions//////////////////
///////////////////////////////////////////////////////////////
/// sldi: scalar load-immediate — writes `imm` into register `rd` of the
/// selected core.
pub fn sldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_sldi(cores, data);
    let (core_indx, rd, imm) = data.get_core_rd_imm();
    let core = cores.core(core_indx);
    core.set_register(rd, imm);
    TRACER.lock().unwrap().post_sldi(cores, data);
    Ok(InstructionStatus::Completed)
}
/// sld: scalar load — reads an i32 from HOST memory at `r1 + offset_value`
/// and writes it into register `rd` of the selected core.
pub fn sld(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_sld(cores, data);
    let (core_indx, rd, r1) = data.get_core_rd_r1();
    let offset_value = data.offset_value();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    // The source address lives in host memory, so the CPU is re-borrowed for
    // the host and then again for the target core: the borrow checker forbids
    // holding both core references at once.
    let host = cores.host();
    let num = host.load::<i32>((r1_val + offset_value), 4)?[0][0];
    let core = cores.core(core_indx);
    core.set_register(rd, num);
    TRACER.lock().unwrap().post_sld(cores, data);
    Ok(InstructionStatus::Completed)
}
/// sadd: scalar add — rd = r1 + r2 on the selected core.
pub fn sadd(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_sadd(cores, data);
    let (core_indx, rd, r1, r2) = data.get_core_rd_r1_r2();
    let core = cores.core(core_indx);
    let result = core.register(r1) + core.register(r2);
    core.set_register(rd, result);
    Ok(InstructionStatus::Completed)
}
/// ssub: scalar subtract — rd = r1 - r2 on the selected core.
pub fn ssub(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_ssub(cores, data);
    let (core_indx, rd, r1, r2) = data.get_core_rd_r1_r2();
    let core = cores.core(core_indx);
    let difference = core.register(r1) - core.register(r2);
    core.set_register(rd, difference);
    Ok(InstructionStatus::Completed)
}
/// smul: scalar multiply — rd = r1 * r2 on the selected core.
pub fn smul(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_smul(cores, data);
    let (core_indx, rd, r1, r2) = data.get_core_rd_r1_r2();
    let core = cores.core(core_indx);
    let product = core.register(r1) * core.register(r2);
    core.set_register(rd, product);
    Ok(InstructionStatus::Completed)
}
/// saddi: scalar add-immediate — rd = r1 + imm on the selected core.
pub fn saddi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_saddi(cores, data);
    let (core_indx, rd, r1, imm) = data.get_core_rd_r1_imm();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    // BUG FIX: this computed `r1_val - imm`, i.e. a subtraction inside the
    // add-immediate instruction; saddi must add, mirroring sadd.
    core.set_register(rd, r1_val + imm);
    Ok(InstructionStatus::Completed)
}
/// smuli: scalar multiply-immediate — rd = r1 * imm on the selected core.
pub fn smuli(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_smuli(cores, data);
    let (core_indx, rd, r1, imm) = data.get_core_rd_r1_imm();
    let core = cores.core(core_indx);
    let product = core.register(r1) * imm;
    core.set_register(rd, product);
    Ok(InstructionStatus::Completed)
}
///////////////////////////////////////////////////////////////
/////////////////Matrix/vector Instructions////////////////////
///////////////////////////////////////////////////////////////
// Registers the (input, output) bit-width monomorphisations of `$id`'s
// generic `_impl` kernel, keyed by the placeholder's function-pointer
// address; consumed by `dispatch_simd`.
macro_rules! add_simd_to_map {
    ($storage:ident, $id:ident) => {
        paste! {
            let mut tmp = HashMap::new();
            tmp.insert((32_usize,32_usize), ([<$id _impl>]::<f32, f32> as InstructionType));
            tmp.insert((32_usize,64_usize), ([<$id _impl>]::<f32, f64> as InstructionType));
            tmp.insert((64_usize,32_usize), ([<$id _impl>]::<f64, f32> as InstructionType));
            tmp.insert((64_usize,64_usize), ([<$id _impl>]::<f64, f64> as InstructionType));
            // NOTE(review): 8-bit widths are routed to the f32 kernel; the
            // original comment ("WTF WHY") shows the reason is unknown — TODO
            // confirm against the pimsim-nn instruction spec.
            tmp.insert((8_usize,8_usize), ([<$id _impl>]::<f32, f32> as InstructionType));
            $storage.insert($id as *const () as usize, tmp);
        }
    };
}
/// Maps each SIMD placeholder's function-pointer address to its table of
/// width-specific kernels; see `isa_simd` / `dispatch_simd`.
static SIMD: LazyLock<HashMap<usize, HashMap<(usize, usize), InstructionType>>> =
    LazyLock::new(|| {
        let mut storage = HashMap::new();
        add_simd_to_map!(storage, vvadd);
        add_simd_to_map!(storage, vvsub);
        add_simd_to_map!(storage, vvmul);
        add_simd_to_map!(storage, vvdmul);
        add_simd_to_map!(storage, vvmax);
        add_simd_to_map!(storage, vavg);
        add_simd_to_map!(storage, vrelu);
        add_simd_to_map!(storage, vtanh);
        add_simd_to_map!(storage, vsigm);
        add_simd_to_map!(storage, mvmul);
        storage
    });
/// True when `functor` is a SIMD placeholder that must be resolved through
/// `dispatch_simd` before execution.
pub fn isa_simd(functor: InstructionType) -> bool {
    SIMD.contains_key(&(functor as usize))
}
/// Resolves a SIMD placeholder functor to the concrete kernel matching the
/// requested input/output vector bit widths.
///
/// # Errors
/// Fails when `functor` is not a registered SIMD placeholder or when no
/// kernel exists for the (input, output) width pair.
pub fn dispatch_simd(
    functor: InstructionType,
    vector_bit_with: VectorBitWith,
) -> Result<InstructionType> {
    let VectorBitWith {
        vector_input_bitwith,
        vector_output_bitwith,
    } = vector_bit_with;
    let res = SIMD
        .get(&(functor as usize))
        .context("Request a non present simd")?
        .get(&(vector_input_bitwith, vector_output_bitwith))
        .with_context(|| {
            format!(
                "Function not found for the requested size input:{} output:{}",
                vector_input_bitwith, vector_output_bitwith
            )
        })?;
    Ok(*res)
}
/// True when `functor` is the `setbw` placeholder, which is resolved during
/// instruction construction and must never actually execute.
pub fn is_setbw(functor: InstructionType) -> bool {
    functor as usize == setbw as *const () as usize
}
/// Placeholder registered in the ISA tables for `setbw`; it is resolved in
/// the construction phase (see `is_setbw`) and must never run.
/// (FIX: parameters underscored — they are intentionally unused.)
pub fn setbw(_cores: &mut CPU, _data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, this instruction is resolved in the construction phase");
}
/// Placeholder for `mvmul`; `dispatch_simd` replaces it with a concrete
/// `mvmul_impl` monomorphisation before execution.
/// (FIX: parameters underscored — they are intentionally unused.)
pub fn mvmul(_cores: &mut CPU, _data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Matrix-vector multiply against crossbar `group` of the selected core.
///
/// Loads `crossbar_height` elements of type `F` from core memory at `r1`,
/// multiplies them against the crossbar matrix (element type `M`, row-major),
/// optionally applies ReLU, and stores the result (upcast to `T`) at `rd`.
pub(super) fn mvm_impl_internal<F, M, T>(
    cores: &mut CPU,
    data: InstructionData,
) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T> + UpcastSlice<M>,
    [M]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_mvm::<F, M, T>(cores, data);
    let (core_indx, rd, r1, mbiw, relu, group) = data.get_core_rd_r1_mbiw_immrelu_immgroup();
    let group: usize = group.try_into().context("group can not be negative")?;
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let rd_val = core.register(rd);
    let (memory, crossbars) = core.get_memory_crossbar();
    let crossbar = crossbars.get_mut(group).unwrap();
    let crossbar_byte_width = crossbar.width();
    // BUG FIX: divisibility must be checked with `%`; the previous bitwise
    // `&` only worked for power-of-two element sizes by accident
    // (e.g. width 24, size 8 was wrongly rejected).
    ensure!(
        crossbar_byte_width % size_of::<M>() == 0,
        "M not divisor of the crosbbar size"
    );
    let crossbar_elem_width = crossbar_byte_width / size_of::<M>();
    let crossbar_height = crossbar.height();
    let crossbar_byte_size = crossbar_byte_width * crossbar_height;
    // Input vector: one F element per crossbar row, read from core memory.
    let loads = memory
        .reserve_load(r1_val, crossbar_height * size_of::<F>())?
        .execute_load::<F>()?;
    let load = loads[0];
    let vec: Cow<[M]> = load.up();
    let matrix = crossbar.load::<M>(crossbar_byte_size)?[0];
    let mut res = Vec::with_capacity(crossbar_elem_width);
    // 64-byte aligned scratch so add_all's SIMD path can load it safely.
    let mut partial: AVec<M, _> = AVec::<M, ConstAlign<64>>::with_capacity(64, vec.len());
    partial.resize(vec.len(), M::from_f32(0.0));
    for x in 0..crossbar_elem_width {
        // Column-x dot product; the matrix is stored row-major as
        // matrix[row * crossbar_elem_width + col].
        partial[0] = vec[0] * matrix[x];
        for y in 1..crossbar_height {
            partial[y] = vec[y] * matrix[y * crossbar_elem_width + x];
        }
        // FIX: dropped a needless `mut` on the accumulator.
        let acc = add_all(partial.as_slice());
        res.push(acc);
    }
    if relu != 0 {
        // ReLU: clamp negative outputs to zero.
        res.iter_mut().for_each(|x| {
            if *x < M::from_f32(0.0) {
                *x = M::from_f32(0.0)
            }
        });
    }
    ensure!(
        res.len() == crossbar_elem_width,
        "mvm generate a vector bigger thant it's requested elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    // BUG FIX: the store's Result was discarded, silently dropping
    // out-of-bounds write errors; propagate it instead.
    core.execute_store(rd_val, res_up.as_ref())?;
    TRACER.lock().unwrap().post_mvm::<F, M, T>(cores, data);
    Ok(InstructionStatus::Completed)
}
/// Dispatches `mvmul` on the matrix bit width (`mbiw`) to the matching
/// monomorphised kernel.
///
/// # Panics
/// Panics on an unhandled `mbiw` value.
pub(super) fn mvmul_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T> + UpcastSlice<f32> + UpcastSlice<f64>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
    [f32]: UpcastSlice<T>,
    [f64]: UpcastSlice<T>,
{
    let mbiw = data.mbiw();
    match mbiw {
        32 => mvm_impl_internal::<F, f32, T>(cores, data),
        64 => mvm_impl_internal::<F, f64, T>(cores, data),
        // NOTE(review): mbiw == 8 is routed to the f32 kernel; the original
        // author marked this as unexplained ("i don't know why") — TODO
        // confirm against the instruction spec.
        8 => mvm_impl_internal::<F, f32, T>(cores, data),
        n => {
            panic!("mvm size {} not handled ", n)
        }
    }
}
/// Placeholder for the `vvadd` opcode: `dispatch_simd` (see the builder's
/// `make_inst`) substitutes a monomorphized `vvadd_impl`, so reaching this
/// body means SIMD dispatch was bypassed.
pub fn vvadd(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise vector addition: loads two `imm_len`-byte vectors of `F`
/// from core-local memory at the (offset-adjusted) addresses in `r1`/`r2`,
/// adds them pairwise, upcasts the result to `T`, and stores it at `rd`.
pub(super) fn vvadd_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvadd::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let imm_len: usize = imm_len.try_into().context("imm_len can not be negative")?;
    // Reserve both source ranges before executing the loads together.
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    let res: Vec<F> = load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a + b)
        .collect();
    // Sanity check: the byte length must translate exactly into res.len()
    // elements of F.
    ensure!(
        imm_len / size_of::<F>() == res.len(),
        "vvadd generated a vector whose length differs from the requested number of elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    TRACER.lock().unwrap().post_vvadd::<F,T>(cores, data);
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvsub` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvsub_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvsub(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise vector subtraction: loads two `imm_len`-byte vectors of `F`
/// from core-local memory, computes `load1 - load2`, upcasts to `T`, and
/// stores the result at `rd`.
pub(super) fn vvsub_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvsub::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let imm_len: usize = imm_len.try_into().context("imm_len can not be negative")?;
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    let res: Vec<F> = load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a - b)
        .collect();
    // Message previously said "vvadd" — copy-paste artifact.
    ensure!(
        imm_len / size_of::<F>() == res.len(),
        "vvsub generated a vector whose length differs from the requested number of elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    // NOTE(review): unlike vvadd_impl there is no post_vvsub trace hook —
    // confirm whether the tracer is meant to record completion here too.
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvmul` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvmul_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvmul(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise vector multiplication: loads two `imm_len`-byte vectors of
/// `F` from core-local memory, multiplies them pairwise, upcasts to `T`,
/// and stores the result at `rd`.
pub(super) fn vvmul_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvmul::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let imm_len: usize = imm_len.try_into().context("imm_len can not be negative")?;
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    let res: Vec<F> = load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a * b)
        .collect();
    // Message previously said "vvadd" — copy-paste artifact.
    ensure!(
        imm_len / size_of::<F>() == res.len(),
        "vvmul generated a vector whose length differs from the requested number of elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvdmul` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvdmul_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvdmul(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Dot product: loads two `imm_len`-byte vectors of `F`, multiplies them
/// pairwise, sums the products, and stores the single resulting element
/// (upcast to `T`) at `rd`.
pub(super) fn vvdmul_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvdmul::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    // NOTE(review): unlike vvadd_impl, rd is NOT offset-adjusted here and
    // imm_len skips the non-negative conversion — confirm both are
    // intentional for the scalar-result case.
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    // NOTE(review): reduce().unwrap() panics when the load is empty
    // (imm_len == 0) — confirm zero-length vvdmul cannot occur.
    let res: [F; 1] = [load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a * b)
        .reduce(|a, b| a + b)
        .unwrap()];
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvmax` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvmax_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvmax(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
pub(super) fn vvmax_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable,
{
TRACER.lock().unwrap().pre_vvmax::<F,T>(cores, data);
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let core = cores.core(core_indx);
let r1_val = core.register(r1);
let r2_val = core.register(r2);
let rd_val = core.register(rd);
let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
let loads = core
.reserve_load(r1_val, imm_len)?
.reserve_load(r2_val, imm_len)?
.execute_load::<F>()?;
let (load1, load2) = (loads[0], loads[1]);
let res: Vec<F> = load1
.iter()
.zip(load2.iter())
.map(|(&a, &b)| if (a > b) { a } else { b })
.collect();
let res_up: Cow<[T]> = res.as_slice().up();
core.execute_store(rd_val, res_up.as_ref());
Ok(InstructionStatus::Completed)
}
/// `vvsll` (vector shift-left): deliberately unsupported — this simulator
/// models vectors as floating point, where a bit shift is undefined.
pub fn vvsll(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!(
        "Shift left on floating point what does it means? who has generated this instruction???"
    );
}
/// `vvsra` (vector shift-right arithmetic): deliberately unsupported — this
/// simulator models vectors as floating point, where a bit shift is
/// undefined.
pub fn vvsra(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!(
        "Shift right on floating point what does it means? who has generated this instruction???"
    );
}
/// Placeholder for the `vavg` opcode: `dispatch_simd` substitutes a
/// monomorphized `vavg_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vavg(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Vector average: loads `imm_len` bytes of `F`, averages all elements,
/// and stores the single result (upcast to `T`) at `rd`. Only stride 1 is
/// supported (`r2` carries the stride directly, not a register index).
pub(super) fn vavg_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vavg::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = r2;
    ensure!(r2_val == 1, "Stride different than 1 not supported");
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let loads = core.reserve_load(r1_val, imm_len)?.execute_load::<F>()?;
    let load1 = loads[0];
    let len = load1.len();
    // `[F; 1]` instead of `[F; _]`: inferred array lengths are not stable
    // Rust, and this matches vvdmul_impl's single-element result style.
    // NOTE(review): unwrap() panics on an empty load (imm_len == 0).
    let res: [F; 1] =
        [load1.iter().copied().reduce(|a: F, b: F| a + b).unwrap() / F::from_usize(len)];
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vrelu` opcode: `dispatch_simd` substitutes a
/// monomorphized `vrelu_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vrelu(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
pub(super) fn vrelu_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
TRACER.lock().unwrap().pre_vrelu::<F,T>(cores, data);
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let core = cores.core(core_indx);
let r1_val = core.register(r1);
let rd_val = core.register(rd);
let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
let loads = core.reserve_load(r1_val, imm_len)?.execute_load::<F>()?;
let load1 = loads[0];
let res: Vec<F> = load1
.iter()
.map(|&a| if (a > 0.0.into()) { a } else { 0.0.into() })
.collect();
let res_up: Cow<[T]> = res.as_slice().up();
core.execute_store(rd_val, res_up.as_ref());
Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vtanh` opcode: `dispatch_simd` substitutes a
/// monomorphized `vtanh_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vtanh(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise tanh: loads `imm_len` bytes of `F` from the
/// offset-adjusted address in `r1`, maps every element through `tanh()`,
/// upcasts to `T`, and stores the result at the offset-adjusted `rd`.
pub(super) fn vtanh_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    TRACER.lock().unwrap().pre_vtanh::<F,T>(cores, data);
    let (core_indx, rd, r1, _r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let base_src = core.register(r1);
    let base_dst = core.register(rd);
    let src_addr = add_offset_r1(base_src, offset_select, offset_value);
    let dst_addr = add_offset_rd(base_dst, offset_select, offset_value);
    let loaded = core.reserve_load(src_addr, imm_len)?.execute_load::<F>()?;
    let input = loaded[0];
    let mut transformed: Vec<F> = Vec::with_capacity(input.len());
    for &value in input.iter() {
        transformed.push(value.tanh());
    }
    let upcast: Cow<[T]> = transformed.as_slice().up();
    core.execute_store(dst_addr, upcast.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vsigm` opcode: `dispatch_simd` substitutes a
/// monomorphized `vsigm_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vsigm(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise sigmoid: loads `imm_len` bytes of `F` from the
/// offset-adjusted address in `r1`, maps every element through `F::sigm`,
/// upcasts to `T`, and stores the result at the offset-adjusted `rd`.
pub(super) fn vsigm_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    TRACER.lock().unwrap().pre_vsigm::<F,T>(cores, data);
    let (core_indx, rd, r1, _r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let base_src = core.register(r1);
    let base_dst = core.register(rd);
    let src_addr = add_offset_r1(base_src, offset_select, offset_value);
    let dst_addr = add_offset_rd(base_dst, offset_select, offset_value);
    let loaded = core.reserve_load(src_addr, imm_len)?.execute_load::<F>()?;
    let input = loaded[0];
    let mut transformed: Vec<F> = Vec::with_capacity(input.len());
    for &value in input.iter() {
        transformed.push(value.sigm());
    }
    let upcast: Cow<[T]> = transformed.as_slice().up();
    core.execute_store(dst_addr, upcast.as_ref());
    Ok(InstructionStatus::Completed)
}
/// `vmv`: unimplemented stub; panics via `todo!` if ever dispatched.
pub fn vmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    todo!()
}
/// `vrsu`: unimplemented stub; panics via `todo!` if ever dispatched.
pub fn vrsu(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    todo!()
}
/// `vrsl`: unimplemented stub; panics via `todo!` if ever dispatched.
pub fn vrsl(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    todo!()
}
///////////////////////////////////////////////////////////////
///Communication/synchronization Instructions/////////////////
///////////////////////////////////////////////////////////////
/// `ld`: copies `imm_len` bytes from host (global) memory at the
/// offset-adjusted address in `r1` into the target core's memory at the
/// offset-adjusted address in `rd`. Both addresses are read from the
/// core's register file.
pub fn ld(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_ld(cores, data);
    let (core, rd, r1, _, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    // Core index 0 is rejected — presumably it denotes the host itself;
    // TODO confirm the index-0 convention.
    ensure!(core != 0, "LD cannot be used to move from host to host");
    let (host, core) = cores.host_and_cores(core);
    let r1_val = core.register(r1);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let global_memory = host.load::<u8>(r1_val, imm_len)?;
    // This store is fallible (`?`), unlike the stores in the vector ops.
    core.execute_store(rd_val, global_memory[0])?;
    TRACER.lock().unwrap().post_ld(cores, data);
    Ok(InstructionStatus::Completed)
}
/// `st`: copies `imm_len` bytes from the core's local memory at the
/// offset-adjusted address in `r1` into host (global) memory at the
/// offset-adjusted address in `rd`. Both addresses are read from the
/// core's register file.
pub fn st(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_st(cores, data);
    let (core, rd, r1, _, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    // Core index 0 is rejected — presumably it denotes the host itself;
    // TODO confirm the index-0 convention.
    ensure!(core != 0, "ST cannot be used to move from host to host");
    let (host, core) = cores.host_and_cores(core);
    let r1_val = core.register(r1);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let local_memory = core.load::<u8>(r1_val, imm_len)?;
    host.execute_store(rd_val, local_memory[0]);
    TRACER.lock().unwrap().post_st(cores, data);
    Ok(InstructionStatus::Completed)
}
/// `lldi`: fills `imm_len` bytes of core-local memory at the
/// offset-adjusted address in `rd` with the immediate, treated as a single
/// repeated byte.
pub fn lldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_lldi(cores, data);
    let (core, rd, imm) = data.get_core_rd_imm();
    let offset_value = data.offset_value();
    let imm_len = data.imm_len();
    let core = cores.core(core);
    let rd_val = core.register(rd);
    // NOTE(review): offset_select is hard-coded to 4 here instead of being
    // read from the instruction — confirm this is intentional.
    let rd_val = add_offset_rd(rd_val, 4, offset_value);
    // NOTE(review): the JSON decoder packs an f32's bit pattern into `imm`
    // (pack_float_in_i32), yet only values that fit in a u8 are accepted
    // here — confirm how non-zero float immediates are meant to work.
    core.memset(
        rd_val,
        imm_len,
        u8::try_from(imm).context("lldi with value bigger than u8")?,
    );
    Ok(InstructionStatus::Completed)
}
/// `lmv`: copies `imm_len` bytes within a core's local memory, from the
/// offset-adjusted address in `r1` to the offset-adjusted address in `rd`.
pub fn lmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_lmv(cores, data);
    let (core_id, rd, r1, _, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_id);
    let base_src = core.register(r1);
    let base_dst = core.register(rd);
    let src = add_offset_r1(base_src, offset_select, offset_value);
    let dst = add_offset_rd(base_dst, offset_select, offset_value);
    // Copy the bytes out first: the loaded slice borrows the core, and the
    // store below needs the core again.
    let bytes = core.load::<u8>(src, imm_len)?[0].to_vec();
    core.execute_store(dst, bytes.as_slice());
    Ok(InstructionStatus::Completed)
}
/// `send`: records the trace hook and yields `Sending(data)`; the actual
/// transfer is performed by the surrounding execution loop, not here.
pub fn send(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_send(cores, data);
    Ok(InstructionStatus::Sending(data))
}
/// `recv`: records the trace hook and yields `Reciving(data)` (variant
/// spelling comes from the `InstructionStatus` definition); the actual
/// transfer is performed by the surrounding execution loop, not here.
pub fn recv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_recv(cores, data);
    Ok(InstructionStatus::Reciving(data))
}
/// `wait`: yields `Waiting(data)` for the surrounding execution loop to
/// resolve. NOTE(review): unlike send/recv there is no tracer hook here —
/// confirm whether one is intended.
pub fn wait(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    Ok(InstructionStatus::Waiting(data))
}
/// `sync`: yields `Sync(data)` for the surrounding execution loop to
/// resolve. NOTE(review): unlike send/recv there is no tracer hook here —
/// confirm whether one is intended.
pub fn sync(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    Ok(InstructionStatus::Sync(data))
}

View File

@@ -0,0 +1,115 @@
use crate::{
cpu::CPU,
instruction_set::{
instruction_data::InstructionData,
isa::{dispatch_simd, functor_to_name, is_setbw, isa_simd},
},
};
use anyhow::{Context, Result};
use std::mem::swap;
pub mod instruction_data;
pub mod isa;
pub mod helper;
/// A decoded instruction: its operand payload plus the function that
/// executes it against the CPU.
#[derive(Clone, Copy, Debug)]
pub struct Instruction {
    /// Packed operand fields (registers, immediates, offsets).
    pub data: InstructionData,
    // The executor; for SIMD ops this is the monomorphized variant chosen
    // by `dispatch_simd` when the instruction was built.
    functor: InstructionType,
}
/// Outcome of executing one instruction; non-`Completed` variants carry
/// the originating instruction data for the caller to act on.
#[derive(Debug, Clone, Copy, Default)]
pub enum InstructionStatus {
    /// The instruction finished.
    Completed,
    /// Produced by `wait`.
    Waiting(InstructionData),
    /// Produced by `send`.
    Sending(InstructionData),
    /// Produced by `recv`. (Spelling kept as-is: renaming this public
    /// variant would break existing callers.)
    Reciving(InstructionData),
    /// Produced by `sync`.
    Sync(InstructionData),
    /// Initial state: not yet executed.
    #[default]
    NotExecuted,
}
impl InstructionStatus {
    /// True only for [`InstructionStatus::Completed`].
    #[must_use]
    pub fn is_completed(&self) -> bool {
        matches!(self, Self::Completed)
    }
}
impl Instruction {
    /// Pairs an operand payload with its executor.
    fn new(data: InstructionData, functor: InstructionType) -> Self {
        Self { data, functor }
    }
    /// Runs the instruction on `cpu` and returns its status.
    ///
    /// # Panics
    /// Any error from the executor is unwrapped here, annotated with the
    /// instruction name and core index.
    pub fn execute(&self, cpu: &mut CPU) -> InstructionStatus {
        // core_indx() - 1: runtime core indices appear to be shifted by one
        // (0 reserved for the host) — TODO confirm; this would underflow
        // for core_indx() == 0.
        (self.functor)(cpu, self.data)
            .with_context(|| format!("Instruction: {}", functor_to_name(self.functor as usize)))
            .with_context(|| format!("Error in core: {}", self.data.core_indx() - 1))
            .unwrap()
    }
}
/// The instruction stream executed by the CPU, in program order.
pub type Instructions = Vec<Instruction>;
/// Signature shared by every instruction executor.
pub type InstructionType = fn(&mut CPU, InstructionData) -> Result<InstructionStatus>;
/// Per-element bit-widths for vector inputs and outputs, configured by the
/// `setbw` instruction. (The "BitWith"/"bitwith" spelling is kept — this is
/// public API.)
#[derive(Debug, Clone, Copy, Default)]
pub struct VectorBitWith {
    /// Bits per element of input vectors.
    pub vector_input_bitwith: usize,
    /// Bits per element of output vectors.
    pub vector_output_bitwith: usize,
}
/// Support for the
/// setbw ibiw, obiw
/// Set the bit-widths of each element for input vectors and output vectors. Related vector instructions
/// use the configured bit-widths. Once setbw is called, all subsequent related vector instructions will
/// use the configured bit-widths, until a new setbw is called. Once ibiw and obiw are set, ibyw and
/// obyw are also set accordingly by the hardware.
/// If the hardware does not support variable bit-width, this instruction is invalid and the matrix/vector
/// instructions use the fixed bit-width of the hardware.
pub struct InstructionsBuilder {
    // Widths applied to SIMD ops appended after the most recent setbw.
    vector_bit_with: VectorBitWith,
    // Instructions accumulated so far; drained by build().
    instructions: Instructions,
}
impl Default for InstructionsBuilder {
    /// Equivalent to [`InstructionsBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl InstructionsBuilder {
pub fn new() -> Self {
Self {
vector_bit_with: VectorBitWith {
vector_input_bitwith: 32,
vector_output_bitwith: 32,
},
instructions: Instructions::new(),
}
}
pub fn make_inst(&mut self, functor: InstructionType, data: InstructionData) {
if is_setbw(functor) {
let (ibiw, obiw) = data.get_ibiw_obiw();
self.vector_bit_with.vector_input_bitwith =
ibiw.try_into().expect("ibiw can not be negative");
self.vector_bit_with.vector_output_bitwith =
obiw.try_into().expect("obiw can not be negative");
return;
}
if (isa_simd(functor)) {
self.instructions.push(Instruction::new(
data,
dispatch_simd(functor, self.vector_bit_with).unwrap(),
))
} else {
self.instructions.push(Instruction::new(data, functor))
}
}
pub fn build(&mut self) -> Instructions {
let mut inst = Instructions::new();
swap(&mut self.instructions, &mut inst);
inst
}
}

View File

@@ -0,0 +1,569 @@
use anyhow::{Context, Result};
use paste::paste;
use std::{collections::HashMap, mem::offset_of, sync::LazyLock};
use crate::{
instruction_set::{
Instruction, InstructionsBuilder, instruction_data::InstructionDataBuilder, isa::*,
},
utility::pack_float_in_i32,
};
use serde_json::Value;
type FunctorType = fn(&mut InstructionsBuilder, &mut InstructionDataBuilder, &Value) -> Result<()>;
/// Registers a decoder in `$storage`: maps the op-name string `"$id"` to
/// the function `json_to_$id` (name glued together via `paste!`).
macro_rules! add_to_json_map {
    ($storage:ident, $id:ident) => {
        paste! {
            $storage.insert(stringify!($id).to_string(), [<json_to_ $id>] as FunctorType );
        }
    };
}
/// Dispatch table from op-name string to its JSON decoder, built lazily on
/// first use. Keys must match the `"op"` strings emitted by the compiler.
static SIMD: LazyLock<HashMap<String, FunctorType>> = LazyLock::new(|| {
    let mut storage = HashMap::new();
    add_to_json_map!(storage, sldi);
    add_to_json_map!(storage, sld);
    add_to_json_map!(storage, sadd);
    add_to_json_map!(storage, ssub);
    add_to_json_map!(storage, smul);
    add_to_json_map!(storage, saddi);
    add_to_json_map!(storage, setbw);
    add_to_json_map!(storage, mvmul);
    add_to_json_map!(storage, vvadd);
    add_to_json_map!(storage, vvsub);
    add_to_json_map!(storage, vvmul);
    add_to_json_map!(storage, vvdmul);
    add_to_json_map!(storage, vvmax);
    add_to_json_map!(storage, vvsll);
    add_to_json_map!(storage, vvsra);
    // json_to_vavg existed but was never registered, making "vavg" ops
    // fail with "Operation not found".
    add_to_json_map!(storage, vavg);
    add_to_json_map!(storage, vrelu);
    add_to_json_map!(storage, vtanh);
    add_to_json_map!(storage, vsigm);
    add_to_json_map!(storage, vmv);
    add_to_json_map!(storage, vrsu);
    add_to_json_map!(storage, vrsl);
    add_to_json_map!(storage, ld);
    add_to_json_map!(storage, st);
    add_to_json_map!(storage, lldi);
    add_to_json_map!(storage, lmv);
    add_to_json_map!(storage, send);
    add_to_json_map!(storage, recv);
    add_to_json_map!(storage, wait);
    add_to_json_map!(storage, sync);
    storage
});
/// Extracts the `(offset_select, offset_value)` pair from an instruction's
/// "offset" sub-object.
///
/// # Panics
/// Panics with a field-specific message when either key is missing or not
/// an integer (previously these were bare `unwrap()`s).
fn json_to_offset(json: &Value) -> (i32, i32) {
    let offset_value = json
        .get("offset_value")
        .expect("offset_value field not present")
        .as_i64()
        .expect("offset_value field not i64") as i32;
    let offset_select = json
        .get("offset_select")
        .expect("offset_select field not present")
        .as_i64()
        .expect("offset_select field not i64") as i32;
    (offset_select, offset_value)
}
/// Decodes one JSON instruction object and appends it to `inst_builder`.
///
/// # Panics
/// Panics when the object has no registered "op", or when the per-op
/// decoder reports malformed fields (previously the decoder's `Result`
/// was silently discarded).
pub fn json_to_instruction(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) {
    let json_struct = json.as_object().expect("Not an object");
    let op = json_struct.get("op").unwrap().as_str().unwrap();
    let decoder = SIMD
        .get(op)
        .unwrap_or_else(|| panic!("Operation not found {}", op));
    decoder(inst_builder, inst_data_builder, json)
        .unwrap_or_else(|err| panic!("Failed to decode {} instruction: {:?}", op, err));
}
/// Fetches `$value` from a JSON map as `&str`, turning a missing or
/// mistyped field into a contextual error via `?`.
macro_rules! json_str {
    ($json:ident , $value:literal) => {
        $json.get($value).context(concat![$value, " field not present"])?.as_str().context(concat![$value, " field not str"])?
    };
}
/// Fetches `$value` from a JSON map as `i64`, turning a missing or
/// mistyped field into a contextual error via `?`.
macro_rules! json_i64 {
    ($json:ident , $value:literal) => {
        $json.get($value).context(concat![$value, " field not present"])?.as_i64().context(concat![$value, " field not i64"])?
    };
}
/// Decodes an `sldi` (scalar load-immediate) JSON object.
fn json_to_sldi(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let obj = json.as_object().expect("Not an object");
    assert_eq!("sldi", json_str!(obj, "op"));
    let dest = json_i64!(obj, "rd");
    let immediate = json_i64!(obj, "imm");
    inst_data_builder.set_rd(dest as i32);
    inst_data_builder.set_imm(immediate as i32);
    inst_builder.make_inst(sldi, inst_data_builder.build());
    Ok(())
}
fn json_to_sld(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_sadd(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_ssub(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_smul(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_saddi(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
///////////////////////////////////////////////////////////////
/////////////////Matrix/vector Instructions////////////////////
///////////////////////////////////////////////////////////////
/// Decodes a `setbw` (set element bit-widths) JSON object.
fn json_to_setbw(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let obj = json.as_object().expect("Not an object");
    assert_eq!("setbw", json_str!(obj, "op"));
    let input_width = json_i64!(obj, "ibiw");
    let output_width = json_i64!(obj, "obiw");
    inst_data_builder.set_ibiw(input_width as i32);
    inst_data_builder.set_obiw(output_width as i32);
    inst_builder.make_inst(setbw, inst_data_builder.build());
    Ok(())
}
/// Decodes an `mvmul` (matrix-vector multiply) JSON object.
fn json_to_mvmul(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let obj = json.as_object().expect("Not an object");
    assert_eq!("mvmul", json_str!(obj, "op"));
    // Field reads keep the original order so errors surface identically.
    inst_data_builder
        .set_imm_group(json_i64!(obj, "group") as i32)
        .set_imm_relu(json_i64!(obj, "relu") as i32)
        .set_rd(json_i64!(obj, "rd") as i32)
        .set_r1(json_i64!(obj, "rs1") as i32)
        .set_mbiw(json_i64!(obj, "mbiw") as i32);
    inst_builder.make_inst(mvmul, inst_data_builder.build());
    Ok(())
}
/// Decodes a `vvadd` (element-wise vector add) JSON object.
fn json_to_vvadd(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vvadd", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let rs2 = json_i64!(json, "rs2") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_r2(rs2)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vvadd, inst_data_builder.build());
    Ok(())
}
fn json_to_vvsub(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
/// Decodes a `vvmul` (element-wise vector multiply) JSON object.
fn json_to_vvmul(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vvmul", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let rs2 = json_i64!(json, "rs2") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_r2(rs2)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vvmul, inst_data_builder.build());
    Ok(())
}
fn json_to_vvdmul(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
/// Decodes a `vvmax` (element-wise vector maximum) JSON object.
fn json_to_vvmax(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vvmax", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let rs2 = json_i64!(json, "rs2") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_r2(rs2)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vvmax, inst_data_builder.build());
    Ok(())
}
fn json_to_vvsll(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vvsra(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vavg(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
/// Decodes a `vrelu` (element-wise ReLU) JSON object.
fn json_to_vrelu(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vrelu", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vrelu, inst_data_builder.build());
    Ok(())
}
/// Decodes a `vtanh` (element-wise tanh) JSON object.
fn json_to_vtanh(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vtanh", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vtanh, inst_data_builder.build());
    Ok(())
}
/// Decodes a `vsigm` (element-wise sigmoid) JSON object.
fn json_to_vsigm(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    // This decoder is registered under the key "vsigm", so any object that
    // reaches it has op == "vsigm"; the previous assert on "vsigmoid"
    // could therefore never pass.
    assert_eq!("vsigm", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vsigm, inst_data_builder.build());
    Ok(())
}
fn json_to_vmv(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vrsu(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vrsl(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
///////////////////////////////////////////////////////////////
///Communication/synchronization Instructions/////////////////
///////////////////////////////////////////////////////////////
/// Decodes an `ld` (host-to-core copy) JSON object. Note the byte count
/// comes from the "size" field (not "len" as in the vector ops).
fn json_to_ld(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("ld", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "size") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(ld, inst_data_builder.build());
    Ok(())
}
/// Decodes an `st` (core-to-host copy) JSON object. Note the byte count
/// comes from the "size" field (not "len" as in the vector ops).
fn json_to_st(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("st", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "size") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(st, inst_data_builder.build());
    Ok(())
}
/// Decodes an `lldi` (local load-immediate) JSON object.
fn json_to_lldi(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("lldi", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    // NOTE(review): the integer immediate is reinterpreted as an f32 whose
    // bit pattern is packed into an i32, yet the executor only accepts
    // immediates that fit in a u8 — confirm this round-trip is intended.
    let imm = json_i64!(json, "imm") as f32;
    let imm = pack_float_in_i32(imm);
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_imm(imm)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(lldi, inst_data_builder.build());
    Ok(())
}
/// Decodes an `lmv` (local memory move) JSON object.
fn json_to_lmv(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("lmv", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(lmv, inst_data_builder.build());
    Ok(())
}
/// Decodes a `send` JSON object.
fn json_to_send(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("send", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    // +1: JSON core ids are shifted into the runtime numbering — presumably
    // because index 0 is reserved for the host; TODO confirm.
    let core = json_i64!(json, "core") as i32 + 1;
    let size = json_i64!(json, "size") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_imm_core(core)
        .set_imm_len(size)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(send, inst_data_builder.build());
    Ok(())
}
fn json_to_recv(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    // Decode a `recv` instruction from its JSON object form and append it.
    let obj = json.as_object().expect("Not an object");
    assert_eq!("recv", json_str!(obj, "op"));
    let rd = json_i64!(obj, "rd") as i32;
    // +1: JSON core ids appear to be 0-based while internal core ids reserve
    // slot 0 for the host (see `json_to_executor`) — TODO confirm.
    let source_core = json_i64!(obj, "core") as i32 + 1;
    let transfer_size = json_i64!(obj, "size") as i32;
    let (offset_select, offset_value) = json_to_offset(obj.get("offset").unwrap());
    inst_data_builder
        .set_rd(rd)
        .set_imm_core(source_core)
        .set_imm_len(transfer_size)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(recv, inst_data_builder.build());
    Ok(())
}
fn json_to_wait(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_sync(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
#[cfg(test)]
mod test {
    use serde_json::Value;
    use crate::{
        instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder},
        json_to_instruction::json_isa::json_to_instruction,
    };
    /// An unknown opcode must panic with a message naming the offending op.
    #[test]
    #[should_panic(expected = "Operation not found bazza")]
    fn test_op_not_present() {
        let json = r#"{"imm":15168,"op":"bazza","rd":1}"#;
        let json = serde_json::from_str(json).unwrap();
        let mut instruction_builder = InstructionsBuilder::new();
        let mut inst_data_builder = InstructionDataBuilder::new();
        json_to_instruction(&mut instruction_builder, &mut inst_data_builder, &json);
    }
    /// A well-formed `sldi` decodes without panicking (no output assertions;
    /// this is a smoke test for the dispatch path).
    #[test]
    fn test_json_sldi() {
        let json = r#"{"imm":15168,"op":"sldi","rd":1}"#;
        let json = serde_json::from_str(json).unwrap();
        let mut instruction_builder = InstructionsBuilder::new();
        let mut inst_data_builder = InstructionDataBuilder::new();
        json_to_instruction(&mut instruction_builder, &mut inst_data_builder, &json);
    }
}

View File

@@ -0,0 +1,46 @@
use core::panic;
use serde_json::{Map, Value};
use crate::{
CoreInstructionsBuilder, Executable,
cpu::{CPU, crossbar},
instruction_set::{
InstructionsBuilder,
instruction_data::{self, InstructionData, InstructionDataBuilder},
},
json_to_instruction::{self, json_isa},
memory_manager::type_traits::TryToUsize,
};
/// Builds an [`Executable`] from the simulator `config` JSON plus one
/// instruction-list JSON per core.
///
/// # Panics
/// Panics when a required config key is missing or has the wrong type, when
/// fewer core files than `core_cnt` are supplied, or when a core file is not
/// a JSON array of instructions.
pub fn json_to_executor<'a>(
    config: Value,
    mut cores: impl Iterator<Item = &'a Value>,
) -> Executable {
    // Currently-unused config values are still read and unwrapped (prefixed
    // with `_`) so a malformed config fails fast here, not mid-simulation.
    let _cell_precision = config.get("cell_precision").unwrap().as_i64().unwrap() as i32;
    let core_cnt = config.get("core_cnt").unwrap().as_i64().unwrap() as i32;
    let xbar_count = config.get("xbar_array_count").unwrap().as_i64().unwrap() as i32;
    let xbar_size = config.get("xbar_size").unwrap().as_array().unwrap();
    let rows_crossbar = xbar_size[0].as_i64().unwrap() as i32;
    let columns_crossbar = xbar_size[1].as_i64().unwrap() as i32; // renamed: was the typo `column_corssbar`
    let _array_group_map = config.get("array_group_map"); // not consumed yet
    let mut cpu = CPU::new(core_cnt);
    // NOTE(review): columns are multiplied by 4 — presumably a per-cell
    // precision/packing factor; confirm against the crossbar model.
    cpu.reserve_crossbar(xbar_count, columns_crossbar * 4, rows_crossbar);
    let mut core_insts_builder = CoreInstructionsBuilder::new(core_cnt as usize);
    // Core ids are 1-based; slot 0 of the instruction table is the host.
    for core_indx in 1..=core_cnt {
        let mut insts_builder = InstructionsBuilder::new();
        let mut inst_data_builder = InstructionDataBuilder::new();
        inst_data_builder.set_core_indx(core_indx).fix_core_indx();
        let json_core = cores
            .next()
            .unwrap_or_else(|| panic!("cores files less than {}", core_indx - 1));
        let json_core_insts = json_core
            .as_array()
            .unwrap_or_else(|| panic!("core{} has not a list of instruction", core_indx));
        for json_inst in json_core_insts {
            json_isa::json_to_instruction(&mut insts_builder, &mut inst_data_builder, json_inst);
        }
        core_insts_builder.set_core(core_indx, insts_builder.build());
    }
    Executable::new(cpu, core_insts_builder.build())
}

View File

@@ -0,0 +1,2 @@
mod json_isa;
pub mod json_to_executor;

View File

@@ -0,0 +1,245 @@
use std::fmt::Debug;
use anyhow::{Context, Result, bail, ensure};
use crate::memory_manager::type_traits::TryToUsize;
/// Marker for plain-old-data types that may live in a `CoreMemory` byte buffer
/// and be reinterpreted to/from raw bytes. The `Copy` bound guarantees the
/// type has no drop glue or owned resources, which the unsafe slice
/// reinterpretation below relies on.
pub trait MemoryStorable: Copy {}
pub mod type_traits;
// Only fixed-size numeric primitives are storable; every bit pattern is a
// valid value for each of these types.
impl MemoryStorable for f32 {}
impl MemoryStorable for f64 {}
impl MemoryStorable for i32 {}
impl MemoryStorable for i64 {}
impl MemoryStorable for u8 {}
impl MemoryStorable for i8 {}
/// A queued read: `size` bytes starting at byte offset `index` of the core's
/// memory, recorded by `reserve_load` and consumed by `execute_load`.
#[derive(Debug, Clone, Copy)]
struct LoadRequest {
    pub index: usize,
    pub size: usize,
}
/// Reinterprets a byte slice as a slice of `T` without copying.
///
/// # Safety
/// The caller must guarantee that the bytes form valid values of `T`;
/// `MemoryStorable` is only implemented for numeric primitives, for which
/// every bit pattern is valid.
///
/// # Errors
/// Fails when the byte slice is shorter than one `T`, is not a whole number
/// of `T`s, or is not aligned for `T` (`align_to` yields a non-empty
/// prefix/suffix in that case).
unsafe fn slice_from_u8<T>(slice: &[u8]) -> Result<&[T]>
where
    T: MemoryStorable,
{
    let size_t = size_of::<T>();
    let size = slice.len();
    ensure!(
        size >= size_t,
        "Size {} smaller than the data type size {}",
        size,
        size_t
    );
    ensure!(
        size.is_multiple_of(size_t),
        "Size not a multiple of selected data type size"
    );
    // SAFETY: reading the bytes as `T` is valid per this function's contract;
    // the checks below reject any misaligned head/tail.
    let (prefix, slice, suffix) = unsafe { slice.align_to::<T>() };
    ensure!(
        prefix.is_empty() && suffix.is_empty(),
        "{:?} {:?} T not aligned",
        prefix,
        suffix
    );
    Ok(slice)
}
/// Reinterprets a slice of `T` as its raw in-memory bytes (native byte order)
/// without copying.
///
/// # Safety
/// Always sound for `MemoryStorable` primitives: any byte view of them is
/// valid and `u8` has alignment 1.
unsafe fn slice_into_u8<T>(slice: &[T]) -> Result<&[u8]>
where
    T: MemoryStorable,
{
    // NOTE(review): empty slices are rejected even though an empty byte view
    // would be valid — confirm this restriction is intentional.
    ensure!(!slice.is_empty(), "Empty slice not convertable");
    // SAFETY: target type is u8 (align 1), so prefix/suffix are always empty.
    let (prefix, slice, suffix) = unsafe { slice.align_to::<u8>() };
    ensure!(prefix.is_empty() && suffix.is_empty(), "T not aligned");
    Ok(slice)
}
// Not thread safe: callers must serialize access externally.
/// Growable, zero-initialized byte memory for a single core, plus a queue of
/// pending load requests (`reserve_load` enqueues, `execute_load` drains).
#[derive(Debug, Clone)]
pub struct CoreMemory {
    memory: Vec<u8>,                 // backing byte store, grown on demand
    load_requests: Vec<LoadRequest>, // loads queued but not yet materialized
}
impl Default for CoreMemory {
    fn default() -> Self {
        Self::new()
    }
}
impl CoreMemory {
    /// Creates an empty memory with no pending load requests.
    pub fn new() -> Self {
        Self {
            memory: Vec::new(),
            load_requests: Vec::new(),
        }
    }
    /// Queues a read of `size` bytes at `address` and grows the backing store
    /// so the range is addressable; the data is produced later by
    /// [`Self::execute_load`].
    ///
    /// # Errors
    /// Fails when `address` or `size` is negative.
    pub fn reserve_load(
        &mut self,
        address: impl TryToUsize,
        size: impl TryToUsize,
    ) -> Result<&mut Self> {
        // (The stray empty `where` clause on the old signature is gone.)
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        let load_request = LoadRequest {
            index: address,
            size,
        };
        if self.memory.len() < address + size {
            // Grow to twice the required size (zero-filled) to amortize
            // repeated resizes.
            self.memory.resize((address + size) * 2, 0);
        }
        self.load_requests.push(load_request);
        Ok(self)
    }
    /// Drains every queued load request, viewing each byte range as a `&[T]`
    /// slice into this memory.
    ///
    /// # Errors
    /// Fails when a queued range is not a whole, aligned number of `T`s.
    pub fn execute_load<T>(&mut self) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        let Self {
            memory,
            load_requests,
        } = self;
        let mut res = Vec::with_capacity(load_requests.len());
        for (load_index, load_request) in load_requests.drain(..).enumerate() {
            let LoadRequest { index, size } = load_request;
            let memory_slice = &memory[index..index + size];
            // SAFETY: `MemoryStorable` types accept any bit pattern; length
            // and alignment are validated inside `slice_from_u8`.
            let memory_slice = unsafe { slice_from_u8(memory_slice) }
                .with_context(|| format!("Load number: {} Accessing from {} to {}", load_index, index, index + size))?;
            res.push(memory_slice);
        }
        Ok(res)
    }
    /// Convenience wrapper: queue one load and immediately execute every
    /// pending request (including ones queued earlier).
    ///
    /// # Errors
    /// Fails when `address`/`size` is negative (the previous version panicked
    /// here) or when a range cannot be viewed as `&[T]`.
    pub fn load<T>(&mut self, address: impl TryToUsize, size: impl TryToUsize) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        // Conversion and validation are delegated to `reserve_load`, so bad
        // input surfaces as `Err` instead of a panic.
        self.reserve_load(address, size)?.execute_load()
    }
    /// Writes `element` at byte offset `address`, growing the backing store
    /// if needed.
    ///
    /// # Errors
    /// Fails when `address` is negative or `element` is empty.
    pub fn execute_store<T>(&mut self, address: impl TryToUsize, element: &[T]) -> Result<()>
    where
        T: MemoryStorable,
    {
        let address = address.try_into().context("address can not be negative")?;
        let Self { memory, .. } = self;
        let size = std::mem::size_of_val(element);
        if memory.len() < address + size {
            memory.resize((address + size) * 2, 0);
        }
        // SAFETY: viewing a primitive slice as bytes is always valid.
        let slice = unsafe { slice_into_u8(element) }?;
        let memory_slice = &mut memory[address..address + size];
        memory_slice.copy_from_slice(slice);
        Ok(())
    }
    /// Fills `size` bytes starting at `address` with `val`, growing the
    /// backing store if needed.
    ///
    /// # Errors
    /// Fails when `address` or `size` is negative (the previous version
    /// panicked here, unlike the other fallible methods).
    pub fn memset(&mut self, address: impl TryToUsize, size: impl TryToUsize, val: u8) -> Result<()> {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        let Self { memory, .. } = self;
        if memory.len() < address + size {
            memory.resize((address + size) * 2, 0);
        }
        memory[address..address + size].fill(val);
        Ok(())
    }
    /// Ensures the backing store is at least `size` bytes long (never shrinks).
    ///
    /// # Panics
    /// Panics when `size` is negative.
    pub fn set_capacity(&mut self, size: impl TryToUsize) {
        let size = size.try_into().expect("size can not be negative");
        let Self { memory, .. } = self;
        if memory.len() < size {
            memory.resize(size, 0);
        }
    }
    /// Current length of the backing store in bytes (allocated, not "used").
    pub fn get_len(&self) -> usize {
        self.memory.len()
    }
    /// Drops all bytes (length becomes 0). Pending load requests are kept.
    pub(crate) fn clear(&mut self) {
        self.memory.clear();
    }
}
#[cfg(test)]
mod test {
    use super::*;
    /// Round-trips raw little-endian byte patterns into f32/f64 views.
    #[test]
    fn test_slice_from_u8() {
        let buff: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
        let transmuted: &[f32] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 0_f32 && transmuted[1] == 0_f32,
            "Failed conversion from [00 00 00 00, 00 00 00 00] to [0_f32, 0_f32]"
        );
        let buff = [0xdb, 0x0f, 0x49, 0x40];
        let transmuted: &[f32] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == std::f32::consts::PI,
            "Failed conversion from [0xdb, 0x0f, 0x49, 0x40] to [3.14]"
        );
        let buff = [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40];
        let transmuted: &[f64] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 6.14_f64,
            "Failed conversion from [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40] to [6.14]"
        );
    }
    /// Checks the float -> raw-bytes direction.
    #[test]
    fn test_slice_into_u8() {
        let buff: [f32; 2] = [0_f32, 0_f32];
        let transmuted: &[u8] = unsafe { slice_into_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 0
                && transmuted[1] == 0
                && transmuted[2] == 0
                && transmuted[3] == 0
                && transmuted[4] == 0
                && transmuted[5] == 0
                && transmuted[6] == 0
                && transmuted[7] == 0,
            "Failed conversion from [00 00 00 00, 00 00 00 00] to [0_f32, 0_f32]"
        );
        let buff = [std::f32::consts::PI];
        let transmuted: &[u8] = unsafe { slice_into_u8(&buff).unwrap() };
        println!("{:?}", transmuted);
        assert!(
            transmuted[0] == 0xdb
                && transmuted[1] == 0x0f
                && transmuted[2] == 0x49
                && transmuted[3] == 0x40,
            "Failed conversion from [0xdb, 0x0f, 0x49, 0x40] to [3.14]"
        );
        let buff = [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40];
        let transmuted: &[f64] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 6.14_f64,
            "Failed conversion from [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40] to [6.14]"
        );
    }
    /// Store four floats, read two back via queued loads, write the sum, and
    /// verify the final memory image. All `Result`s are now unwrapped so a
    /// failed store/reserve can no longer be silently ignored by the test.
    #[test]
    fn test_simple_load_store() {
        let mut core_memory = CoreMemory::new();
        core_memory
            .execute_store(0, &[5_f32, 6_f32, 7_f32, 15_f32])
            .unwrap();
        core_memory.reserve_load(0, 4).unwrap(); // Load [5_f32]
        core_memory.reserve_load(8, 8).unwrap(); // Load [7_f32, 15_f32]
        let loads: Vec<&[f32]> = core_memory.execute_load().unwrap();
        let mut data = [0_f32; 2];
        data[0] = loads[0][0] + loads[1][0];
        core_memory.execute_store(4, &data[0..1]).unwrap();
        let loads: &[f32] = core_memory.reserve_load(0, 16).unwrap().execute_load().unwrap()[0];
        println!("{:?}", loads);
        assert!(loads[0] == 5_f32 && loads[1] == 12_f32 && loads[2] == 7_f32 && loads[3] == 15_f32)
    }
}

View File

@@ -0,0 +1,208 @@
use std::{
borrow::Cow,
fmt::Debug,
ops::{Add, Div, Mul, Sub},
};
use anyhow::Context;
/// Conversion from either float width into `Self`, used by generic kernels
/// that must accept `f32` or `f64` inputs.
pub trait FromFloat {
    fn from_f32(val: f32) -> Self;
    fn from_f64(val: f64) -> Self;
}

impl FromFloat for f32 {
    // Identity.
    fn from_f32(v: f32) -> Self {
        v
    }
    // Narrowing: rounds to the nearest representable f32.
    fn from_f64(v: f64) -> Self {
        v as f32
    }
}

impl FromFloat for f64 {
    // Widening: exact.
    fn from_f32(v: f32) -> Self {
        f64::from(v)
    }
    // Identity.
    fn from_f64(v: f64) -> Self {
        v
    }
}
/// Hyperbolic tangent, abstracted so generic kernels can call `tanh` on
/// either float width.
pub trait HasTanh {
    fn tanh(self) -> Self;
}

impl HasTanh for f32 {
    fn tanh(self) -> Self {
        // Fully-qualified call to the *inherent* method. A bare `self.tanh()`
        // would resolve to the same thing (inherent methods win over trait
        // methods), but spelling it out removes any appearance of recursion.
        f32::tanh(self)
    }
}

impl HasTanh for f64 {
    fn tanh(self) -> Self {
        f64::tanh(self)
    }
}
/// Logistic sigmoid: `sigm(x) = e^x / (1 + e^x)` (equivalently
/// `1 / (1 + e^-x)`), abstracted over float width for generic kernels.
pub trait HasSigm {
    fn sigm(self) -> Self;
}

impl HasSigm for f32 {
    fn sigm(self) -> Self {
        // BUG FIX: the previous version computed `x.powf(x)` (x^x) instead of
        // e^x (its unused local `e` gave the mistake away). `exp` is both
        // correct and more precise than `E.powf(x)`.
        let ex = self.exp();
        ex / (1.0 + ex)
    }
}

impl HasSigm for f64 {
    fn sigm(self) -> Self {
        let ex = self.exp();
        ex / (1.0 + ex)
    }
}
/// Anything fallibly convertible to `usize` whose error type plugs into
/// `anyhow::Context`, so call sites can write
/// `x.try_into().context("…")?` on signed addresses/sizes.
pub trait TryToUsize: TryInto<usize, Error = Self::TryError>
where std::result::Result<usize, Self::TryError> : Context<usize, Self::TryError>
{
    // Associated error type; the bounds make it usable with `anyhow`.
    type TryError: Debug + Send + Sync + 'static + std::error::Error;
}
// Blanket impl: any suitable `TryInto<usize>` type is automatically
// `TryToUsize`; callers never implement this trait by hand.
impl<T, E> TryToUsize for T
where
    T: TryInto<usize, Error = E>,
    E: Debug + Send + Sync + 'static + std::error::Error,
    std::result::Result<usize, E> : Context<usize, E>
{
    type TryError = E;
}
/// Total (possibly lossy) conversion from `usize` into a float type, e.g. for
/// dividing by element counts in generic code.
pub trait FromUsize {
    fn from_usize(v: usize) -> Self;
}

impl FromUsize for f32 {
    fn from_usize(v: usize) -> Self {
        v as f32
    }
}

impl FromUsize for f64 {
    fn from_usize(v: usize) -> Self {
        v as f64
    }
}
/// Bundle of capabilities a numeric type needs to be the *destination* of a
/// slice upcast: arithmetic, comparison, the activation helpers
/// (`tanh`/`sigm`) and `usize` conversion used by the vector kernels.
pub trait UpcastDestTraits<T>:
    Sync
    + Send
    + Sized
    + Clone
    + Copy
    + Debug
    + Add<Output = T>
    + Sub<Output = T>
    + Mul<Output = T>
    + Div<Output = T>
    + PartialEq<T>
    + PartialOrd<T>
    + HasTanh
    + HasSigm
    + FromUsize
{
}
/// Views `&[Self-element]` as a slice of `X`: `Cow::Borrowed` when no
/// conversion is needed, `Cow::Owned` when each element must be cast
/// (see the `upcast_impl!` macro below).
pub trait UpcastSlice<X> {
    fn up<'a>(&'a self) -> Cow<'a, [X]>
    where
        [X]: ToOwned<Owned = Vec<X>>,
        X: UpcastDestTraits<X>;
}
/// Generates `UpcastSlice` impls.
///
/// Arms:
/// * `($from, $to)` — cross-type impl: allocates a converted `Vec` (an `as`
///   cast per element) and returns `Cow::Owned`.
/// * `($same)` — identity impl: borrows the slice (`Cow::Borrowed`, no
///   allocation) and also marks the type as a valid upcast destination
///   (`UpcastDestTraits`).
macro_rules! upcast_impl {
    ($from:ty, $to:ty) => {
        impl UpcastSlice<$to> for [$from] {
            fn up<'a>(&'a self) -> Cow<'a, [$to]>
            where
                [$to]: ToOwned<Owned = Vec<$to>>,
            {
                self.iter().map(|&x| x as $to).collect::<Vec<_>>().into()
            }
        }
    };
    ($same:ty) => {
        impl UpcastSlice<$same> for [$same] {
            fn up<'a>(&'a self) -> Cow<'a, [$same]>
            where
                [$same]: ToOwned<Owned = Vec<$same>>,
            {
                self.into()
            }
        }
        impl UpcastDestTraits<$same> for $same {}
    };
}
upcast_impl!(f32, f64);
// NOTE(review): `f64 -> f32` goes through the same "upcast" machinery but is
// actually a narrowing (precision-losing) conversion.
upcast_impl!(f64, f32);
upcast_impl!(f32);
upcast_impl!(f64);
#[cfg(test)]
mod test {
    use core::panic;
    use std::borrow::Cow;
    use crate::memory_manager::type_traits::{UpcastDestTraits, UpcastSlice};
    /// Same-type upcast must borrow (no allocation).
    #[test]
    fn test_same_type() {
        let elem: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let elem_up: Cow<[f32]> = elem.up();
        if let std::borrow::Cow::Owned(_) = elem_up {
            panic!("Allocate when same element")
        }
    }
    /// Cross-type upcast must allocate an owned, converted vector.
    #[test]
    fn test_different_type() {
        let elem: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let elem_up: Cow<[f64]> = elem.up();
        if let std::borrow::Cow::Borrowed(_) = elem_up {
            panic!("Not allocating with different type")
        }
    }
    // Exercises the trait bounds the way the vector kernels use them:
    // both inputs are upcast to a common destination type, then summed.
    fn generic_sum<A, B, T>(a: &[A], b: &[B]) -> Vec<T>
    where
        [A]: UpcastSlice<T>,
        [B]: UpcastSlice<T>,
        T: UpcastDestTraits<T>,
    {
        let a_up = a.up();
        let b_up = b.up();
        let mut ret = Vec::new();
        for (a, b) in a_up.iter().zip(b_up.iter()) {
            ret.push(*a + *b);
        }
        ret
    }
    /// f32 + f64 slices combine into an f64 result through the upcast traits.
    #[test]
    fn sum_between_different_slice_in_generic() {
        let a: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let b: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
        let c: Vec<f64> = generic_sum(&a, &b);
        assert_eq!(c, vec![2.0, 4.0, 6.0, 8.0]);
    }
}
}

View File

@@ -0,0 +1,197 @@
#![allow(unused)]
use crate::{
cpu::CPU, instruction_set::{Instruction, InstructionStatus, Instructions}, memory_manager::type_traits::TryToUsize, send_recv::{SendRecv, handle_send_recv}, tracing::TRACER
};
pub mod cpu;
pub mod instruction_set;
pub mod memory_manager;
pub mod send_recv;
pub mod utility;
pub mod json_to_instruction;
pub mod tracing;
/// Builds the per-core instruction table handed to [`Executable`].
///
/// Slot 0 is the host and slots `1..=size` are the compute cores, so the
/// table always holds `size + 1` entries.
#[derive(Debug, Clone)]
pub struct CoreInstructionsBuilder {
    core_instructions: Vec<CoreInstruction>,
}
impl CoreInstructionsBuilder {
    /// Creates a builder pre-filled with `size + 1` empty programs.
    pub fn new(size: usize) -> Self {
        // FIX: the old version reserved capacity for `size` entries but then
        // pushed `size + 1`, guaranteeing one reallocation. `vec!` sizes the
        // allocation correctly in one shot.
        Self {
            core_instructions: vec![CoreInstruction::empty(); size + 1],
        }
    }
    /// Consumes the builder, returning the finished instruction table.
    pub fn build(self) -> Vec<CoreInstruction> {
        self.core_instructions
    }
    /// Installs `core_instruction` as the program for slot `core`.
    ///
    /// # Panics
    /// Panics when `core` is negative or out of range.
    pub fn set_core(&mut self, core: impl TryToUsize, core_instruction: Instructions) -> &mut Self {
        self.core_instructions[core.try_into().expect("Set core with not valid size")] = core_instruction.into();
        self
    }
}
/// A core's program together with its current program counter.
#[derive(Debug, Clone)]
pub struct CoreInstruction {
    instructions: Instructions,
    program_counter: usize,
}
impl CoreInstruction {
    // NOTE(review): appears unused in this file (everything goes through
    // `empty`/`From`); confirm before removing.
    fn new(instructions: Instructions, program_counter: usize) -> Self {
        Self {
            instructions,
            program_counter,
        }
    }
    /// Empty program starting at pc 0 — the placeholder for cores that were
    /// given no instructions.
    fn empty() -> Self {
        Self { instructions: Vec::new(), program_counter: 0 }
    }
}
impl From<Instructions> for CoreInstruction {
    /// Wraps an instruction list with the program counter reset to 0.
    fn from(value: Instructions) -> Self {
        CoreInstruction {
            instructions: value,
            program_counter: 0,
        }
    }
}
/// A complete simulation: CPU state, one program per core (host included),
/// and the bookkeeping for in-flight core-to-core transfers.
#[derive(Debug, Clone)]
pub struct Executable {
    cpu: CPU,
    core_instructions: Vec<CoreInstruction>,
    send_recv : SendRecv,
}
impl Executable {
pub fn new(cpu: CPU, core_instructions: Vec<CoreInstruction>) -> Self {
let num_core = cpu.num_core();
let send_recv = SendRecv::new(num_core);
assert_eq!(num_core, core_instructions.len(), "Some core doesn't have is list of istruction (required even if empty)");
Self {
cpu,
core_instructions,
send_recv
}
}
pub fn execute(&mut self) {
TRACER.lock().unwrap().init(self);
let Self {
cpu,
core_instructions,
send_recv
} = self;
let mut cpu_progressed = 0;
let max_core = cpu.num_core();
let mut index_unit = 0;
while (cpu_progressed > -2) {
let mut core_result = InstructionStatus::Completed;
while core_result.is_completed() && let Some(core_instruction) = core_instructions.get_mut(index_unit){
core_result = InstructionStatus::NotExecuted;
let CoreInstruction {
instructions,
program_counter,
} = core_instruction;
core_result = instructions
.get(*program_counter)
.map_or(InstructionStatus::default(), |inst: &Instruction| {
inst.execute(cpu)
});
if core_result.is_completed() {
cpu_progressed = 0;
*program_counter += 1;
}
}
if handle_send_recv(cpu, core_instructions, send_recv, core_result) { cpu_progressed = 0; }
handle_wait_sync(cpu, core_instructions, core_result);
index_unit = if index_unit + 1 >= max_core {
cpu_progressed-=1;
0
} else {
index_unit + 1
};
}
}
pub fn cpu(&self) -> &CPU {
&self.cpu
}
pub fn cpu_mut(&mut self) -> &mut CPU {
&mut self.cpu
}
}
// Deliberate no-op for now: `wait`/`sync` are not yet emitted by the compiler
// (their JSON decoders are `todo!` stubs), so there is nothing to handle.
fn handle_wait_sync(cpu: &mut CPU, core_instructions: &mut [CoreInstruction], core_result: InstructionStatus) {
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::instruction_set::instruction_data::InstructionDataBuilder;
    use crate::instruction_set::{InstructionsBuilder, isa::*};
    /// Host-only run: store sixteen i32s, load mem[0] (=1) and mem[8] (=3)
    /// through r1/r2, add them, and expect 4 in register 2.
    #[test]
    fn test_only_host() {
        let mut cpu = CPU::new(0);
        cpu.host()
            .execute_store(0, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
        let mut inst_builder = InstructionsBuilder::new();
        let mut idata_build = InstructionDataBuilder::new();
        idata_build.set_core_indx(0).fix_core_indx();
        inst_builder.make_inst(sldi, idata_build.set_rdimm(1, 0).build());
        inst_builder.make_inst(sld, idata_build.set_rdr1(1, 1).build());
        inst_builder.make_inst(sldi, idata_build.set_rdimm(2, 8).build());
        inst_builder.make_inst(sld, idata_build.set_rdr1(2, 2).build());
        inst_builder.make_inst(sadd, idata_build.set_rdr1r2(2, 1, 2).build());
        let mut core_instruction = vec![inst_builder.build().into()];
        let mut executable = Executable::new(cpu, core_instruction);
        executable.execute();
        assert_eq!(executable.cpu_mut().host().register(2), 4, "Not sum to 4");
    }
    /// Ten cores run the same load/load/add program independently; every core
    /// must end with 4 in register 2.
    #[test]
    fn test_10_core_same_code() {
        // Builds the shared program and seeds core `index`'s memory.
        let setup_core = |index: usize, cpu: &mut CPU| -> Instructions {
            cpu.core(index)
                .execute_store(0, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let mut inst_builder = InstructionsBuilder::new();
            let mut idata_build = InstructionDataBuilder::new();
            idata_build.set_core_indx(index as i32).fix_core_indx();
            inst_builder.make_inst(sldi, idata_build.set_rdimm(1, 0).build());
            inst_builder.make_inst(sld, idata_build.set_rdr1(1, 1).build());
            inst_builder.make_inst(sldi, idata_build.set_rdimm(2, 8).build());
            inst_builder.make_inst(sld, idata_build.set_rdr1(2, 2).build());
            inst_builder.make_inst(sadd, idata_build.set_rdr1r2(2, 1, 2).build());
            inst_builder.build()
        };
        let mut cpu = CPU::new(10);
        let mut core_instruction = Vec::new();
        for i in 0..cpu.num_core() {
            core_instruction.push(setup_core(i, &mut cpu).into())
        }
        let mut executable = Executable::new(cpu, core_instruction);
        executable.execute();
        for i in 0.. executable.cpu.num_core() {
            assert_eq!(executable.cpu_mut().core(i).register(2), 4, "Core {} not sum to 4", i);
        }
    }
}

View File

@@ -0,0 +1,143 @@
use anyhow::Context;
use crate::{
CoreInstruction, cpu::CPU, instruction_set::InstructionStatus, tracing::TRACER,
utility::add_offset_rd,
};
/// One half of a pending core-to-core transfer as seen from `internal_core`:
/// `external_core` is the peer, `address`/`size` describe the local byte
/// range (source for a send, destination for a receive).
#[derive(Debug, Clone, Copy)]
struct SendRecvInfo {
    internal_core: usize,
    external_core: usize,
    address: usize,
    size: usize,
}
impl SendRecvInfo {
    fn new(internal_core: usize, external_core: usize, address: usize, size: usize) -> Self {
        Self {
            internal_core,
            external_core,
            address,
            size,
        }
    }
}
/// Per-core registries of in-flight transfers, indexed by core id.
/// `None` means that core has no pending send (resp. receive).
#[derive(Debug, Clone)]
pub struct SendRecv {
    sending: Box<[Option<SendRecvInfo>]>,
    receiving: Box<[Option<SendRecvInfo>]>,
}
impl SendRecv {
    /// Creates the tracker with one empty send slot and one empty receive
    /// slot per core (host slot included).
    pub fn new(num_core: usize) -> Self {
        Self {
            sending: vec![None; num_core].into(),
            receiving: vec![None; num_core].into(),
        }
    }
}
/// Tries to complete a pending core-to-core transfer after a core reported a
/// `Sending`/`Reciving` status.
///
/// The half described by `core_result` is registered in `send_recv`; if the
/// matching opposite half is already registered, the bytes are copied, trace
/// events are emitted, and both cores' program counters advance. Returns
/// `true` only when a transfer actually completed.
pub fn handle_send_recv(
    cpu: &mut CPU,
    core_instructions: &mut [CoreInstruction],
    send_recv: &mut SendRecv,
    core_result: InstructionStatus,
) -> bool {
    // Completes a matched pair: the two infos must reference each other
    // (A sends to B while B receives from A).
    let transfer_memory = |cpu: &mut CPU,
                           core_instructions: &mut [CoreInstruction],
                           sender: Option<SendRecvInfo>,
                           receiver: Option<SendRecvInfo>| {
        if let Some(sender) = sender
            && let Some(receiver) = receiver
            && sender.internal_core == receiver.external_core
            && receiver.internal_core == sender.external_core
        {
            let [sender_core, reciver_core] =
                cpu.get_multiple_cores([sender.internal_core, receiver.internal_core]);
            let memory = sender_core
                .load::<u8>(sender.address, sender.size)
                .with_context(|| {
                    format!(
                        "Sender crash tranfering memroy from {} with size {}",
                        sender.address, sender.size
                    )
                })
                .unwrap();
            reciver_core.execute_store(receiver.address, memory[0]);
            // Trace the send side at its current program counter.
            {
                let sender = &mut core_instructions[sender.internal_core];
                let pc = sender.program_counter;
                let inst = sender.instructions.get(pc).unwrap();
                let data = inst.data;
                TRACER.lock().unwrap().post_send(cpu, data);
            }
            // Trace the receive side at its current program counter.
            {
                let recv = &mut core_instructions[receiver.internal_core];
                let pc = recv.program_counter;
                let inst = recv.instructions.get(pc).unwrap();
                let data = inst.data;
                TRACER.lock().unwrap().post_recv(cpu, data);
            }
            // Both cores were blocked on this transfer; step them past it.
            core_instructions[sender.internal_core].program_counter += 1;
            core_instructions[receiver.internal_core].program_counter += 1;
            return true;
        }
        false
    };
    match core_result {
        InstructionStatus::Sending(instruction_data) => {
            let (core_idx, imm_core) = instruction_data.get_core_immcore();
            let r1 = instruction_data.r1();
            let imm_len = instruction_data
                .imm_len()
                .try_into()
                .expect("imm_len can not be negative");
            let offset_value = instruction_data.offset_value();
            let core = cpu.core(core_idx);
            let r1_val = core.register(r1);
            // NOTE(review): passes a constant offset_select of 1 to
            // `add_offset_rd`, which tests bit 4 — so the offset is never
            // applied here. Confirm whether `add_offset_r1` was intended.
            let address = add_offset_rd(r1_val, 1, offset_value);
            let sender: usize = core_idx.try_into().expect("core can not be negative");
            assert_ne!(sender, 0, "Host can not use send");
            let receiver: usize = imm_core.try_into().expect("imm_core can not be negative");
            assert_ne!(receiver, 0, "Host can not use receive");
            send_recv.sending[sender] = Some(SendRecvInfo::new(sender, receiver, address, imm_len));
            transfer_memory(
                cpu,
                core_instructions,
                send_recv.sending[sender],
                send_recv.receiving[receiver],
            )
        }
        InstructionStatus::Reciving(instruction_data) => {
            let (core_idx, imm_core) = instruction_data.get_core_immcore();
            let rd = instruction_data.rd();
            let imm_len = instruction_data
                .imm_len()
                .try_into()
                .expect("imm_len can not be negative");
            let offset_value = instruction_data.offset_value();
            let core = cpu.core(core_idx);
            let rd_val = core.register(rd);
            // NOTE(review): constant offset_select of 4 makes `add_offset_rd`
            // apply `4 * offset_value` (see `add_offset_impl`) — confirm the
            // 4x scaling is intended.
            let address = add_offset_rd(rd_val, 4, offset_value);
            let receiver: usize = core_idx.try_into().expect("core can not be negative");
            assert_ne!(receiver, 0, "Host can not use receive");
            let sender: usize = imm_core.try_into().expect("imm_core can not be negative");
            assert_ne!(sender, 0, "Host can not use send");
            send_recv.receiving[receiver] =
                Some(SendRecvInfo::new(receiver, sender, address, imm_len));
            transfer_memory(
                cpu,
                core_instructions,
                send_recv.sending[sender],
                send_recv.receiving[receiver],
            )
        }
        // Any other status carries no transfer information.
        _ => false,
    }
}

View File

@@ -0,0 +1,289 @@
use std::fs::File;
use crate::{
cpu::CPU,
instruction_set::instruction_data::InstructionData,
memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
},
tracing::Trace,
utility::{add_offset_r1, add_offset_rd},
};
use std::io::Write;
// No-op tracing stubs compiled when the `tracing` feature is OFF. Every hook
// the instruction set calls exists with the same signature here, so call
// sites need no cfg guards; presumably the real implementations live in
// `tracing_isa` (feature on) — confirm.
#[cfg(not(feature = "tracing"))]
impl Trace {
    ///////////////////////////////////////////////////////////////
    /////////////////Scalar/register Instructions//////////////////
    ///////////////////////////////////////////////////////////////
    pub fn pre_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_sld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_sld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_smul(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_smul(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    /////////////////////////////////////////////////////////////////
    ///////////////////Matrix/vector Instructions////////////////////
    /////////////////////////////////////////////////////////////////
    pub fn pre_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    // The generic bounds mirror the vector kernels' requirements so these
    // stubs stay drop-in compatible with the feature-on implementations.
    pub fn pre_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.mvm_impl::<F,M,T>(cores, data, "Pre");
    }
    pub fn post_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.mvm_impl::<F,M,T>(cores, data, "Post");
    }
    pub fn mvm_impl<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData, prefix : &'static str)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn post_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn pre_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn post_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn pre_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn post_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    /////////////////////////////////////////////////////////////////
    /////Communication/synchronization Instructions/////////////////
    /////////////////////////////////////////////////////////////////
    pub fn pre_ld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_ld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_st(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_st(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_send(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_send(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_recv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_recv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
}

View File

@@ -0,0 +1,52 @@
mod tracing_isa;
mod disable;
mod pretty_print;
#[cfg(feature = "tracing")]
use std::fs::File;
use std::sync::{LazyLock, Mutex};
use crate::Executable;
// Real tracer, compiled only with the `tracing` feature: one output file per
// core, created lazily by `init`.
#[cfg(feature = "tracing")]
pub struct Trace {
    out_files : Vec<File>
}
#[cfg(feature = "tracing")]
impl Trace {
    fn new() -> Self {
        Self { out_files : Vec::new()}
    }
    /// Opens one `TraceCore{i}` file per core in the current directory.
    ///
    /// # Panics
    /// Panics if any trace file cannot be created.
    pub fn init(&mut self, executor : & Executable) {
        let cpu = executor.cpu();
        let num_core = cpu.num_core();
        for i in 0..num_core {
            let file = File::create(format!("TraceCore{}", i)).expect("Can not create file");
            self.out_files.push(file);
        }
    }
}
// Zero-sized stand-in compiled when the `tracing` feature is OFF; `init` is
// a no-op so the executor can call it unconditionally.
#[cfg(not(feature = "tracing"))]
pub struct Trace {
}
#[cfg(not(feature = "tracing"))]
impl Trace {
    fn new() -> Self {
        Self { }
    }
    pub fn init(&mut self, executor : & Executable) {
    }
}
pub static TRACER: LazyLock<Mutex<Trace>> = LazyLock::new(|| { Trace::new().into()});

View File

@@ -0,0 +1,19 @@
use std::{fmt::Debug, io::Write};
/// Debug-prints `slice`, reinterpreted as elements of type `F`, `size` items
/// per row, wrapped in indented brackets.
///
/// Output is best-effort: write errors are deliberately ignored (this is a
/// diagnostic dump), which the explicit `let _ =` now documents instead of
/// silently discarding `#[must_use]` results.
///
/// # Panics
/// Panics if `size` is 0, or if the byte view of `slice` is not an aligned,
/// whole number of `F` values.
pub fn print_slice<D: Debug, F: Sized + Debug>(writer: &mut impl Write, slice: &[D], size: usize) {
    // SAFETY: sound only when the bytes of `D` are valid values of `F`; the
    // asserts reject any misaligned head/tail so the view covers the slice
    // exactly (callers pass plain numeric types).
    let slice = unsafe {
        let (head, body, tail) = slice.align_to::<F>();
        assert_eq!(head.len(), 0);
        assert_eq!(tail.len(), 0);
        body
    };
    let _ = writeln!(writer, "\t\t[");
    for row in slice.chunks(size) {
        let _ = write!(writer, "\t\t");
        for element in row {
            let _ = write!(writer, "{:?} ", element);
        }
        let _ = writeln!(writer);
    }
    let _ = writeln!(writer, "\t\t]");
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
use std::{fmt::Debug, mem::transmute};
use crate::memory_manager::type_traits::TryToUsize;
/// Applies a register-selected offset to `address`.
///
/// `offset_select` is a one-hot bit field (r1 = 1, r2 = 2, rd = 4); the
/// offset only takes effect when it matches `id`.
/// NOTE(review): `(offset_select & id)` evaluates to the *bit value* (1, 2
/// or 4), so a matching rd bit scales the offset by 4 and r2 by 2 — confirm
/// this scaling is intended rather than a plain "bit set" test.
///
/// # Panics
/// Panics when `offset_select` is not one of 1/2/4 (unless the offset is 0),
/// or when a negative offset would move the address below 0.
fn add_offset_impl(address: usize, offset_select: i32, offset_value: i32, id: i32) -> usize {
    assert!(
        offset_select == 1 || offset_select == 2 || offset_select == 4 || offset_value == 0,
        "offset_select not a bit field"
    );
    let offset = (offset_select & id) * offset_value;
    // FIX: the old code computed `address - offset as usize` for negative
    // offsets; casting a negative i32 to usize wraps, so the subtraction
    // underflowed (debug panic) instead of moving the address backwards.
    if offset >= 0 {
        address + offset as usize
    } else {
        address - offset.unsigned_abs() as usize
    }
}
/// Applies the rd-selected offset (bit 4 of `offset_select`) to `address`.
pub fn add_offset_rd(address: impl TryToUsize, offset_select: i32, offset_value: i32) -> usize {
    add_offset_impl(
        address.try_into().expect("address can not be negative"),
        offset_select,
        offset_value,
        4,
    )
}
/// Applies the r1-selected offset (bit 1 of `offset_select`) to `address`.
pub fn add_offset_r1(address: impl TryToUsize, offset_select: i32, offset_value: i32) -> usize {
    add_offset_impl(
        address.try_into().expect("address can not be negative"),
        offset_select,
        offset_value,
        1,
    )
}
/// Applies the r2-selected offset (bit 2 of `offset_select`) to `address`.
pub fn add_offset_r2(address: impl TryToUsize, offset_select: i32, offset_value: i32) -> usize {
    add_offset_impl(
        address.try_into().expect("address can not be negative"),
        offset_select,
        offset_value,
        2,
    )
}
/// Packs the bit pattern of an `f32` into an `i32` so a float immediate can
/// travel through an integer instruction field (see `json_to_lldi`).
///
/// # Panics
/// Panics when `val` cannot be converted to `f32`.
pub fn pack_float_in_i32(val: impl TryInto<f32>) -> i32 {
    // `|_|` instead of the old `|x|`: the error value was never used.
    let val = val.try_into().unwrap_or_else(|_| panic!("Cannot parse into f32"));
    // Bit-preserving reinterpretation (same bits, not a numeric cast of the
    // value); equivalent to `u32::cast_signed`.
    f32::to_bits(val) as i32
}