add pimsim-nn simulator

This commit is contained in:
NiccoloN
2026-02-23 16:17:52 +01:00
parent 9c716d2582
commit 9ba08a6780
66 changed files with 8 additions and 4 deletions

View File

@@ -0,0 +1,211 @@
use anyhow::{Context, Result, bail};
use clap::Parser;
use glob::glob;
use pimcore::cpu::crossbar;
use pimcore::json_to_instruction::json_to_executor;
use serde_json::Value;
use std::{fs, usize};
use std::io::Write;
use std::path::PathBuf;
/// Program to simulate core execution configuration
// NOTE: the `///` doc comments on the fields below double as clap `--help`
// text, so they are user-visible program output and must not be reworded
// casually.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// The folder containing the configuration files
    // Default source for config.json, core*.json, memory.* and core_<i>/ dirs.
    #[arg(short, long)]
    folder: Option<PathBuf>,
    /// Override path for the config.json file
    #[arg(long)]
    config: Option<PathBuf>,
    /// Override path for the core*.json files
    #[arg(long, value_delimiter = ',', num_args = 1..)]
    cores: Option<Vec<PathBuf>>,
    /// Override path for the memory file (.txt or .bin)
    #[arg(long)]
    memory: Option<PathBuf>,
    /// Path to output file
    #[arg(short, long)]
    output: PathBuf,
    /// Comma separated list of (address,size) for memory output dump
    // Flat list consumed pairwise as (address, size) by dump_memory.
    #[arg(short, long, value_delimiter = ',', num_args = 1.., value_name = "ADDR,SIZE")]
    dump: Vec<i32>,
}
/// Entry point: resolve inputs, build the executor, run the simulation and
/// dump the requested memory regions to the output file.
fn main() -> Result<()> {
    let args = Args::parse();
    // Inputs come either from explicit override flags or from --folder.
    let config_json = retrive_config(&args)?;
    let core_jsons = retrive_cores(&args)?;
    let memory = retrive_memory(&args)?;
    let mut executor = json_to_executor::json_to_executor(config_json, core_jsons.iter());
    // Preload crossbar weights from core_<i>/crossbar_<j>.bin files, if present.
    populate_crossbar(&args, &mut executor);
    // Copy the initial memory image into the host core before running.
    set_memory(&mut executor, memory);
    executor.execute();
    dump_memory(executor, &args)?;
    Ok(())
}
fn populate_crossbar(args: &Args, executor: &mut pimcore::Executable) {
let num_cores = executor.cpu_mut().num_core();
if let Some(folder) = args.folder.as_ref() {
for core_idx in 0..num_cores {
let core_folder = folder.join(format!("core_{}", core_idx));
if !core_folder.is_dir() {
continue;
}
let mut bin_files: Vec<(u32, std::path::PathBuf)> = std::fs::read_dir(&core_folder)
.expect("Failed to read core directory")
.filter_map(|entry| {
let path = entry.ok()?.path();
let file_name = path.file_name()?.to_str()?;
if file_name.starts_with("crossbar_") && file_name.ends_with(".bin") {
let num_str = &file_name[9..file_name.len() - 4];
let num = num_str.parse::<u32>().ok()?;
Some((num, path))
} else {
None
}
})
.collect();
bin_files.sort_by_key(|&(num, _)| num);
let core = executor.cpu_mut().core(core_idx+1);
let (_memory, crossbars) = core.get_memory_crossbar();
for (i, path) in bin_files {
let bytes = std::fs::read(path).expect("Failed to read binary file");
crossbars.get_mut(i as usize)
.unwrap().execute_store(&bytes).unwrap();
}
}
}
}
fn dump_memory(mut executor: pimcore::Executable, args: &Args) -> Result<()> {
let dumps: Vec<(i32, i32)> = args
.dump
.chunks_exact(2)
.map(|chunk| (chunk[0], chunk[1]))
.collect();
let mut out_file = fs::OpenOptions::new()
.create(true)
.write(true)
.truncate(true)
.open(&args.output)
.with_context(|| format!("cannot open file {:?} for writing", args.output))?;
for (address, size) in dumps {
out_file.write_all(executor.cpu_mut().host().load::<u8>(address, size).unwrap()[0])?;
}
Ok(())
}
/// Copies the initial memory image into the host core starting at address 0.
fn set_memory(executor: &mut pimcore::Executable, memory: Vec<u8>) {
    // NOTE(review): a failed store aborts the whole run via unwrap; assumes
    // the host memory is large enough for the image — TODO confirm sizing.
    executor.cpu_mut().host().execute_store(0, &memory).unwrap();
}
fn retrive_memory(args: &Args) -> Result<Vec<u8>> {
let memory_path = if let Some(mem_override) = &args.memory {
mem_override.clone()
} else if let Some(folder) = &args.folder.as_ref() {
let bin_path = folder.join("memory.bin");
let txt_path = folder.join("memory.txt");
if bin_path.exists() {
bin_path
} else if txt_path.exists() {
txt_path
} else {
bail!(
"No memory file (memory.bin or memory.txt) found in {:?}",
folder
);
}
} else {
bail!("Either --memory or --folder must be provided.");
};
let memory_vector: Vec<u8> = if let Some(ext) = memory_path.extension() {
if ext == "bin" {
fs::read(&memory_path).context("Failed to read binary memory file")?
} else {
let content =
fs::read_to_string(&memory_path).context("Failed to read ascii memory file")?;
let clean_hex: String = content.chars().filter(|c| !c.is_whitespace()).collect();
hex::decode(&clean_hex).context("Failed to decode ASCII hex string")?
}
} else {
bail!("Memory file has no extension, cannot determine type");
};
Ok(memory_vector)
}
/// Loads the core definition JSON documents.
///
/// Explicit `--cores` paths take precedence; otherwise every `core*.json`
/// inside `--folder` is loaded in glob order.
fn retrive_cores(args: &Args) -> Result<Vec<Value>, anyhow::Error> {
    let mut core_jsons: Vec<Value> = Vec::new();
    if let Some(cores_override) = &args.cores {
        for core in cores_override {
            // BUG FIX: the error context used to print the whole override
            // list instead of the file that actually failed to read.
            let content = fs::read_to_string(core)
                .with_context(|| format!("Failed to read core file: {:?}", core))?;
            let json: Value =
                serde_json::from_str(&content).context("Failed to parse core json override")?;
            core_jsons.push(json);
        }
    } else if let Some(folder) = args.folder.as_ref() {
        let pattern = folder.join("core*.json");
        let pattern_str = pattern.to_str().context("Invalid path encoding")?;
        let paths: Vec<_> = glob(pattern_str)?.collect();
        if paths.is_empty() {
            bail!("No core*.json files found in {:?}", folder);
        }
        for entry in paths {
            let path = entry?;
            let content = fs::read_to_string(&path)
                .with_context(|| format!("Failed to read core file: {:?}", path))?;
            let json: Value = serde_json::from_str(&content)
                .with_context(|| format!("Failed to parse JSON in {:?}", path))?;
            core_jsons.push(json);
        }
    } else {
        bail!("Either --core or --folder must be provided to find core definitions.");
    }
    Ok(core_jsons)
}
fn retrive_config(args: &Args) -> Result<Value, anyhow::Error> {
let config_path: PathBuf = {
let override_path = args.config.as_ref();
let folder = args.folder.as_ref();
let filename = "config.json";
if let Some(path) = override_path {
path.clone()
} else if let Some(folder) = folder {
let path = folder.join(filename);
if path.exists() {
path
} else {
bail!("File {} not found in folder {:?}", filename, folder);
}
} else {
bail!(
"Cannot resolve {}: no override path and no input folder provided.",
filename
);
}
};
let config_content = fs::read_to_string(&config_path)
.with_context(|| format!("Failed to read config file: {:?}", config_path))?;
let config_json: Value =
serde_json::from_str(&config_content).context("Failed to parse config.json")?;
Ok(config_json)
}

View File

@@ -0,0 +1,53 @@
use crate::memory_manager::{CoreMemory, MemoryStorable};
use anyhow::{Result, bail, ensure};
/// A fixed-size crossbar array backed by a `CoreMemory` buffer.
#[derive(Debug, Clone)]
pub struct Crossbar {
    // Row width — TODO confirm unit (callers pass a byte width).
    max_width: usize,
    max_height: usize,
    // Bytes written by the most recent successful execute_store.
    stored_bytes: usize,
    memory: CoreMemory,
}
impl Crossbar {
    pub fn new(width: usize, height: usize, memory: CoreMemory) -> Self {
        Self {
            max_width: width,
            max_height: height,
            memory,
            stored_bytes: 0,
        }
    }
    pub fn width(&self) -> usize {
        self.max_width
    }
    pub fn height(&self) -> usize {
        self.max_height
    }
    pub fn stored_bytes(&self) -> usize {
        self.stored_bytes
    }
    /// Replaces the crossbar contents with `element`.
    ///
    /// # Errors
    /// Fails when the payload exceeds `max_width * max_height` bytes or the
    /// underlying store fails.
    pub fn execute_store<T>(&mut self, element: &[T]) -> Result<()>
    where
        T: MemoryStorable,
    {
        self.memory.clear();
        let total_size = self.max_width * self.max_height;
        self.memory.set_capacity(total_size);
        let stored_size = std::mem::size_of_val(element);
        ensure!(stored_size <= total_size, "Storing more than crossbar can handle");
        // BUG FIX: stored_bytes was updated before the store ran, so a failed
        // store left it describing data that was never written.
        self.memory.execute_store(0, element)?;
        self.stored_bytes = stored_size;
        Ok(())
    }
    /// Reads the first `size` bytes of the crossbar as `T` slices.
    pub fn load<T>(&mut self, size: usize) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        if self.memory.get_len() < size
        // NOTE(review): the stored_bytes bound below was deliberately
        // disabled; callers currently read the full capacity — confirm.
        //|| self.stored_bytes < size
        {
            bail!(
                "Loading outside crossbar boundary [{} {}] < {}",
                self.stored_bytes,
                self.memory.get_len(),
                size
            );
        }
        self.memory.load(0, size)
    }
}

View File

@@ -0,0 +1,174 @@
use std::{collections::HashMap, fmt::Debug};
use anyhow::{Context, Result};
use crate::{
cpu::crossbar::Crossbar,
instruction_set::Instructions,
memory_manager::{CoreMemory, MemoryStorable, type_traits::TryToUsize},
};
pub mod crossbar;
/// The simulated processor: a host core plus worker cores.
///
/// `cores[0]` is always the host; worker cores occupy indices `1..len`.
#[derive(Debug, Clone)]
pub struct CPU {
    cores: Box<[Core]>,
}
impl CPU {
    /// Creates `num_cores` worker cores plus one extra slot for the host.
    pub fn new(num_cores: impl TryToUsize) -> Self {
        let num_cores = num_cores.try_into().expect("num_cores can not be negative");
        // FIX: dropped the unused `mut` — the Vec is never mutated.
        // +1 for the host core at index 0.
        let cores: Vec<Core> = std::iter::repeat_with(Core::new)
            .take(num_cores + 1)
            .collect();
        Self {
            cores: cores.into(),
        }
    }
    /// Reserves `num_crossbar` crossbars of `byte_width * height` bytes on
    /// every core, host included.
    pub fn reserve_crossbar(
        &mut self,
        num_crossbar: impl TryToUsize,
        byte_width: impl TryToUsize,
        height: impl TryToUsize,
    ) {
        let num_crossbar = num_crossbar
            .try_into()
            .expect("num_crossbar can not be negative");
        let byte_width = byte_width
            .try_into()
            .expect("byte_width can not be negative");
        let height = height.try_into().expect("height can not be negative");
        for core in &mut self.cores {
            core.reserve_crossbar(num_crossbar, byte_width, height);
        }
    }
    /// The host core (index 0).
    pub fn host(&mut self) -> &mut Core {
        &mut self.cores[0]
    }
    /// A core by absolute index; index 0 is the host.
    ///
    /// # Panics
    /// Panics when `index` is negative or out of range.
    pub fn core(&mut self, index: impl TryToUsize) -> &mut Core {
        let index = index.try_into().expect("can not be negative");
        &mut self.cores[index]
    }
    /// Total number of cores INCLUDING the host slot at index 0.
    pub fn num_core(&self) -> usize {
        self.cores.len()
    }
    /// Borrows the host and one worker core simultaneously.
    ///
    /// # Panics
    /// Panics when `core` is 0 (would alias the host) or out of range.
    pub(crate) fn host_and_cores(&mut self, core: impl TryToUsize) -> (&mut Core, &mut Core) {
        let core = core.try_into().expect("core can not be negative");
        assert_ne!(
            core, 0,
            "Retriving a core with the host that will always be at position 0"
        );
        let (host, cores) = self.cores.split_at_mut(1);
        let host = host.get_mut(0).expect("Missing the host??");
        let core = cores
            .get_mut(core - 1)
            .expect("Requested a core not present ");
        (host, core)
    }
    /// Borrows several distinct cores at once.
    ///
    /// # Panics
    /// Panics when indices overlap or are out of bounds.
    pub fn get_multiple_cores<const N: usize>(&mut self, indices: [usize; N]) -> [&mut Core; N] {
        self.cores.get_disjoint_mut(indices).unwrap()
    }
}
/// One simulated core: local memory, crossbar arrays and 32 registers.
#[derive(Debug, Clone)]
pub struct Core {
    crossbars: Vec<Crossbar>,
    memory: CoreMemory,
    registers: [i32; 32],
}
impl Core {
    fn new() -> Self {
        Self {
            crossbars: Vec::new(),
            memory: CoreMemory::new(),
            registers: [0; 32],
        }
    }
    /// Appends `num_crossbar` crossbars, each backed by `width * height`
    /// bytes of capacity.
    pub fn reserve_crossbar(
        &mut self,
        num_crossbar: impl TryToUsize,
        width: impl TryToUsize,
        height: impl TryToUsize,
    ) {
        let num_crossbar = num_crossbar
            .try_into()
            .expect("num_crossbar can not be negative");
        let width = width.try_into().expect("width can not be negative");
        let height = height.try_into().expect("height can not be negative");
        for _ in 0..num_crossbar {
            let mut crossbar = CoreMemory::new();
            crossbar.set_capacity(width * height);
            self.crossbars.push(Crossbar::new(width, height, crossbar));
        }
    }
    /// Executes every load previously queued with [`Core::reserve_load`].
    pub fn execute_load<T>(&mut self) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        self.memory.execute_load()
    }
    /// Stores `element` into core memory at `address`.
    pub fn execute_store<T>(&mut self, address: impl TryToUsize, element: &[T]) -> Result<()>
    where
        T: MemoryStorable,
    {
        let address = address.try_into().context("address can not be negative")?;
        self.memory.execute_store(address, element)
    }
    /// Queues a load of `size` bytes at `address`; further loads may be
    /// chained on the returned memory handle before `execute_load`.
    pub fn reserve_load(
        &mut self,
        address: impl TryToUsize,
        size: impl TryToUsize,
    ) -> Result<&mut CoreMemory> {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        self.memory.reserve_load(address, size)
    }
    /// Writes `value` into register `index`.
    ///
    /// # Panics
    /// Panics on a negative `value`: register contents are later consumed as
    /// usize (see the assert message), so negatives are rejected eagerly.
    pub fn set_register(&mut self, index: impl TryToUsize, value: i32) {
        let index = index.try_into().expect("index can not be negative");
        assert!(
            value >= 0,
            "Register cannot be negative if happens remove this and go check where it's used as usize"
        );
        self.registers[index] = value;
    }
    /// Reads register `index`.
    pub fn register(&mut self, index: impl TryToUsize) -> i32 {
        let index = index.try_into().expect("index can not be negative");
        self.registers[index]
    }
    /// Immediately loads `size` bytes at `address` as `T` slices.
    pub fn load<T>(&mut self, address: impl TryToUsize, size: impl TryToUsize) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        self.memory.load(address, size)
    }
    /// Splits the borrow so memory and crossbars can be used at once.
    pub fn get_memory_crossbar(&mut self) -> (&mut CoreMemory, &mut Vec<Crossbar>) {
        // FIX: use `..` instead of binding `registers`, which was unused and
        // produced an unused-variable warning.
        let Self {
            crossbars, memory, ..
        } = self;
        (memory, crossbars)
    }
    /// Fills `size` bytes starting at `address` with `val`.
    pub fn memset(&mut self, address: impl TryToUsize, size: impl TryToUsize, val: u8) -> Result<()> {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        self.memory.memset(address, size, val)
    }
}

View File

@@ -0,0 +1,272 @@
use std::mem::transmute;
use crate::memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits},
};
/// Reinterprets a slice of 4- or 8-byte float-like `M` values as `f32`/`f64`
/// and sums it with the width-specific kernel.
///
/// # Panics
/// Panics when `size_of::<M>()` is neither 4 nor 8.
#[inline]
pub fn add_all<M>(sum: &[M]) -> M
where
    M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
{
    if size_of::<M>() == 4 {
        // SAFETY: M is exactly 4 bytes, so each element can be viewed as an
        // f32 bit pattern. NOTE(review): soundness additionally assumes M is
        // an f32-like type (the FromFloat bound suggests so) — confirm.
        let (prefix, slice, suffix) = unsafe { sum.align_to::<f32>() };
        // align_to may shave misaligned elements into prefix/suffix; losing
        // them would silently corrupt the sum, so catch it in debug builds.
        debug_assert!(prefix.is_empty() && suffix.is_empty());
        return M::from_f32(add_all_f32_impl(slice));
    } else if size_of::<M>() == 8 {
        // SAFETY: M is exactly 8 bytes; same caveat as the f32 branch.
        let (prefix, slice, suffix) = unsafe { sum.align_to::<f64>() };
        debug_assert!(prefix.is_empty() && suffix.is_empty());
        return M::from_f64(add_all_f64_impl(slice));
    }
    panic!("Size not found");
}
/// Sums a slice of f64 values.
///
/// Slices longer than 64 elements are accumulated in blocks of 64 (AVX-512
/// reductions when compiled with `avx512f`, otherwise four 16-element
/// partial sums) followed by a scalar tail loop. The blocked order may round
/// differently from a strict left-to-right sum.
#[inline]
pub fn add_all_f64_impl(sum: &[f64]) -> f64 {
    let len = sum.len();
    if len <= 64 {
        return sum.iter().sum();
    }
    let mut acc = 0f64;
    let mut y = 0;
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "avx512f"
    ))]
    {
        // FIX: `y + 64 <= len` (was `<`) so an exact multiple of 64 is fully
        // handled by the vector path instead of falling to the tail loop.
        while y + 64 <= len {
            // SAFETY: y + 64 <= len keeps every 8-lane load in bounds.
            // `loadu` replaces the aligned `_mm512_load_pd`: the slice has no
            // 64-byte alignment guarantee, so an aligned load could fault.
            unsafe {
                use std::arch::x86_64::_mm512_loadu_pd;
                use std::arch::x86_64::_mm512_reduce_add_pd;
                let a = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[y]));
                let b = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[8 + y]));
                let c = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[16 + y]));
                let d = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[24 + y]));
                let e = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[32 + y]));
                let f = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[40 + y]));
                let g = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[48 + y]));
                // BUG FIX: this load previously started at offset 54, which
                // double-counted sum[54..56] and skipped sum[62..64].
                let h = _mm512_reduce_add_pd(_mm512_loadu_pd(&sum[56 + y]));
                acc = acc + a + b + c + d + e + f + g + h;
            }
            y += 64;
        }
    }
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        not(target_feature = "avx512f")
    ))]
    {
        while y + 64 <= len {
            // Four independent 16-element partial sums keep the dependency
            // chain short, matching the hand-unrolled original.
            let mut block = [0f64; 4];
            for (q, chunk) in sum[y..y + 64].chunks_exact(16).enumerate() {
                block[q] = chunk.iter().sum();
            }
            acc = acc + block[0] + block[1] + block[2] + block[3];
            y += 64;
        }
    }
    // Scalar tail: also covers non-x86 targets, where both blocks above
    // compile out and y stays 0.
    for &v in &sum[y..] {
        acc += v;
    }
    acc
}
/// Sums a slice of f32 values.
///
/// Slices longer than 64 elements are accumulated in blocks of 64 (AVX-512
/// reductions when compiled with `avx512f`, otherwise four 16-element
/// partial sums) followed by a scalar tail loop. The blocked order may round
/// differently from a strict left-to-right sum.
/// (FIX: removed two dead `rem_4` locals — both unused, the second shadowed
/// the first.)
#[inline]
pub fn add_all_f32_impl(sum: &[f32]) -> f32 {
    let len = sum.len();
    if len <= 64 {
        return sum.iter().sum();
    }
    let mut acc = 0f32;
    let mut y = 0;
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "avx512f"
    ))]
    {
        // FIX: `y + 64 <= len` (was `<`) so exact multiples of 64 are fully
        // vectorised.
        while y + 64 <= len {
            // SAFETY: y + 64 <= len keeps all four 16-lane loads in bounds.
            // `loadu` replaces the aligned `_mm512_load_ps`: the slice has no
            // 64-byte alignment guarantee, so an aligned load could fault.
            unsafe {
                use std::arch::x86_64::{_mm512_loadu_ps, _mm512_reduce_add_ps};
                let a = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[y]));
                let b = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[16 + y]));
                let c = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[32 + y]));
                let d = _mm512_reduce_add_ps(_mm512_loadu_ps(&sum[48 + y]));
                acc = acc + a + b + c + d;
            }
            y += 64;
        }
    }
    #[cfg(all(
        any(target_arch = "x86", target_arch = "x86_64"),
        not(target_feature = "avx512f")
    ))]
    {
        while y + 64 <= len {
            // Four independent 16-element partial sums keep the dependency
            // chain short, matching the hand-unrolled original.
            let mut block = [0f32; 4];
            for (q, chunk) in sum[y..y + 64].chunks_exact(16).enumerate() {
                block[q] = chunk.iter().sum();
            }
            acc = acc + block[0] + block[1] + block[2] + block[3];
            y += 64;
        }
    }
    // Scalar tail: also covers non-x86 targets, where both blocks above
    // compile out and y stays 0.
    for &v in &sum[y..] {
        acc += v;
    }
    acc
}

View File

@@ -0,0 +1,315 @@
use paste::paste;
/// Decoded operand payload shared by every instruction.
///
/// Several logical operands alias one storage slot; the accessor names below
/// document which interpretation a given instruction uses.
#[derive(Clone, Copy, Debug, Default)]
pub struct InstructionData {
    core_indx: i32,
    rd: i32,
    r1: i32,
    // Aliases: r2 / imm / mbiw / imm_core.
    r2_or_imm: i32,
    // Aliases: offset_select / imm_relu / ibiw.
    generic1: i32,
    // Aliases: offset_value / imm_group / obiw.
    generic2: i32,
    // Alias: imm_len.
    generic3: i32,
}
impl InstructionData {
    // --- scalar accessors -------------------------------------------------
    /// Target core index.
    pub fn core_indx(&self) -> i32 {
        self.core_indx
    }
    /// Destination register.
    pub fn rd(&self) -> i32 {
        self.rd
    }
    /// First source register.
    pub fn r1(&self) -> i32 {
        self.r1
    }
    /// Second source register (aliases the imm slot).
    pub fn r2(&self) -> i32 {
        self.r2_or_imm
    }
    /// Immediate value (aliases the r2 slot).
    pub fn imm(&self) -> i32 {
        self.r2_or_imm
    }
    /// Matrix bit width (aliases the r2 slot).
    pub fn mbiw(&self) -> i32 {
        self.r2_or_imm
    }
    /// Core immediate (aliases the r2 slot).
    pub fn imm_core(&self) -> i32 {
        self.r2_or_imm
    }
    pub fn offset_select(&self) -> i32 {
        self.generic1
    }
    pub fn offset_value(&self) -> i32 {
        self.generic2
    }
    pub fn imm_len(&self) -> i32 {
        self.generic3
    }
    // --- tuple accessors, composed from the scalar ones -------------------
    pub fn get_core_rd_r1(&self) -> (i32, i32, i32) {
        (self.core_indx(), self.rd(), self.r1())
    }
    pub fn get_core_rd_r1_r2(&self) -> (i32, i32, i32, i32) {
        (self.core_indx(), self.rd(), self.r1(), self.r2())
    }
    pub fn get_core_rd_imm(&self) -> (i32, i32, i32) {
        (self.core_indx(), self.rd(), self.imm())
    }
    pub fn get_core_rd_r1_imm(&self) -> (i32, i32, i32, i32) {
        (self.core_indx(), self.rd(), self.r1(), self.imm())
    }
    pub fn get_core_rd_r1_r2_immlen_offset(&self) -> (i32, i32, i32, i32, i32, i32, i32) {
        (
            self.core_indx(),
            self.rd(),
            self.r1(),
            self.r2(),
            self.imm_len(),
            self.offset_select(),
            self.offset_value(),
        )
    }
    pub fn get_core_rd_r1_mbiw_immrelu_immgroup(&self) -> (i32, i32, i32, i32, i32, i32) {
        (
            self.core_indx(),
            self.rd(),
            self.r1(),
            self.mbiw(),
            self.generic1,
            self.generic2,
        )
    }
    pub fn get_ibiw_obiw(&self) -> (i32, i32) {
        (self.generic1, self.generic2)
    }
    pub(crate) fn get_core_immcore(&self) -> (i32, i32) {
        (self.core_indx(), self.imm_core())
    }
}
/// Tracks whether a builder field is pinned (`Fix`) or editable (`Edit`).
///
/// `clear` resets `Edit` values to 0 but leaves `Fix`ed values untouched, so
/// a decoder can pin a field across many built instructions.
#[derive(Clone, Copy, Debug)]
enum Fixer {
    Fix(i32),
    Edit(i32),
}
impl Fixer {
    /// Pins the current value so `clear` keeps it.
    fn to_fix(self) -> Self {
        Fixer::Fix(self.get())
    }
    /// Makes the current value editable again.
    fn to_edit(self) -> Self {
        Fixer::Edit(self.get())
    }
    /// Replaces the value, preserving the Fix/Edit state.
    fn set(self, val: i32) -> Self {
        match self {
            Fixer::Fix(_) => Fixer::Fix(val),
            Fixer::Edit(_) => Fixer::Edit(val),
        }
    }
    fn get(&self) -> i32 {
        match *self {
            Fixer::Fix(x) | Fixer::Edit(x) => x,
        }
    }
    /// Resets editable values to 0; fixed values are kept.
    fn clear(self) -> Self {
        match self {
            fixed @ Fixer::Fix(_) => fixed,
            Fixer::Edit(_) => Fixer::Edit(0),
        }
    }
}
impl Default for Fixer {
    fn default() -> Self {
        Fixer::Edit(0)
    }
}
/// Builder for [`InstructionData`].
///
/// Every field is a [`Fixer`]: `Edit` values are reset to 0 by `clear` (and
/// after every `build`), while `Fix`ed values survive, letting a decoder pin
/// a field (e.g. the core index) across many instructions.
#[derive(Clone, Copy, Debug, Default)]
pub struct InstructionDataBuilder {
    core_indx: Fixer,
    rd: Fixer,
    r1: Fixer,
    r2: Fixer,
    imm: Fixer,
    offset_select: Fixer,
    offset_value: Fixer,
    imm_len: Fixer,
    ibiw: Fixer,
    obiw: Fixer,
    mbiw: Fixer,
    imm_relu: Fixer,
    imm_group: Fixer,
    imm_core: Fixer,
}
// Generates fix_<f> / edit_<f> / set_<f> / get_<f> / clear_<f> accessors for
// one builder field via `paste` identifier concatenation.
macro_rules! common_getter_setter {
    ($id:ident) => {
        paste! {
            pub fn [<fix_ $id>](&mut self) -> &mut Self {
                self.[<$id>] = self.[<$id>].to_fix();
                self
            }
            pub fn [<edit_ $id>](&mut self) -> &mut Self {
                self.[<$id>] = self.[<$id>].to_edit();
                self
            }
            pub fn [<set_ $id>](&mut self, val: i32) -> &mut Self{
                self.[<$id>] = self.[<$id>].set(val);
                self
            }
            pub fn [<get_ $id>](&self) -> i32{
                self.[<$id>].get()
            }
            pub fn [<clear_ $id>](&mut self){
                self.[<$id>] = self.[<$id>].clear();
            }
        }
    };
}
impl InstructionDataBuilder {
    common_getter_setter![core_indx];
    common_getter_setter![rd];
    common_getter_setter![r1];
    common_getter_setter![r2];
    common_getter_setter![imm];
    common_getter_setter![offset_select];
    common_getter_setter![offset_value];
    common_getter_setter![imm_len];
    common_getter_setter![ibiw];
    common_getter_setter![obiw];
    common_getter_setter![mbiw];
    common_getter_setter![imm_relu];
    common_getter_setter![imm_group];
    common_getter_setter![imm_core];
    /// Fresh builder with every field editable and zeroed.
    pub fn new() -> Self {
        Self {
            core_indx: Fixer::Edit(0),
            rd: Fixer::Edit(0),
            r1: Fixer::Edit(0),
            r2: Fixer::Edit(0),
            imm: Fixer::Edit(0),
            offset_select: Fixer::Edit(0),
            offset_value: Fixer::Edit(0),
            imm_len: Fixer::Edit(0),
            ibiw: Fixer::Edit(0),
            obiw: Fixer::Edit(0),
            mbiw: Fixer::Edit(0),
            imm_relu: Fixer::Edit(0),
            imm_group: Fixer::Edit(0),
            imm_core: Fixer::Edit(0),
        }
    }
    /// Clears every field; `Fix`ed fields keep their value (see [`Fixer`]).
    pub fn clear(&mut self) {
        self.clear_core_indx();
        self.clear_rd();
        self.clear_r1();
        self.clear_r2();
        self.clear_imm();
        self.clear_offset_value();
        self.clear_offset_select();
        self.clear_imm_len();
        self.clear_ibiw();
        self.clear_obiw();
        self.clear_mbiw();
        self.clear_imm_relu();
        self.clear_imm_group();
        self.clear_imm_core();
    }
    // NOTE(review): each assert only fires when ALL fields of an aliasing
    // group are nonzero at once. If the intent is "at most one member of the
    // group may be set" (build() sums them), the `&&` chains look too weak —
    // TODO confirm against the instruction encoding.
    fn check_sanity(&self) {
        assert!(!(self.get_r2() != 0 && self.get_imm() != 0 && self.get_mbiw() != 0 && self.get_imm_core() != 0));
        assert!(
            !(self.get_ibiw() != 0 && self.get_offset_select() != 0 && self.get_imm_relu() != 0)
        );
        assert!(
            !(self.get_obiw() != 0 && self.get_offset_value() != 0 && self.get_imm_group() != 0)
        );
    }
    /// Builds the packed [`InstructionData`]. Aliased fields are combined by
    /// summation (valid only if at most one of each group is nonzero) and
    /// the builder is cleared afterwards.
    pub fn build(&mut self) -> InstructionData {
        self.check_sanity();
        let inst_data = InstructionData {
            core_indx: self.get_core_indx(),
            rd: self.get_rd(),
            r1: self.get_r1(),
            r2_or_imm: self.get_r2() + self.get_imm() + self.get_mbiw() + self.get_imm_core(),
            generic1: self.get_offset_select() + self.get_ibiw() + self.get_imm_relu(),
            generic2: self.get_offset_value() + self.get_obiw() + self.get_imm_group(),
            generic3: self.get_imm_len(),
        };
        self.clear();
        inst_data
    }
    // Convenience multi-field setters used by the decoders.
    pub fn set_rdr1r2(&mut self, rd: i32, r1: i32, r2: i32) -> &mut Self {
        self.set_rd(rd).set_r1(r1).set_r2(r2)
    }
    pub fn set_offset_select_value(&mut self, offset_select: i32, offset_value: i32) -> &mut Self {
        self.set_offset_select(offset_select)
            .set_offset_value(offset_value)
    }
    pub fn set_rdr1imm(&mut self, rd: i32, r1: i32, imm: i32) -> &mut Self {
        self.set_rd(rd).set_r1(r1).set_imm(imm)
    }
    pub fn set_rdr1(&mut self, rd: i32, r1: i32) -> &mut Self {
        self.set_rd(rd).set_r1(r1)
    }
    pub fn set_rdimm(&mut self, rd: i32, imm: i32) -> &mut Self {
        self.set_rd(rd).set_imm(imm)
    }
    pub fn set_ibiw_obiw(&mut self, ibiw: i32, obiw: i32) -> &mut Self {
        self.set_ibiw(ibiw).set_obiw(obiw)
    }
    pub fn set_mbiw_immrelu_immgroup(
        &mut self,
        mbiw: i32,
        imm_relu: i32,
        imm_group: i32,
    ) -> &mut Self {
        self.set_mbiw(mbiw)
            .set_imm_relu(imm_relu)
            .set_imm_group(imm_group)
    }
}

View File

@@ -0,0 +1,722 @@
use crate::{
cpu::{CPU, crossbar}, instruction_set::{
Instruction, InstructionData, InstructionStatus, InstructionType, VectorBitWith,
helper::add_all,
}, memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
}, tracing::TRACER, utility::{add_offset_r1, add_offset_r2, add_offset_rd}
};
use aligned_vec::{AVec, ConstAlign};
use anyhow::{Context, Result, ensure};
use paste::paste;
use std::{borrow::Cow, cell::OnceCell, collections::HashMap};
use std::{collections::HashSet, sync::LazyLock};
// Registers `$id`'s function-pointer address (as a usize key) with its
// stringified name; consumed by `functor_to_name` for tracing output.
macro_rules! add_name {
    ($storage:ident, $id:ident) => {
        $storage.insert($id as *const () as usize, stringify!($id));
    };
}
// Like `add_name!`, but registers all four (f32/f64 in, f32/f64 out)
// monomorphisations of `$id`'s generic `_impl` kernel.
macro_rules! add_name_simd {
    ($storage:ident, $id:ident) => {
        paste! {
            $storage.insert([<$id _impl>]::<f32,f32> as *const () as usize, concat!(stringify!($id), "::<f32, f32>"));
            $storage.insert([<$id _impl>]::<f64,f64> as *const () as usize, concat!(stringify!($id), "::<f64, f64>"));
            $storage.insert([<$id _impl>]::<f32,f64> as *const () as usize, concat!(stringify!($id), "::<f32, f64>"));
            $storage.insert([<$id _impl>]::<f64,f32> as *const () as usize, concat!(stringify!($id), "::<f64, f32>"));
        }
    };
}
/// Maps every instruction function pointer (and every monomorphised SIMD
/// kernel) to a human-readable name; looked up by `functor_to_name`.
static NAMES: LazyLock<HashMap<usize, &'static str>> = LazyLock::new(|| {
    let mut hash = HashMap::new();
    add_name!(hash, sldi);
    add_name!(hash, sld);
    add_name!(hash, sadd);
    add_name!(hash, ssub);
    add_name!(hash, smul);
    add_name!(hash, saddi);
    add_name!(hash, smuli);
    add_name!(hash, setbw);
    add_name_simd!(hash, mvmul);
    add_name_simd!(hash, vvadd);
    add_name_simd!(hash, vvsub);
    add_name_simd!(hash, vvmul);
    add_name_simd!(hash, vvdmul);
    add_name_simd!(hash, vvmax);
    add_name!(hash, vvsll);
    add_name!(hash, vvsra);
    add_name_simd!(hash, vavg);
    add_name_simd!(hash, vrelu);
    add_name_simd!(hash, vtanh);
    add_name_simd!(hash, vsigm);
    add_name!(hash, vmv);
    add_name!(hash, vrsu);
    add_name!(hash, vrsl);
    add_name!(hash, ld);
    add_name!(hash, st);
    add_name!(hash, lldi);
    add_name!(hash, lmv);
    add_name!(hash, send);
    add_name!(hash, recv);
    add_name!(hash, wait);
    add_name!(hash, sync);
    hash
});
/// Resolves an instruction function-pointer address to its registered name.
///
/// # Panics
/// Panics when `functor` was never registered in `NAMES`.
pub fn functor_to_name(functor: usize) -> &'static str {
    NAMES
        .get(&functor)
        // FIX: include the unknown address so a missing registration can be
        // diagnosed; the old message carried no information.
        .unwrap_or_else(|| panic!("Function not found: {:#x}", functor))
}
///////////////////////////////////////////////////////////////
/////////////////Scalar/register Instructions//////////////////
///////////////////////////////////////////////////////////////
/// sldi: scalar load-immediate — writes `imm` into register `rd` of the
/// selected core.
pub fn sldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_sldi(cores, data);
    let (core_indx, rd, imm) = data.get_core_rd_imm();
    let core = cores.core(core_indx);
    core.set_register(rd, imm);
    TRACER.lock().unwrap().post_sldi(cores, data);
    Ok(InstructionStatus::Completed)
}
/// sld: scalar load — reads an i32 from HOST memory at `r1 + offset_value`
/// and writes it into register `rd` of the selected core.
pub fn sld(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_sld(cores, data);
    let (core_indx, rd, r1) = data.get_core_rd_r1();
    let offset_value = data.offset_value();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    // The source address lives in host memory, so the CPU is re-borrowed for
    // the host and then again for the target core: the borrow checker forbids
    // holding both core references at once.
    let host = cores.host();
    let num = host.load::<i32>((r1_val + offset_value), 4)?[0][0];
    let core = cores.core(core_indx);
    core.set_register(rd, num);
    TRACER.lock().unwrap().post_sld(cores, data);
    Ok(InstructionStatus::Completed)
}
/// sadd: scalar add — rd = r1 + r2 on the selected core.
pub fn sadd(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_sadd(cores, data);
    let (core_indx, rd, r1, r2) = data.get_core_rd_r1_r2();
    let core = cores.core(core_indx);
    let result = core.register(r1) + core.register(r2);
    core.set_register(rd, result);
    Ok(InstructionStatus::Completed)
}
/// ssub: scalar subtract — rd = r1 - r2 on the selected core.
pub fn ssub(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_ssub(cores, data);
    let (core_indx, rd, r1, r2) = data.get_core_rd_r1_r2();
    let core = cores.core(core_indx);
    let difference = core.register(r1) - core.register(r2);
    core.set_register(rd, difference);
    Ok(InstructionStatus::Completed)
}
/// smul: scalar multiply — rd = r1 * r2 on the selected core.
pub fn smul(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_smul(cores, data);
    let (core_indx, rd, r1, r2) = data.get_core_rd_r1_r2();
    let core = cores.core(core_indx);
    let product = core.register(r1) * core.register(r2);
    core.set_register(rd, product);
    Ok(InstructionStatus::Completed)
}
/// saddi: scalar add-immediate — rd = r1 + imm on the selected core.
pub fn saddi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_saddi(cores, data);
    let (core_indx, rd, r1, imm) = data.get_core_rd_r1_imm();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    // BUG FIX: this computed `r1_val - imm`, i.e. a subtraction inside the
    // add-immediate instruction; saddi must add, mirroring sadd.
    core.set_register(rd, r1_val + imm);
    Ok(InstructionStatus::Completed)
}
/// smuli: scalar multiply-immediate — rd = r1 * imm on the selected core.
pub fn smuli(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_smuli(cores, data);
    let (core_indx, rd, r1, imm) = data.get_core_rd_r1_imm();
    let core = cores.core(core_indx);
    let product = core.register(r1) * imm;
    core.set_register(rd, product);
    Ok(InstructionStatus::Completed)
}
///////////////////////////////////////////////////////////////
/////////////////Matrix/vector Instructions////////////////////
///////////////////////////////////////////////////////////////
// Registers the (input, output) bit-width monomorphisations of `$id`'s
// generic `_impl` kernel, keyed by the placeholder's function-pointer
// address; consumed by `dispatch_simd`.
macro_rules! add_simd_to_map {
    ($storage:ident, $id:ident) => {
        paste! {
            let mut tmp = HashMap::new();
            tmp.insert((32_usize,32_usize), ([<$id _impl>]::<f32, f32> as InstructionType));
            tmp.insert((32_usize,64_usize), ([<$id _impl>]::<f32, f64> as InstructionType));
            tmp.insert((64_usize,32_usize), ([<$id _impl>]::<f64, f32> as InstructionType));
            tmp.insert((64_usize,64_usize), ([<$id _impl>]::<f64, f64> as InstructionType));
            // NOTE(review): 8-bit widths are routed to the f32 kernel; the
            // original comment ("WTF WHY") shows the reason is unknown — TODO
            // confirm against the pimsim-nn instruction spec.
            tmp.insert((8_usize,8_usize), ([<$id _impl>]::<f32, f32> as InstructionType));
            $storage.insert($id as *const () as usize, tmp);
        }
    };
}
/// Maps each SIMD placeholder's function-pointer address to its table of
/// width-specific kernels; see `isa_simd` / `dispatch_simd`.
static SIMD: LazyLock<HashMap<usize, HashMap<(usize, usize), InstructionType>>> =
    LazyLock::new(|| {
        let mut storage = HashMap::new();
        add_simd_to_map!(storage, vvadd);
        add_simd_to_map!(storage, vvsub);
        add_simd_to_map!(storage, vvmul);
        add_simd_to_map!(storage, vvdmul);
        add_simd_to_map!(storage, vvmax);
        add_simd_to_map!(storage, vavg);
        add_simd_to_map!(storage, vrelu);
        add_simd_to_map!(storage, vtanh);
        add_simd_to_map!(storage, vsigm);
        add_simd_to_map!(storage, mvmul);
        storage
    });
/// True when `functor` is a SIMD placeholder that must be resolved through
/// `dispatch_simd` before execution.
pub fn isa_simd(functor: InstructionType) -> bool {
    SIMD.contains_key(&(functor as usize))
}
/// Resolves a SIMD placeholder functor to the concrete kernel matching the
/// requested input/output vector bit widths.
///
/// # Errors
/// Fails when `functor` is not a registered SIMD placeholder or when no
/// kernel exists for the (input, output) width pair.
pub fn dispatch_simd(
    functor: InstructionType,
    vector_bit_with: VectorBitWith,
) -> Result<InstructionType> {
    let VectorBitWith {
        vector_input_bitwith,
        vector_output_bitwith,
    } = vector_bit_with;
    let res = SIMD
        .get(&(functor as usize))
        .context("Request a non present simd")?
        .get(&(vector_input_bitwith, vector_output_bitwith))
        .with_context(|| {
            format!(
                "Function not found for the requested size input:{} output:{}",
                vector_input_bitwith, vector_output_bitwith
            )
        })?;
    Ok(*res)
}
/// True when `functor` is the `setbw` placeholder, which is resolved during
/// instruction construction and must never actually execute.
pub fn is_setbw(functor: InstructionType) -> bool {
    functor as usize == setbw as *const () as usize
}
/// Placeholder registered in the ISA tables for `setbw`; it is resolved in
/// the construction phase (see `is_setbw`) and must never run.
/// (FIX: parameters underscored — they are intentionally unused.)
pub fn setbw(_cores: &mut CPU, _data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, this instruction is resolved in the construction phase");
}
/// Placeholder for `mvmul`; `dispatch_simd` replaces it with a concrete
/// `mvmul_impl` monomorphisation before execution.
/// (FIX: parameters underscored — they are intentionally unused.)
pub fn mvmul(_cores: &mut CPU, _data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Matrix-vector multiply against crossbar `group` of the selected core.
///
/// Loads `crossbar_height` elements of type `F` from core memory at `r1`,
/// multiplies them against the crossbar matrix (element type `M`, row-major),
/// optionally applies ReLU, and stores the result (upcast to `T`) at `rd`.
pub(super) fn mvm_impl_internal<F, M, T>(
    cores: &mut CPU,
    data: InstructionData,
) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T> + UpcastSlice<M>,
    [M]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_mvm::<F, M, T>(cores, data);
    let (core_indx, rd, r1, mbiw, relu, group) = data.get_core_rd_r1_mbiw_immrelu_immgroup();
    let group: usize = group.try_into().context("group can not be negative")?;
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let rd_val = core.register(rd);
    let (memory, crossbars) = core.get_memory_crossbar();
    let crossbar = crossbars.get_mut(group).unwrap();
    let crossbar_byte_width = crossbar.width();
    // BUG FIX: divisibility must be checked with `%`; the previous bitwise
    // `&` only worked for power-of-two element sizes by accident
    // (e.g. width 24, size 8 was wrongly rejected).
    ensure!(
        crossbar_byte_width % size_of::<M>() == 0,
        "M not divisor of the crosbbar size"
    );
    let crossbar_elem_width = crossbar_byte_width / size_of::<M>();
    let crossbar_height = crossbar.height();
    let crossbar_byte_size = crossbar_byte_width * crossbar_height;
    // Input vector: one F element per crossbar row, read from core memory.
    let loads = memory
        .reserve_load(r1_val, crossbar_height * size_of::<F>())?
        .execute_load::<F>()?;
    let load = loads[0];
    let vec: Cow<[M]> = load.up();
    let matrix = crossbar.load::<M>(crossbar_byte_size)?[0];
    let mut res = Vec::with_capacity(crossbar_elem_width);
    // 64-byte aligned scratch so add_all's SIMD path can load it safely.
    let mut partial: AVec<M, _> = AVec::<M, ConstAlign<64>>::with_capacity(64, vec.len());
    partial.resize(vec.len(), M::from_f32(0.0));
    for x in 0..crossbar_elem_width {
        // Column-x dot product; the matrix is stored row-major as
        // matrix[row * crossbar_elem_width + col].
        partial[0] = vec[0] * matrix[x];
        for y in 1..crossbar_height {
            partial[y] = vec[y] * matrix[y * crossbar_elem_width + x];
        }
        // FIX: dropped a needless `mut` on the accumulator.
        let acc = add_all(partial.as_slice());
        res.push(acc);
    }
    if relu != 0 {
        // ReLU: clamp negative outputs to zero.
        res.iter_mut().for_each(|x| {
            if *x < M::from_f32(0.0) {
                *x = M::from_f32(0.0)
            }
        });
    }
    ensure!(
        res.len() == crossbar_elem_width,
        "mvm generate a vector bigger thant it's requested elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    // BUG FIX: the store's Result was discarded, silently dropping
    // out-of-bounds write errors; propagate it instead.
    core.execute_store(rd_val, res_up.as_ref())?;
    TRACER.lock().unwrap().post_mvm::<F, M, T>(cores, data);
    Ok(InstructionStatus::Completed)
}
/// Dispatches `mvmul` on the matrix bit width (`mbiw`) to the matching
/// monomorphised kernel.
///
/// # Panics
/// Panics on an unhandled `mbiw` value.
pub(super) fn mvmul_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T> + UpcastSlice<f32> + UpcastSlice<f64>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
    [f32]: UpcastSlice<T>,
    [f64]: UpcastSlice<T>,
{
    let mbiw = data.mbiw();
    match mbiw {
        32 => mvm_impl_internal::<F, f32, T>(cores, data),
        64 => mvm_impl_internal::<F, f64, T>(cores, data),
        // NOTE(review): mbiw == 8 is routed to the f32 kernel; the original
        // author marked this as unexplained ("i don't know why") — TODO
        // confirm against the instruction spec.
        8 => mvm_impl_internal::<F, f32, T>(cores, data),
        n => {
            panic!("mvm size {} not handled ", n)
        }
    }
}
/// Placeholder for the `vvadd` opcode: `dispatch_simd` (see the builder's
/// `make_inst`) substitutes a monomorphized `vvadd_impl`, so reaching this
/// body means SIMD dispatch was bypassed.
pub fn vvadd(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise vector addition: loads two `imm_len`-byte vectors of `F`
/// from core-local memory at the (offset-adjusted) addresses in `r1`/`r2`,
/// adds them pairwise, upcasts the result to `T`, and stores it at `rd`.
pub(super) fn vvadd_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvadd::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let imm_len: usize = imm_len.try_into().context("imm_len can not be negative")?;
    // Reserve both source ranges before executing the loads together.
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    let res: Vec<F> = load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a + b)
        .collect();
    // Sanity check: the byte length must translate exactly into res.len()
    // elements of F.
    ensure!(
        imm_len / size_of::<F>() == res.len(),
        "vvadd generated a vector whose length differs from the requested number of elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    TRACER.lock().unwrap().post_vvadd::<F,T>(cores, data);
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvsub` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvsub_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvsub(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise vector subtraction: loads two `imm_len`-byte vectors of `F`
/// from core-local memory, computes `load1 - load2`, upcasts to `T`, and
/// stores the result at `rd`.
pub(super) fn vvsub_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvsub::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let imm_len: usize = imm_len.try_into().context("imm_len can not be negative")?;
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    let res: Vec<F> = load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a - b)
        .collect();
    // Message previously said "vvadd" — copy-paste artifact.
    ensure!(
        imm_len / size_of::<F>() == res.len(),
        "vvsub generated a vector whose length differs from the requested number of elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    // NOTE(review): unlike vvadd_impl there is no post_vvsub trace hook —
    // confirm whether the tracer is meant to record completion here too.
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvmul` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvmul_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvmul(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise vector multiplication: loads two `imm_len`-byte vectors of
/// `F` from core-local memory, multiplies them pairwise, upcasts to `T`,
/// and stores the result at `rd`.
pub(super) fn vvmul_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvmul::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let imm_len: usize = imm_len.try_into().context("imm_len can not be negative")?;
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    let res: Vec<F> = load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a * b)
        .collect();
    // Message previously said "vvadd" — copy-paste artifact.
    ensure!(
        imm_len / size_of::<F>() == res.len(),
        "vvmul generated a vector whose length differs from the requested number of elements"
    );
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvdmul` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvdmul_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvdmul(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Dot product: loads two `imm_len`-byte vectors of `F`, multiplies them
/// pairwise, sums the products, and stores the single resulting element
/// (upcast to `T`) at `rd`.
pub(super) fn vvdmul_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vvdmul::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = core.register(r2);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
    // NOTE(review): unlike vvadd_impl, rd is NOT offset-adjusted here and
    // imm_len skips the non-negative conversion — confirm both are
    // intentional for the scalar-result case.
    let loads = core
        .reserve_load(r1_val, imm_len)?
        .reserve_load(r2_val, imm_len)?
        .execute_load::<F>()?;
    let (load1, load2) = (loads[0], loads[1]);
    // NOTE(review): reduce().unwrap() panics when the load is empty
    // (imm_len == 0) — confirm zero-length vvdmul cannot occur.
    let res: [F; 1] = [load1
        .iter()
        .zip(load2.iter())
        .map(|(&a, &b)| a * b)
        .reduce(|a, b| a + b)
        .unwrap()];
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vvmax` opcode: `dispatch_simd` substitutes a
/// monomorphized `vvmax_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vvmax(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
pub(super) fn vvmax_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable,
{
TRACER.lock().unwrap().pre_vvmax::<F,T>(cores, data);
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let core = cores.core(core_indx);
let r1_val = core.register(r1);
let r2_val = core.register(r2);
let rd_val = core.register(rd);
let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
let r2_val = add_offset_r2(r2_val, offset_select, offset_value);
let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
let loads = core
.reserve_load(r1_val, imm_len)?
.reserve_load(r2_val, imm_len)?
.execute_load::<F>()?;
let (load1, load2) = (loads[0], loads[1]);
let res: Vec<F> = load1
.iter()
.zip(load2.iter())
.map(|(&a, &b)| if (a > b) { a } else { b })
.collect();
let res_up: Cow<[T]> = res.as_slice().up();
core.execute_store(rd_val, res_up.as_ref());
Ok(InstructionStatus::Completed)
}
/// `vvsll` (vector shift-left): deliberately unsupported — this simulator
/// models vectors as floating point, where a bit shift is undefined.
pub fn vvsll(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!(
        "Shift left on floating point what does it means? who has generated this instruction???"
    );
}
/// `vvsra` (vector shift-right arithmetic): deliberately unsupported — this
/// simulator models vectors as floating point, where a bit shift is
/// undefined.
pub fn vvsra(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!(
        "Shift right on floating point what does it means? who has generated this instruction???"
    );
}
/// Placeholder for the `vavg` opcode: `dispatch_simd` substitutes a
/// monomorphized `vavg_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vavg(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Vector average: loads `imm_len` bytes of `F`, averages all elements,
/// and stores the single result (upcast to `T`) at `rd`. Only stride 1 is
/// supported (`r2` carries the stride directly, not a register index).
pub(super) fn vavg_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable,
{
    TRACER.lock().unwrap().pre_vavg::<F,T>(cores, data);
    let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let r1_val = core.register(r1);
    let r2_val = r2;
    ensure!(r2_val == 1, "Stride different than 1 not supported");
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let loads = core.reserve_load(r1_val, imm_len)?.execute_load::<F>()?;
    let load1 = loads[0];
    let len = load1.len();
    // `[F; 1]` instead of `[F; _]`: inferred array lengths are not stable
    // Rust, and this matches vvdmul_impl's single-element result style.
    // NOTE(review): unwrap() panics on an empty load (imm_len == 0).
    let res: [F; 1] =
        [load1.iter().copied().reduce(|a: F, b: F| a + b).unwrap() / F::from_usize(len)];
    let res_up: Cow<[T]> = res.as_slice().up();
    core.execute_store(rd_val, res_up.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vrelu` opcode: `dispatch_simd` substitutes a
/// monomorphized `vrelu_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vrelu(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
pub(super) fn vrelu_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
TRACER.lock().unwrap().pre_vrelu::<F,T>(cores, data);
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let core = cores.core(core_indx);
let r1_val = core.register(r1);
let rd_val = core.register(rd);
let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
let loads = core.reserve_load(r1_val, imm_len)?.execute_load::<F>()?;
let load1 = loads[0];
let res: Vec<F> = load1
.iter()
.map(|&a| if (a > 0.0.into()) { a } else { 0.0.into() })
.collect();
let res_up: Cow<[T]> = res.as_slice().up();
core.execute_store(rd_val, res_up.as_ref());
Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vtanh` opcode: `dispatch_simd` substitutes a
/// monomorphized `vtanh_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vtanh(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise tanh: loads `imm_len` bytes of `F` from the
/// offset-adjusted address in `r1`, maps every element through `tanh()`,
/// upcasts to `T`, and stores the result at the offset-adjusted `rd`.
pub(super) fn vtanh_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    TRACER.lock().unwrap().pre_vtanh::<F,T>(cores, data);
    let (core_indx, rd, r1, _r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let base_src = core.register(r1);
    let base_dst = core.register(rd);
    let src_addr = add_offset_r1(base_src, offset_select, offset_value);
    let dst_addr = add_offset_rd(base_dst, offset_select, offset_value);
    let loaded = core.reserve_load(src_addr, imm_len)?.execute_load::<F>()?;
    let input = loaded[0];
    let mut transformed: Vec<F> = Vec::with_capacity(input.len());
    for &value in input.iter() {
        transformed.push(value.tanh());
    }
    let upcast: Cow<[T]> = transformed.as_slice().up();
    core.execute_store(dst_addr, upcast.as_ref());
    Ok(InstructionStatus::Completed)
}
/// Placeholder for the `vsigm` opcode: `dispatch_simd` substitutes a
/// monomorphized `vsigm_impl`, so reaching this body means SIMD dispatch
/// was bypassed.
pub fn vsigm(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    panic!("You are calling a placeholder, the real call is the generic version");
}
/// Element-wise sigmoid: loads `imm_len` bytes of `F` from the
/// offset-adjusted address in `r1`, maps every element through `F::sigm`,
/// upcasts to `T`, and stores the result at the offset-adjusted `rd`.
pub(super) fn vsigm_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
where
    [F]: UpcastSlice<T>,
    T: UpcastDestTraits<T> + MemoryStorable,
    F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
{
    TRACER.lock().unwrap().pre_vsigm::<F,T>(cores, data);
    let (core_indx, rd, r1, _r2, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_indx);
    let base_src = core.register(r1);
    let base_dst = core.register(rd);
    let src_addr = add_offset_r1(base_src, offset_select, offset_value);
    let dst_addr = add_offset_rd(base_dst, offset_select, offset_value);
    let loaded = core.reserve_load(src_addr, imm_len)?.execute_load::<F>()?;
    let input = loaded[0];
    let mut transformed: Vec<F> = Vec::with_capacity(input.len());
    for &value in input.iter() {
        transformed.push(value.sigm());
    }
    let upcast: Cow<[T]> = transformed.as_slice().up();
    core.execute_store(dst_addr, upcast.as_ref());
    Ok(InstructionStatus::Completed)
}
/// `vmv`: unimplemented stub; panics via `todo!` if ever dispatched.
pub fn vmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    todo!()
}
/// `vrsu`: unimplemented stub; panics via `todo!` if ever dispatched.
pub fn vrsu(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    todo!()
}
/// `vrsl`: unimplemented stub; panics via `todo!` if ever dispatched.
pub fn vrsl(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    todo!()
}
///////////////////////////////////////////////////////////////
///Communication/synchronization Instructions/////////////////
///////////////////////////////////////////////////////////////
/// `ld`: copies `imm_len` bytes from host (global) memory at the
/// offset-adjusted address in `r1` into the target core's memory at the
/// offset-adjusted address in `rd`. Both addresses are read from the
/// core's register file.
pub fn ld(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_ld(cores, data);
    let (core, rd, r1, _, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    // Core index 0 is rejected — presumably it denotes the host itself;
    // TODO confirm the index-0 convention.
    ensure!(core != 0, "LD cannot be used to move from host to host");
    let (host, core) = cores.host_and_cores(core);
    let r1_val = core.register(r1);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let global_memory = host.load::<u8>(r1_val, imm_len)?;
    // This store is fallible (`?`), unlike the stores in the vector ops.
    core.execute_store(rd_val, global_memory[0])?;
    TRACER.lock().unwrap().post_ld(cores, data);
    Ok(InstructionStatus::Completed)
}
/// `st`: copies `imm_len` bytes from the core's local memory at the
/// offset-adjusted address in `r1` into host (global) memory at the
/// offset-adjusted address in `rd`. Both addresses are read from the
/// core's register file.
pub fn st(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_st(cores, data);
    let (core, rd, r1, _, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    // Core index 0 is rejected — presumably it denotes the host itself;
    // TODO confirm the index-0 convention.
    ensure!(core != 0, "ST cannot be used to move from host to host");
    let (host, core) = cores.host_and_cores(core);
    let r1_val = core.register(r1);
    let rd_val = core.register(rd);
    let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
    let rd_val = add_offset_rd(rd_val, offset_select, offset_value);
    let local_memory = core.load::<u8>(r1_val, imm_len)?;
    host.execute_store(rd_val, local_memory[0]);
    TRACER.lock().unwrap().post_st(cores, data);
    Ok(InstructionStatus::Completed)
}
/// `lldi`: fills `imm_len` bytes of core-local memory at the
/// offset-adjusted address in `rd` with the immediate, treated as a single
/// repeated byte.
pub fn lldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_lldi(cores, data);
    let (core, rd, imm) = data.get_core_rd_imm();
    let offset_value = data.offset_value();
    let imm_len = data.imm_len();
    let core = cores.core(core);
    let rd_val = core.register(rd);
    // NOTE(review): offset_select is hard-coded to 4 here instead of being
    // read from the instruction — confirm this is intentional.
    let rd_val = add_offset_rd(rd_val, 4, offset_value);
    // NOTE(review): the JSON decoder packs an f32's bit pattern into `imm`
    // (pack_float_in_i32), yet only values that fit in a u8 are accepted
    // here — confirm how non-zero float immediates are meant to work.
    core.memset(
        rd_val,
        imm_len,
        u8::try_from(imm).context("lldi with value bigger than u8")?,
    );
    Ok(InstructionStatus::Completed)
}
/// `lmv`: copies `imm_len` bytes within a core's local memory, from the
/// offset-adjusted address in `r1` to the offset-adjusted address in `rd`.
pub fn lmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_lmv(cores, data);
    let (core_id, rd, r1, _, imm_len, offset_select, offset_value) =
        data.get_core_rd_r1_r2_immlen_offset();
    let core = cores.core(core_id);
    let base_src = core.register(r1);
    let base_dst = core.register(rd);
    let src = add_offset_r1(base_src, offset_select, offset_value);
    let dst = add_offset_rd(base_dst, offset_select, offset_value);
    // Copy the bytes out first: the loaded slice borrows the core, and the
    // store below needs the core again.
    let bytes = core.load::<u8>(src, imm_len)?[0].to_vec();
    core.execute_store(dst, bytes.as_slice());
    Ok(InstructionStatus::Completed)
}
/// `send`: records the trace hook and yields `Sending(data)`; the actual
/// transfer is performed by the surrounding execution loop, not here.
pub fn send(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_send(cores, data);
    Ok(InstructionStatus::Sending(data))
}
/// `recv`: records the trace hook and yields `Reciving(data)` (variant
/// spelling comes from the `InstructionStatus` definition); the actual
/// transfer is performed by the surrounding execution loop, not here.
pub fn recv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    TRACER.lock().unwrap().pre_recv(cores, data);
    Ok(InstructionStatus::Reciving(data))
}
/// `wait`: yields `Waiting(data)` for the surrounding execution loop to
/// resolve. NOTE(review): unlike send/recv there is no tracer hook here —
/// confirm whether one is intended.
pub fn wait(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    Ok(InstructionStatus::Waiting(data))
}
/// `sync`: yields `Sync(data)` for the surrounding execution loop to
/// resolve. NOTE(review): unlike send/recv there is no tracer hook here —
/// confirm whether one is intended.
pub fn sync(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
    Ok(InstructionStatus::Sync(data))
}

View File

@@ -0,0 +1,115 @@
use crate::{
cpu::CPU,
instruction_set::{
instruction_data::InstructionData,
isa::{dispatch_simd, functor_to_name, is_setbw, isa_simd},
},
};
use anyhow::{Context, Result};
use std::mem::swap;
pub mod instruction_data;
pub mod isa;
pub mod helper;
/// A decoded instruction: its operand payload plus the function that
/// executes it against the CPU.
#[derive(Clone, Copy, Debug)]
pub struct Instruction {
    /// Packed operand fields (registers, immediates, offsets).
    pub data: InstructionData,
    // The executor; for SIMD ops this is the monomorphized variant chosen
    // by `dispatch_simd` when the instruction was built.
    functor: InstructionType,
}
/// Outcome of executing one instruction; non-`Completed` variants carry
/// the originating instruction data for the caller to act on.
#[derive(Debug, Clone, Copy, Default)]
pub enum InstructionStatus {
    /// The instruction finished.
    Completed,
    /// Produced by `wait`.
    Waiting(InstructionData),
    /// Produced by `send`.
    Sending(InstructionData),
    /// Produced by `recv`. (Spelling kept as-is: renaming this public
    /// variant would break existing callers.)
    Reciving(InstructionData),
    /// Produced by `sync`.
    Sync(InstructionData),
    /// Initial state: not yet executed.
    #[default]
    NotExecuted,
}
impl InstructionStatus {
    /// True only for [`InstructionStatus::Completed`].
    #[must_use]
    pub fn is_completed(&self) -> bool {
        matches!(self, Self::Completed)
    }
}
impl Instruction {
    /// Pairs an operand payload with its executor.
    fn new(data: InstructionData, functor: InstructionType) -> Self {
        Self { data, functor }
    }
    /// Runs the instruction on `cpu` and returns its status.
    ///
    /// # Panics
    /// Any error from the executor is unwrapped here, annotated with the
    /// instruction name and core index.
    pub fn execute(&self, cpu: &mut CPU) -> InstructionStatus {
        // core_indx() - 1: runtime core indices appear to be shifted by one
        // (0 reserved for the host) — TODO confirm; this would underflow
        // for core_indx() == 0.
        (self.functor)(cpu, self.data)
            .with_context(|| format!("Instruction: {}", functor_to_name(self.functor as usize)))
            .with_context(|| format!("Error in core: {}", self.data.core_indx() - 1))
            .unwrap()
    }
}
/// The instruction stream executed by the CPU, in program order.
pub type Instructions = Vec<Instruction>;
/// Signature shared by every instruction executor.
pub type InstructionType = fn(&mut CPU, InstructionData) -> Result<InstructionStatus>;
/// Per-element bit-widths for vector inputs and outputs, configured by the
/// `setbw` instruction. (The "BitWith"/"bitwith" spelling is kept — this is
/// public API.)
#[derive(Debug, Clone, Copy, Default)]
pub struct VectorBitWith {
    /// Bits per element of input vectors.
    pub vector_input_bitwith: usize,
    /// Bits per element of output vectors.
    pub vector_output_bitwith: usize,
}
/// Support for the
/// setbw ibiw, obiw
/// Set the bit-widths of each element for input vectors and output vectors. Related vector instructions
/// use the configured bit-widths. Once setbw is called, all subsequent related vector instructions will
/// use the configured bit-widths, until a new setbw is called. Once ibiw and obiw are set, ibyw and
/// obyw are also set accordingly by the hardware.
/// If the hardware does not support variable bit-width, this instruction is invalid and the matrix/vector
/// instructions use the fixed bit-width of the hardware.
pub struct InstructionsBuilder {
    // Widths applied to SIMD ops appended after the most recent setbw.
    vector_bit_with: VectorBitWith,
    // Instructions accumulated so far; drained by build().
    instructions: Instructions,
}
impl Default for InstructionsBuilder {
    /// Equivalent to [`InstructionsBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl InstructionsBuilder {
pub fn new() -> Self {
Self {
vector_bit_with: VectorBitWith {
vector_input_bitwith: 32,
vector_output_bitwith: 32,
},
instructions: Instructions::new(),
}
}
pub fn make_inst(&mut self, functor: InstructionType, data: InstructionData) {
if is_setbw(functor) {
let (ibiw, obiw) = data.get_ibiw_obiw();
self.vector_bit_with.vector_input_bitwith =
ibiw.try_into().expect("ibiw can not be negative");
self.vector_bit_with.vector_output_bitwith =
obiw.try_into().expect("obiw can not be negative");
return;
}
if (isa_simd(functor)) {
self.instructions.push(Instruction::new(
data,
dispatch_simd(functor, self.vector_bit_with).unwrap(),
))
} else {
self.instructions.push(Instruction::new(data, functor))
}
}
pub fn build(&mut self) -> Instructions {
let mut inst = Instructions::new();
swap(&mut self.instructions, &mut inst);
inst
}
}

View File

@@ -0,0 +1,569 @@
use anyhow::{Context, Result};
use paste::paste;
use std::{collections::HashMap, mem::offset_of, sync::LazyLock};
use crate::{
instruction_set::{
Instruction, InstructionsBuilder, instruction_data::InstructionDataBuilder, isa::*,
},
utility::pack_float_in_i32,
};
use serde_json::Value;
type FunctorType = fn(&mut InstructionsBuilder, &mut InstructionDataBuilder, &Value) -> Result<()>;
/// Registers a decoder in `$storage`: maps the op-name string `"$id"` to
/// the function `json_to_$id` (name glued together via `paste!`).
macro_rules! add_to_json_map {
    ($storage:ident, $id:ident) => {
        paste! {
            $storage.insert(stringify!($id).to_string(), [<json_to_ $id>] as FunctorType );
        }
    };
}
/// Dispatch table from op-name string to its JSON decoder, built lazily on
/// first use. Keys must match the `"op"` strings emitted by the compiler.
static SIMD: LazyLock<HashMap<String, FunctorType>> = LazyLock::new(|| {
    let mut storage = HashMap::new();
    add_to_json_map!(storage, sldi);
    add_to_json_map!(storage, sld);
    add_to_json_map!(storage, sadd);
    add_to_json_map!(storage, ssub);
    add_to_json_map!(storage, smul);
    add_to_json_map!(storage, saddi);
    add_to_json_map!(storage, setbw);
    add_to_json_map!(storage, mvmul);
    add_to_json_map!(storage, vvadd);
    add_to_json_map!(storage, vvsub);
    add_to_json_map!(storage, vvmul);
    add_to_json_map!(storage, vvdmul);
    add_to_json_map!(storage, vvmax);
    add_to_json_map!(storage, vvsll);
    add_to_json_map!(storage, vvsra);
    // json_to_vavg existed but was never registered, making "vavg" ops
    // fail with "Operation not found".
    add_to_json_map!(storage, vavg);
    add_to_json_map!(storage, vrelu);
    add_to_json_map!(storage, vtanh);
    add_to_json_map!(storage, vsigm);
    add_to_json_map!(storage, vmv);
    add_to_json_map!(storage, vrsu);
    add_to_json_map!(storage, vrsl);
    add_to_json_map!(storage, ld);
    add_to_json_map!(storage, st);
    add_to_json_map!(storage, lldi);
    add_to_json_map!(storage, lmv);
    add_to_json_map!(storage, send);
    add_to_json_map!(storage, recv);
    add_to_json_map!(storage, wait);
    add_to_json_map!(storage, sync);
    storage
});
/// Extracts the `(offset_select, offset_value)` pair from an instruction's
/// "offset" sub-object.
///
/// # Panics
/// Panics with a field-specific message when either key is missing or not
/// an integer (previously these were bare `unwrap()`s).
fn json_to_offset(json: &Value) -> (i32, i32) {
    let offset_value = json
        .get("offset_value")
        .expect("offset_value field not present")
        .as_i64()
        .expect("offset_value field not i64") as i32;
    let offset_select = json
        .get("offset_select")
        .expect("offset_select field not present")
        .as_i64()
        .expect("offset_select field not i64") as i32;
    (offset_select, offset_value)
}
/// Decodes one JSON instruction object and appends it to `inst_builder`.
///
/// # Panics
/// Panics when the object has no registered "op", or when the per-op
/// decoder reports malformed fields (previously the decoder's `Result`
/// was silently discarded).
pub fn json_to_instruction(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) {
    let json_struct = json.as_object().expect("Not an object");
    let op = json_struct.get("op").unwrap().as_str().unwrap();
    let decoder = SIMD
        .get(op)
        .unwrap_or_else(|| panic!("Operation not found {}", op));
    decoder(inst_builder, inst_data_builder, json)
        .unwrap_or_else(|err| panic!("Failed to decode {} instruction: {:?}", op, err));
}
/// Fetches `$value` from a JSON map as `&str`, turning a missing or
/// mistyped field into a contextual error via `?`.
macro_rules! json_str {
    ($json:ident , $value:literal) => {
        $json.get($value).context(concat![$value, " field not present"])?.as_str().context(concat![$value, " field not str"])?
    };
}
/// Fetches `$value` from a JSON map as `i64`, turning a missing or
/// mistyped field into a contextual error via `?`.
macro_rules! json_i64 {
    ($json:ident , $value:literal) => {
        $json.get($value).context(concat![$value, " field not present"])?.as_i64().context(concat![$value, " field not i64"])?
    };
}
/// Decodes an `sldi` (scalar load-immediate) JSON object.
fn json_to_sldi(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let obj = json.as_object().expect("Not an object");
    assert_eq!("sldi", json_str!(obj, "op"));
    let dest = json_i64!(obj, "rd");
    let immediate = json_i64!(obj, "imm");
    inst_data_builder.set_rd(dest as i32);
    inst_data_builder.set_imm(immediate as i32);
    inst_builder.make_inst(sldi, inst_data_builder.build());
    Ok(())
}
fn json_to_sld(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_sadd(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_ssub(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_smul(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_saddi(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
///////////////////////////////////////////////////////////////
/////////////////Matrix/vector Instructions////////////////////
///////////////////////////////////////////////////////////////
/// Decodes a `setbw` (set element bit-widths) JSON object.
fn json_to_setbw(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let obj = json.as_object().expect("Not an object");
    assert_eq!("setbw", json_str!(obj, "op"));
    let input_width = json_i64!(obj, "ibiw");
    let output_width = json_i64!(obj, "obiw");
    inst_data_builder.set_ibiw(input_width as i32);
    inst_data_builder.set_obiw(output_width as i32);
    inst_builder.make_inst(setbw, inst_data_builder.build());
    Ok(())
}
/// Decodes an `mvmul` (matrix-vector multiply) JSON object.
fn json_to_mvmul(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let obj = json.as_object().expect("Not an object");
    assert_eq!("mvmul", json_str!(obj, "op"));
    // Field reads keep the original order so errors surface identically.
    inst_data_builder
        .set_imm_group(json_i64!(obj, "group") as i32)
        .set_imm_relu(json_i64!(obj, "relu") as i32)
        .set_rd(json_i64!(obj, "rd") as i32)
        .set_r1(json_i64!(obj, "rs1") as i32)
        .set_mbiw(json_i64!(obj, "mbiw") as i32);
    inst_builder.make_inst(mvmul, inst_data_builder.build());
    Ok(())
}
/// Decodes a `vvadd` (element-wise vector add) JSON object.
fn json_to_vvadd(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vvadd", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let rs2 = json_i64!(json, "rs2") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_r2(rs2)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vvadd, inst_data_builder.build());
    Ok(())
}
fn json_to_vvsub(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
/// Decodes a `vvmul` (element-wise vector multiply) JSON object.
fn json_to_vvmul(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vvmul", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let rs2 = json_i64!(json, "rs2") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_r2(rs2)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vvmul, inst_data_builder.build());
    Ok(())
}
fn json_to_vvdmul(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
/// Decodes a `vvmax` (element-wise vector maximum) JSON object.
fn json_to_vvmax(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vvmax", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let rs2 = json_i64!(json, "rs2") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_r2(rs2)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vvmax, inst_data_builder.build());
    Ok(())
}
fn json_to_vvsll(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vvsra(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vavg(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
/// Decodes a `vrelu` (element-wise ReLU) JSON object.
fn json_to_vrelu(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vrelu", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vrelu, inst_data_builder.build());
    Ok(())
}
/// Decodes a `vtanh` (element-wise tanh) JSON object.
fn json_to_vtanh(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("vtanh", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vtanh, inst_data_builder.build());
    Ok(())
}
/// Decodes a `vsigm` (element-wise sigmoid) JSON object.
fn json_to_vsigm(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    // This decoder is registered under the key "vsigm", so any object that
    // reaches it has op == "vsigm"; the previous assert on "vsigmoid"
    // could therefore never pass.
    assert_eq!("vsigm", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(vsigm, inst_data_builder.build());
    Ok(())
}
fn json_to_vmv(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vrsu(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_vrsl(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
///////////////////////////////////////////////////////////////
///Communication/synchronization Instructions/////////////////
///////////////////////////////////////////////////////////////
/// Decodes an `ld` (host-to-core copy) JSON object. Note the byte count
/// comes from the "size" field (not "len" as in the vector ops).
fn json_to_ld(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("ld", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "size") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(ld, inst_data_builder.build());
    Ok(())
}
/// Decodes an `st` (core-to-host copy) JSON object. Note the byte count
/// comes from the "size" field (not "len" as in the vector ops).
fn json_to_st(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("st", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "size") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(st, inst_data_builder.build());
    Ok(())
}
/// Decodes an `lldi` (local load-immediate) JSON object.
fn json_to_lldi(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("lldi", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    // NOTE(review): the integer immediate is reinterpreted as an f32 whose
    // bit pattern is packed into an i32, yet the executor only accepts
    // immediates that fit in a u8 — confirm this round-trip is intended.
    let imm = json_i64!(json, "imm") as f32;
    let imm = pack_float_in_i32(imm);
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_imm(imm)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(lldi, inst_data_builder.build());
    Ok(())
}
/// Decodes an `lmv` (local memory move) JSON object.
fn json_to_lmv(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("lmv", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    let rs1 = json_i64!(json, "rs1") as i32;
    let len = json_i64!(json, "len") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_r1(rs1)
        .set_imm_len(len)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(lmv, inst_data_builder.build());
    Ok(())
}
/// Decodes a `send` JSON object.
fn json_to_send(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    let json = json.as_object().expect("Not an object");
    assert_eq!("send", json_str!(json, "op"));
    let rd = json_i64!(json, "rd") as i32;
    // +1: JSON core ids are shifted into the runtime numbering — presumably
    // because index 0 is reserved for the host; TODO confirm.
    let core = json_i64!(json, "core") as i32 + 1;
    let size = json_i64!(json, "size") as i32;
    // Report a missing "offset" object as an error instead of unwrapping.
    let offset = json.get("offset").context("offset field not present")?;
    let (offset_select, offset_value) = json_to_offset(offset);
    inst_data_builder
        .set_rd(rd)
        .set_imm_core(core)
        .set_imm_len(size)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(send, inst_data_builder.build());
    Ok(())
}
fn json_to_recv(
    inst_builder: &mut InstructionsBuilder,
    inst_data_builder: &mut InstructionDataBuilder,
    json: &Value,
) -> Result<()> {
    // Decode a `recv` instruction from its JSON object form and append it.
    let obj = json.as_object().expect("Not an object");
    assert_eq!("recv", json_str!(obj, "op"));
    let rd = json_i64!(obj, "rd") as i32;
    // +1: JSON core ids appear to be 0-based while internal core ids reserve
    // slot 0 for the host (see `json_to_executor`) — TODO confirm.
    let source_core = json_i64!(obj, "core") as i32 + 1;
    let transfer_size = json_i64!(obj, "size") as i32;
    let (offset_select, offset_value) = json_to_offset(obj.get("offset").unwrap());
    inst_data_builder
        .set_rd(rd)
        .set_imm_core(source_core)
        .set_imm_len(transfer_size)
        .set_offset_select(offset_select)
        .set_offset_value(offset_value);
    inst_builder.make_inst(recv, inst_data_builder.build());
    Ok(())
}
fn json_to_wait(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
fn json_to_sync(
inst_builder: &mut InstructionsBuilder,
inst_data_builder: &mut InstructionDataBuilder,
json: &Value,
) -> Result<()> {
todo!("Not present in the compiler");
Ok(())
}
#[cfg(test)]
mod test {
    use serde_json::Value;
    use crate::{
        instruction_set::{InstructionsBuilder, instruction_data::InstructionDataBuilder},
        json_to_instruction::json_isa::json_to_instruction,
    };
    /// An unknown opcode must panic with a message naming the offending op.
    #[test]
    #[should_panic(expected = "Operation not found bazza")]
    fn test_op_not_present() {
        let json = r#"{"imm":15168,"op":"bazza","rd":1}"#;
        let json = serde_json::from_str(json).unwrap();
        let mut instruction_builder = InstructionsBuilder::new();
        let mut inst_data_builder = InstructionDataBuilder::new();
        json_to_instruction(&mut instruction_builder, &mut inst_data_builder, &json);
    }
    /// A well-formed `sldi` decodes without panicking (no output assertions;
    /// this is a smoke test for the dispatch path).
    #[test]
    fn test_json_sldi() {
        let json = r#"{"imm":15168,"op":"sldi","rd":1}"#;
        let json = serde_json::from_str(json).unwrap();
        let mut instruction_builder = InstructionsBuilder::new();
        let mut inst_data_builder = InstructionDataBuilder::new();
        json_to_instruction(&mut instruction_builder, &mut inst_data_builder, &json);
    }
}

View File

@@ -0,0 +1,46 @@
use core::panic;
use serde_json::{Map, Value};
use crate::{
CoreInstructionsBuilder, Executable,
cpu::{CPU, crossbar},
instruction_set::{
InstructionsBuilder,
instruction_data::{self, InstructionData, InstructionDataBuilder},
},
json_to_instruction::{self, json_isa},
memory_manager::type_traits::TryToUsize,
};
/// Builds an [`Executable`] from the simulator `config` JSON plus one
/// instruction-list JSON per core.
///
/// # Panics
/// Panics when a required config key is missing or has the wrong type, when
/// fewer core files than `core_cnt` are supplied, or when a core file is not
/// a JSON array of instructions.
pub fn json_to_executor<'a>(
    config: Value,
    mut cores: impl Iterator<Item = &'a Value>,
) -> Executable {
    // Currently-unused config values are still read and unwrapped (prefixed
    // with `_`) so a malformed config fails fast here, not mid-simulation.
    let _cell_precision = config.get("cell_precision").unwrap().as_i64().unwrap() as i32;
    let core_cnt = config.get("core_cnt").unwrap().as_i64().unwrap() as i32;
    let xbar_count = config.get("xbar_array_count").unwrap().as_i64().unwrap() as i32;
    let xbar_size = config.get("xbar_size").unwrap().as_array().unwrap();
    let rows_crossbar = xbar_size[0].as_i64().unwrap() as i32;
    let columns_crossbar = xbar_size[1].as_i64().unwrap() as i32; // renamed: was the typo `column_corssbar`
    let _array_group_map = config.get("array_group_map"); // not consumed yet
    let mut cpu = CPU::new(core_cnt);
    // NOTE(review): columns are multiplied by 4 — presumably a per-cell
    // precision/packing factor; confirm against the crossbar model.
    cpu.reserve_crossbar(xbar_count, columns_crossbar * 4, rows_crossbar);
    let mut core_insts_builder = CoreInstructionsBuilder::new(core_cnt as usize);
    // Core ids are 1-based; slot 0 of the instruction table is the host.
    for core_indx in 1..=core_cnt {
        let mut insts_builder = InstructionsBuilder::new();
        let mut inst_data_builder = InstructionDataBuilder::new();
        inst_data_builder.set_core_indx(core_indx).fix_core_indx();
        let json_core = cores
            .next()
            .unwrap_or_else(|| panic!("cores files less than {}", core_indx - 1));
        let json_core_insts = json_core
            .as_array()
            .unwrap_or_else(|| panic!("core{} has not a list of instruction", core_indx));
        for json_inst in json_core_insts {
            json_isa::json_to_instruction(&mut insts_builder, &mut inst_data_builder, json_inst);
        }
        core_insts_builder.set_core(core_indx, insts_builder.build());
    }
    Executable::new(cpu, core_insts_builder.build())
}

View File

@@ -0,0 +1,2 @@
mod json_isa;
pub mod json_to_executor;

View File

@@ -0,0 +1,245 @@
use std::fmt::Debug;
use anyhow::{Context, Result, bail, ensure};
use crate::memory_manager::type_traits::TryToUsize;
/// Marker for plain-old-data types that may live in a `CoreMemory` byte buffer
/// and be reinterpreted to/from raw bytes. The `Copy` bound guarantees the
/// type has no drop glue or owned resources, which the unsafe slice
/// reinterpretation below relies on.
pub trait MemoryStorable: Copy {}
pub mod type_traits;
// Only fixed-size numeric primitives are storable; every bit pattern is a
// valid value for each of these types.
impl MemoryStorable for f32 {}
impl MemoryStorable for f64 {}
impl MemoryStorable for i32 {}
impl MemoryStorable for i64 {}
impl MemoryStorable for u8 {}
impl MemoryStorable for i8 {}
/// A queued read: `size` bytes starting at byte offset `index` of the core's
/// memory, recorded by `reserve_load` and consumed by `execute_load`.
#[derive(Debug, Clone, Copy)]
struct LoadRequest {
    pub index: usize,
    pub size: usize,
}
/// Reinterprets a byte slice as a slice of `T` without copying.
///
/// # Safety
/// The caller must guarantee that the bytes form valid values of `T`;
/// `MemoryStorable` is only implemented for numeric primitives, for which
/// every bit pattern is valid.
///
/// # Errors
/// Fails when the byte slice is shorter than one `T`, is not a whole number
/// of `T`s, or is not aligned for `T` (`align_to` yields a non-empty
/// prefix/suffix in that case).
unsafe fn slice_from_u8<T>(slice: &[u8]) -> Result<&[T]>
where
    T: MemoryStorable,
{
    let size_t = size_of::<T>();
    let size = slice.len();
    ensure!(
        size >= size_t,
        "Size {} smaller than the data type size {}",
        size,
        size_t
    );
    ensure!(
        size.is_multiple_of(size_t),
        "Size not a multiple of selected data type size"
    );
    // SAFETY: reading the bytes as `T` is valid per this function's contract;
    // the checks below reject any misaligned head/tail.
    let (prefix, slice, suffix) = unsafe { slice.align_to::<T>() };
    ensure!(
        prefix.is_empty() && suffix.is_empty(),
        "{:?} {:?} T not aligned",
        prefix,
        suffix
    );
    Ok(slice)
}
/// Reinterprets a slice of `T` as its raw in-memory bytes (native byte order)
/// without copying.
///
/// # Safety
/// Always sound for `MemoryStorable` primitives: any byte view of them is
/// valid and `u8` has alignment 1.
unsafe fn slice_into_u8<T>(slice: &[T]) -> Result<&[u8]>
where
    T: MemoryStorable,
{
    // NOTE(review): empty slices are rejected even though an empty byte view
    // would be valid — confirm this restriction is intentional.
    ensure!(!slice.is_empty(), "Empty slice not convertable");
    // SAFETY: target type is u8 (align 1), so prefix/suffix are always empty.
    let (prefix, slice, suffix) = unsafe { slice.align_to::<u8>() };
    ensure!(prefix.is_empty() && suffix.is_empty(), "T not aligned");
    Ok(slice)
}
// Not thread safe: callers must serialize access externally.
/// Growable, zero-initialized byte memory for a single core, plus a queue of
/// pending load requests (`reserve_load` enqueues, `execute_load` drains).
#[derive(Debug, Clone)]
pub struct CoreMemory {
    memory: Vec<u8>,                 // backing byte store, grown on demand
    load_requests: Vec<LoadRequest>, // loads queued but not yet materialized
}
impl Default for CoreMemory {
    fn default() -> Self {
        Self::new()
    }
}
impl CoreMemory {
    /// Creates an empty memory with no pending load requests.
    pub fn new() -> Self {
        Self {
            memory: Vec::new(),
            load_requests: Vec::new(),
        }
    }
    /// Queues a read of `size` bytes at `address` and grows the backing store
    /// so the range is addressable; the data is produced later by
    /// [`Self::execute_load`].
    ///
    /// # Errors
    /// Fails when `address` or `size` is negative.
    pub fn reserve_load(
        &mut self,
        address: impl TryToUsize,
        size: impl TryToUsize,
    ) -> Result<&mut Self> {
        // (The stray empty `where` clause on the old signature is gone.)
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        let load_request = LoadRequest {
            index: address,
            size,
        };
        if self.memory.len() < address + size {
            // Grow to twice the required size (zero-filled) to amortize
            // repeated resizes.
            self.memory.resize((address + size) * 2, 0);
        }
        self.load_requests.push(load_request);
        Ok(self)
    }
    /// Drains every queued load request, viewing each byte range as a `&[T]`
    /// slice into this memory.
    ///
    /// # Errors
    /// Fails when a queued range is not a whole, aligned number of `T`s.
    pub fn execute_load<T>(&mut self) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        let Self {
            memory,
            load_requests,
        } = self;
        let mut res = Vec::with_capacity(load_requests.len());
        for (load_index, load_request) in load_requests.drain(..).enumerate() {
            let LoadRequest { index, size } = load_request;
            let memory_slice = &memory[index..index + size];
            // SAFETY: `MemoryStorable` types accept any bit pattern; length
            // and alignment are validated inside `slice_from_u8`.
            let memory_slice = unsafe { slice_from_u8(memory_slice) }
                .with_context(|| format!("Load number: {} Accessing from {} to {}", load_index, index, index + size))?;
            res.push(memory_slice);
        }
        Ok(res)
    }
    /// Convenience wrapper: queue one load and immediately execute every
    /// pending request (including ones queued earlier).
    ///
    /// # Errors
    /// Fails when `address`/`size` is negative (the previous version panicked
    /// here) or when a range cannot be viewed as `&[T]`.
    pub fn load<T>(&mut self, address: impl TryToUsize, size: impl TryToUsize) -> Result<Vec<&[T]>>
    where
        T: MemoryStorable,
    {
        // Conversion and validation are delegated to `reserve_load`, so bad
        // input surfaces as `Err` instead of a panic.
        self.reserve_load(address, size)?.execute_load()
    }
    /// Writes `element` at byte offset `address`, growing the backing store
    /// if needed.
    ///
    /// # Errors
    /// Fails when `address` is negative or `element` is empty.
    pub fn execute_store<T>(&mut self, address: impl TryToUsize, element: &[T]) -> Result<()>
    where
        T: MemoryStorable,
    {
        let address = address.try_into().context("address can not be negative")?;
        let Self { memory, .. } = self;
        let size = std::mem::size_of_val(element);
        if memory.len() < address + size {
            memory.resize((address + size) * 2, 0);
        }
        // SAFETY: viewing a primitive slice as bytes is always valid.
        let slice = unsafe { slice_into_u8(element) }?;
        let memory_slice = &mut memory[address..address + size];
        memory_slice.copy_from_slice(slice);
        Ok(())
    }
    /// Fills `size` bytes starting at `address` with `val`, growing the
    /// backing store if needed.
    ///
    /// # Errors
    /// Fails when `address` or `size` is negative (the previous version
    /// panicked here, unlike the other fallible methods).
    pub fn memset(&mut self, address: impl TryToUsize, size: impl TryToUsize, val: u8) -> Result<()> {
        let address = address.try_into().context("address can not be negative")?;
        let size = size.try_into().context("size can not be negative")?;
        let Self { memory, .. } = self;
        if memory.len() < address + size {
            memory.resize((address + size) * 2, 0);
        }
        memory[address..address + size].fill(val);
        Ok(())
    }
    /// Ensures the backing store is at least `size` bytes long (never shrinks).
    ///
    /// # Panics
    /// Panics when `size` is negative.
    pub fn set_capacity(&mut self, size: impl TryToUsize) {
        let size = size.try_into().expect("size can not be negative");
        let Self { memory, .. } = self;
        if memory.len() < size {
            memory.resize(size, 0);
        }
    }
    /// Current length of the backing store in bytes (allocated, not "used").
    pub fn get_len(&self) -> usize {
        self.memory.len()
    }
    /// Drops all bytes (length becomes 0). Pending load requests are kept.
    pub(crate) fn clear(&mut self) {
        self.memory.clear();
    }
}
#[cfg(test)]
mod test {
    use super::*;
    /// Round-trips raw little-endian byte patterns into f32/f64 views.
    #[test]
    fn test_slice_from_u8() {
        let buff: [u8; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
        let transmuted: &[f32] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 0_f32 && transmuted[1] == 0_f32,
            "Failed conversion from [00 00 00 00, 00 00 00 00] to [0_f32, 0_f32]"
        );
        let buff = [0xdb, 0x0f, 0x49, 0x40];
        let transmuted: &[f32] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == std::f32::consts::PI,
            "Failed conversion from [0xdb, 0x0f, 0x49, 0x40] to [3.14]"
        );
        let buff = [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40];
        let transmuted: &[f64] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 6.14_f64,
            "Failed conversion from [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40] to [6.14]"
        );
    }
    /// Checks the float -> raw-bytes direction.
    #[test]
    fn test_slice_into_u8() {
        let buff: [f32; 2] = [0_f32, 0_f32];
        let transmuted: &[u8] = unsafe { slice_into_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 0
                && transmuted[1] == 0
                && transmuted[2] == 0
                && transmuted[3] == 0
                && transmuted[4] == 0
                && transmuted[5] == 0
                && transmuted[6] == 0
                && transmuted[7] == 0,
            "Failed conversion from [00 00 00 00, 00 00 00 00] to [0_f32, 0_f32]"
        );
        let buff = [std::f32::consts::PI];
        let transmuted: &[u8] = unsafe { slice_into_u8(&buff).unwrap() };
        println!("{:?}", transmuted);
        assert!(
            transmuted[0] == 0xdb
                && transmuted[1] == 0x0f
                && transmuted[2] == 0x49
                && transmuted[3] == 0x40,
            "Failed conversion from [0xdb, 0x0f, 0x49, 0x40] to [3.14]"
        );
        let buff = [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40];
        let transmuted: &[f64] = unsafe { slice_from_u8(&buff).unwrap() };
        assert!(
            transmuted[0] == 6.14_f64,
            "Failed conversion from [0x8f, 0xc2, 0xf5, 0x28, 0x5c, 0x8f, 0x18, 0x40] to [6.14]"
        );
    }
    /// Store four floats, read two back via queued loads, write the sum, and
    /// verify the final memory image. All `Result`s are now unwrapped so a
    /// failed store/reserve can no longer be silently ignored by the test.
    #[test]
    fn test_simple_load_store() {
        let mut core_memory = CoreMemory::new();
        core_memory
            .execute_store(0, &[5_f32, 6_f32, 7_f32, 15_f32])
            .unwrap();
        core_memory.reserve_load(0, 4).unwrap(); // Load [5_f32]
        core_memory.reserve_load(8, 8).unwrap(); // Load [7_f32, 15_f32]
        let loads: Vec<&[f32]> = core_memory.execute_load().unwrap();
        let mut data = [0_f32; 2];
        data[0] = loads[0][0] + loads[1][0];
        core_memory.execute_store(4, &data[0..1]).unwrap();
        let loads: &[f32] = core_memory.reserve_load(0, 16).unwrap().execute_load().unwrap()[0];
        println!("{:?}", loads);
        assert!(loads[0] == 5_f32 && loads[1] == 12_f32 && loads[2] == 7_f32 && loads[3] == 15_f32)
    }
}

View File

@@ -0,0 +1,208 @@
use std::{
borrow::Cow,
fmt::Debug,
ops::{Add, Div, Mul, Sub},
};
use anyhow::Context;
/// Conversion from either float width into `Self`, used by generic kernels
/// that must accept `f32` or `f64` inputs.
pub trait FromFloat {
    fn from_f32(val: f32) -> Self;
    fn from_f64(val: f64) -> Self;
}

impl FromFloat for f32 {
    // Identity.
    fn from_f32(v: f32) -> Self {
        v
    }
    // Narrowing: rounds to the nearest representable f32.
    fn from_f64(v: f64) -> Self {
        v as f32
    }
}

impl FromFloat for f64 {
    // Widening: exact.
    fn from_f32(v: f32) -> Self {
        f64::from(v)
    }
    // Identity.
    fn from_f64(v: f64) -> Self {
        v
    }
}
/// Hyperbolic tangent, abstracted so generic kernels can call `tanh` on
/// either float width.
pub trait HasTanh {
    fn tanh(self) -> Self;
}

impl HasTanh for f32 {
    fn tanh(self) -> Self {
        // Fully-qualified call to the *inherent* method. A bare `self.tanh()`
        // would resolve to the same thing (inherent methods win over trait
        // methods), but spelling it out removes any appearance of recursion.
        f32::tanh(self)
    }
}

impl HasTanh for f64 {
    fn tanh(self) -> Self {
        f64::tanh(self)
    }
}
/// Logistic sigmoid: `sigm(x) = e^x / (1 + e^x)` (equivalently
/// `1 / (1 + e^-x)`), abstracted over float width for generic kernels.
pub trait HasSigm {
    fn sigm(self) -> Self;
}

impl HasSigm for f32 {
    fn sigm(self) -> Self {
        // BUG FIX: the previous version computed `x.powf(x)` (x^x) instead of
        // e^x (its unused local `e` gave the mistake away). `exp` is both
        // correct and more precise than `E.powf(x)`.
        let ex = self.exp();
        ex / (1.0 + ex)
    }
}

impl HasSigm for f64 {
    fn sigm(self) -> Self {
        let ex = self.exp();
        ex / (1.0 + ex)
    }
}
/// Anything fallibly convertible to `usize` whose error type plugs into
/// `anyhow::Context`, so call sites can write
/// `x.try_into().context("…")?` on signed addresses/sizes.
pub trait TryToUsize: TryInto<usize, Error = Self::TryError>
where std::result::Result<usize, Self::TryError> : Context<usize, Self::TryError>
{
    // Associated error type; the bounds make it usable with `anyhow`.
    type TryError: Debug + Send + Sync + 'static + std::error::Error;
}
// Blanket impl: any suitable `TryInto<usize>` type is automatically
// `TryToUsize`; callers never implement this trait by hand.
impl<T, E> TryToUsize for T
where
    T: TryInto<usize, Error = E>,
    E: Debug + Send + Sync + 'static + std::error::Error,
    std::result::Result<usize, E> : Context<usize, E>
{
    type TryError = E;
}
/// Total (possibly lossy) conversion from `usize` into a float type, e.g. for
/// dividing by element counts in generic code.
pub trait FromUsize {
    fn from_usize(v: usize) -> Self;
}

impl FromUsize for f32 {
    fn from_usize(v: usize) -> Self {
        v as f32
    }
}

impl FromUsize for f64 {
    fn from_usize(v: usize) -> Self {
        v as f64
    }
}
/// Bundle of capabilities a numeric type needs to be the *destination* of a
/// slice upcast: arithmetic, comparison, the activation helpers
/// (`tanh`/`sigm`) and `usize` conversion used by the vector kernels.
pub trait UpcastDestTraits<T>:
    Sync
    + Send
    + Sized
    + Clone
    + Copy
    + Debug
    + Add<Output = T>
    + Sub<Output = T>
    + Mul<Output = T>
    + Div<Output = T>
    + PartialEq<T>
    + PartialOrd<T>
    + HasTanh
    + HasSigm
    + FromUsize
{
}
/// Views `&[Self-element]` as a slice of `X`: `Cow::Borrowed` when no
/// conversion is needed, `Cow::Owned` when each element must be cast
/// (see the `upcast_impl!` macro below).
pub trait UpcastSlice<X> {
    fn up<'a>(&'a self) -> Cow<'a, [X]>
    where
        [X]: ToOwned<Owned = Vec<X>>,
        X: UpcastDestTraits<X>;
}
/// Generates `UpcastSlice` impls.
///
/// Arms:
/// * `($from, $to)` — cross-type impl: allocates a converted `Vec` (an `as`
///   cast per element) and returns `Cow::Owned`.
/// * `($same)` — identity impl: borrows the slice (`Cow::Borrowed`, no
///   allocation) and also marks the type as a valid upcast destination
///   (`UpcastDestTraits`).
macro_rules! upcast_impl {
    ($from:ty, $to:ty) => {
        impl UpcastSlice<$to> for [$from] {
            fn up<'a>(&'a self) -> Cow<'a, [$to]>
            where
                [$to]: ToOwned<Owned = Vec<$to>>,
            {
                self.iter().map(|&x| x as $to).collect::<Vec<_>>().into()
            }
        }
    };
    ($same:ty) => {
        impl UpcastSlice<$same> for [$same] {
            fn up<'a>(&'a self) -> Cow<'a, [$same]>
            where
                [$same]: ToOwned<Owned = Vec<$same>>,
            {
                self.into()
            }
        }
        impl UpcastDestTraits<$same> for $same {}
    };
}
upcast_impl!(f32, f64);
// NOTE(review): `f64 -> f32` goes through the same "upcast" machinery but is
// actually a narrowing (precision-losing) conversion.
upcast_impl!(f64, f32);
upcast_impl!(f32);
upcast_impl!(f64);
#[cfg(test)]
mod test {
    use core::panic;
    use std::borrow::Cow;
    use crate::memory_manager::type_traits::{UpcastDestTraits, UpcastSlice};
    /// Same-type upcast must borrow (no allocation).
    #[test]
    fn test_same_type() {
        let elem: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let elem_up: Cow<[f32]> = elem.up();
        if let std::borrow::Cow::Owned(_) = elem_up {
            panic!("Allocate when same element")
        }
    }
    /// Cross-type upcast must allocate an owned, converted vector.
    #[test]
    fn test_different_type() {
        let elem: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let elem_up: Cow<[f64]> = elem.up();
        if let std::borrow::Cow::Borrowed(_) = elem_up {
            panic!("Not allocating with different type")
        }
    }
    // Exercises the trait bounds the way the vector kernels use them:
    // both inputs are upcast to a common destination type, then summed.
    fn generic_sum<A, B, T>(a: &[A], b: &[B]) -> Vec<T>
    where
        [A]: UpcastSlice<T>,
        [B]: UpcastSlice<T>,
        T: UpcastDestTraits<T>,
    {
        let a_up = a.up();
        let b_up = b.up();
        let mut ret = Vec::new();
        for (a, b) in a_up.iter().zip(b_up.iter()) {
            ret.push(*a + *b);
        }
        ret
    }
    /// f32 + f64 slices combine into an f64 result through the upcast traits.
    #[test]
    fn sum_between_different_slice_in_generic() {
        let a: [f32; 4] = [1.0, 2.0, 3.0, 4.0];
        let b: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
        let c: Vec<f64> = generic_sum(&a, &b);
        assert_eq!(c, vec![2.0, 4.0, 6.0, 8.0]);
    }
}
}

View File

@@ -0,0 +1,197 @@
#![allow(unused)]
use crate::{
cpu::CPU, instruction_set::{Instruction, InstructionStatus, Instructions}, memory_manager::type_traits::TryToUsize, send_recv::{SendRecv, handle_send_recv}, tracing::TRACER
};
pub mod cpu;
pub mod instruction_set;
pub mod memory_manager;
pub mod send_recv;
pub mod utility;
pub mod json_to_instruction;
pub mod tracing;
/// Builds the per-core instruction table handed to [`Executable`].
///
/// Slot 0 is the host and slots `1..=size` are the compute cores, so the
/// table always holds `size + 1` entries.
#[derive(Debug, Clone)]
pub struct CoreInstructionsBuilder {
    core_instructions: Vec<CoreInstruction>,
}
impl CoreInstructionsBuilder {
    /// Creates a builder pre-filled with `size + 1` empty programs.
    pub fn new(size: usize) -> Self {
        // FIX: the old version reserved capacity for `size` entries but then
        // pushed `size + 1`, guaranteeing one reallocation. `vec!` sizes the
        // allocation correctly in one shot.
        Self {
            core_instructions: vec![CoreInstruction::empty(); size + 1],
        }
    }
    /// Consumes the builder, returning the finished instruction table.
    pub fn build(self) -> Vec<CoreInstruction> {
        self.core_instructions
    }
    /// Installs `core_instruction` as the program for slot `core`.
    ///
    /// # Panics
    /// Panics when `core` is negative or out of range.
    pub fn set_core(&mut self, core: impl TryToUsize, core_instruction: Instructions) -> &mut Self {
        self.core_instructions[core.try_into().expect("Set core with not valid size")] = core_instruction.into();
        self
    }
}
/// A core's program together with its current program counter.
#[derive(Debug, Clone)]
pub struct CoreInstruction {
    instructions: Instructions,
    program_counter: usize,
}
impl CoreInstruction {
    // NOTE(review): appears unused in this file (everything goes through
    // `empty`/`From`); confirm before removing.
    fn new(instructions: Instructions, program_counter: usize) -> Self {
        Self {
            instructions,
            program_counter,
        }
    }
    /// Empty program starting at pc 0 — the placeholder for cores that were
    /// given no instructions.
    fn empty() -> Self {
        Self { instructions: Vec::new(), program_counter: 0 }
    }
}
impl From<Instructions> for CoreInstruction {
    /// Wraps an instruction list with the program counter reset to 0.
    fn from(value: Instructions) -> Self {
        CoreInstruction {
            instructions: value,
            program_counter: 0,
        }
    }
}
/// A complete simulation: CPU state, one program per core (host included),
/// and the bookkeeping for in-flight core-to-core transfers.
#[derive(Debug, Clone)]
pub struct Executable {
    cpu: CPU,
    core_instructions: Vec<CoreInstruction>,
    send_recv : SendRecv,
}
impl Executable {
pub fn new(cpu: CPU, core_instructions: Vec<CoreInstruction>) -> Self {
let num_core = cpu.num_core();
let send_recv = SendRecv::new(num_core);
assert_eq!(num_core, core_instructions.len(), "Some core doesn't have is list of istruction (required even if empty)");
Self {
cpu,
core_instructions,
send_recv
}
}
pub fn execute(&mut self) {
TRACER.lock().unwrap().init(self);
let Self {
cpu,
core_instructions,
send_recv
} = self;
let mut cpu_progressed = 0;
let max_core = cpu.num_core();
let mut index_unit = 0;
while (cpu_progressed > -2) {
let mut core_result = InstructionStatus::Completed;
while core_result.is_completed() && let Some(core_instruction) = core_instructions.get_mut(index_unit){
core_result = InstructionStatus::NotExecuted;
let CoreInstruction {
instructions,
program_counter,
} = core_instruction;
core_result = instructions
.get(*program_counter)
.map_or(InstructionStatus::default(), |inst: &Instruction| {
inst.execute(cpu)
});
if core_result.is_completed() {
cpu_progressed = 0;
*program_counter += 1;
}
}
if handle_send_recv(cpu, core_instructions, send_recv, core_result) { cpu_progressed = 0; }
handle_wait_sync(cpu, core_instructions, core_result);
index_unit = if index_unit + 1 >= max_core {
cpu_progressed-=1;
0
} else {
index_unit + 1
};
}
}
pub fn cpu(&self) -> &CPU {
&self.cpu
}
pub fn cpu_mut(&mut self) -> &mut CPU {
&mut self.cpu
}
}
// Deliberate no-op for now: `wait`/`sync` are not yet emitted by the compiler
// (their JSON decoders are `todo!` stubs), so there is nothing to handle.
fn handle_wait_sync(cpu: &mut CPU, core_instructions: &mut [CoreInstruction], core_result: InstructionStatus) {
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::instruction_set::instruction_data::InstructionDataBuilder;
    use crate::instruction_set::{InstructionsBuilder, isa::*};
    /// Host-only run: store sixteen i32s, load mem[0] (=1) and mem[8] (=3)
    /// through r1/r2, add them, and expect 4 in register 2.
    #[test]
    fn test_only_host() {
        let mut cpu = CPU::new(0);
        cpu.host()
            .execute_store(0, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
        let mut inst_builder = InstructionsBuilder::new();
        let mut idata_build = InstructionDataBuilder::new();
        idata_build.set_core_indx(0).fix_core_indx();
        inst_builder.make_inst(sldi, idata_build.set_rdimm(1, 0).build());
        inst_builder.make_inst(sld, idata_build.set_rdr1(1, 1).build());
        inst_builder.make_inst(sldi, idata_build.set_rdimm(2, 8).build());
        inst_builder.make_inst(sld, idata_build.set_rdr1(2, 2).build());
        inst_builder.make_inst(sadd, idata_build.set_rdr1r2(2, 1, 2).build());
        let mut core_instruction = vec![inst_builder.build().into()];
        let mut executable = Executable::new(cpu, core_instruction);
        executable.execute();
        assert_eq!(executable.cpu_mut().host().register(2), 4, "Not sum to 4");
    }
    /// Ten cores run the same load/load/add program independently; every core
    /// must end with 4 in register 2.
    #[test]
    fn test_10_core_same_code() {
        // Builds the shared program and seeds core `index`'s memory.
        let setup_core = |index: usize, cpu: &mut CPU| -> Instructions {
            cpu.core(index)
                .execute_store(0, &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
            let mut inst_builder = InstructionsBuilder::new();
            let mut idata_build = InstructionDataBuilder::new();
            idata_build.set_core_indx(index as i32).fix_core_indx();
            inst_builder.make_inst(sldi, idata_build.set_rdimm(1, 0).build());
            inst_builder.make_inst(sld, idata_build.set_rdr1(1, 1).build());
            inst_builder.make_inst(sldi, idata_build.set_rdimm(2, 8).build());
            inst_builder.make_inst(sld, idata_build.set_rdr1(2, 2).build());
            inst_builder.make_inst(sadd, idata_build.set_rdr1r2(2, 1, 2).build());
            inst_builder.build()
        };
        let mut cpu = CPU::new(10);
        let mut core_instruction = Vec::new();
        for i in 0..cpu.num_core() {
            core_instruction.push(setup_core(i, &mut cpu).into())
        }
        let mut executable = Executable::new(cpu, core_instruction);
        executable.execute();
        for i in 0.. executable.cpu.num_core() {
            assert_eq!(executable.cpu_mut().core(i).register(2), 4, "Core {} not sum to 4", i);
        }
    }
}

View File

@@ -0,0 +1,143 @@
use anyhow::Context;
use crate::{
CoreInstruction, cpu::CPU, instruction_set::InstructionStatus, tracing::TRACER,
utility::add_offset_rd,
};
/// One half of a pending core-to-core transfer as seen from `internal_core`:
/// `external_core` is the peer, `address`/`size` describe the local byte
/// range (source for a send, destination for a receive).
#[derive(Debug, Clone, Copy)]
struct SendRecvInfo {
    internal_core: usize,
    external_core: usize,
    address: usize,
    size: usize,
}
impl SendRecvInfo {
    fn new(internal_core: usize, external_core: usize, address: usize, size: usize) -> Self {
        Self {
            internal_core,
            external_core,
            address,
            size,
        }
    }
}
/// Per-core registries of in-flight transfers, indexed by core id.
/// `None` means that core has no pending send (resp. receive).
#[derive(Debug, Clone)]
pub struct SendRecv {
    sending: Box<[Option<SendRecvInfo>]>,
    receiving: Box<[Option<SendRecvInfo>]>,
}
impl SendRecv {
    /// Creates the tracker with one empty send slot and one empty receive
    /// slot per core (host slot included).
    pub fn new(num_core: usize) -> Self {
        Self {
            sending: vec![None; num_core].into(),
            receiving: vec![None; num_core].into(),
        }
    }
}
/// Tries to complete a pending core-to-core transfer after a core reported a
/// `Sending`/`Reciving` status.
///
/// The half described by `core_result` is registered in `send_recv`; if the
/// matching opposite half is already registered, the bytes are copied, trace
/// events are emitted, and both cores' program counters advance. Returns
/// `true` only when a transfer actually completed.
pub fn handle_send_recv(
    cpu: &mut CPU,
    core_instructions: &mut [CoreInstruction],
    send_recv: &mut SendRecv,
    core_result: InstructionStatus,
) -> bool {
    // Completes a matched pair: the two infos must reference each other
    // (A sends to B while B receives from A).
    let transfer_memory = |cpu: &mut CPU,
                           core_instructions: &mut [CoreInstruction],
                           sender: Option<SendRecvInfo>,
                           receiver: Option<SendRecvInfo>| {
        if let Some(sender) = sender
            && let Some(receiver) = receiver
            && sender.internal_core == receiver.external_core
            && receiver.internal_core == sender.external_core
        {
            let [sender_core, reciver_core] =
                cpu.get_multiple_cores([sender.internal_core, receiver.internal_core]);
            let memory = sender_core
                .load::<u8>(sender.address, sender.size)
                .with_context(|| {
                    format!(
                        "Sender crash tranfering memroy from {} with size {}",
                        sender.address, sender.size
                    )
                })
                .unwrap();
            reciver_core.execute_store(receiver.address, memory[0]);
            // Trace the send side at its current program counter.
            {
                let sender = &mut core_instructions[sender.internal_core];
                let pc = sender.program_counter;
                let inst = sender.instructions.get(pc).unwrap();
                let data = inst.data;
                TRACER.lock().unwrap().post_send(cpu, data);
            }
            // Trace the receive side at its current program counter.
            {
                let recv = &mut core_instructions[receiver.internal_core];
                let pc = recv.program_counter;
                let inst = recv.instructions.get(pc).unwrap();
                let data = inst.data;
                TRACER.lock().unwrap().post_recv(cpu, data);
            }
            // Both cores were blocked on this transfer; step them past it.
            core_instructions[sender.internal_core].program_counter += 1;
            core_instructions[receiver.internal_core].program_counter += 1;
            return true;
        }
        false
    };
    match core_result {
        InstructionStatus::Sending(instruction_data) => {
            let (core_idx, imm_core) = instruction_data.get_core_immcore();
            let r1 = instruction_data.r1();
            let imm_len = instruction_data
                .imm_len()
                .try_into()
                .expect("imm_len can not be negative");
            let offset_value = instruction_data.offset_value();
            let core = cpu.core(core_idx);
            let r1_val = core.register(r1);
            // NOTE(review): passes a constant offset_select of 1 to
            // `add_offset_rd`, which tests bit 4 — so the offset is never
            // applied here. Confirm whether `add_offset_r1` was intended.
            let address = add_offset_rd(r1_val, 1, offset_value);
            let sender: usize = core_idx.try_into().expect("core can not be negative");
            assert_ne!(sender, 0, "Host can not use send");
            let receiver: usize = imm_core.try_into().expect("imm_core can not be negative");
            assert_ne!(receiver, 0, "Host can not use receive");
            send_recv.sending[sender] = Some(SendRecvInfo::new(sender, receiver, address, imm_len));
            transfer_memory(
                cpu,
                core_instructions,
                send_recv.sending[sender],
                send_recv.receiving[receiver],
            )
        }
        InstructionStatus::Reciving(instruction_data) => {
            let (core_idx, imm_core) = instruction_data.get_core_immcore();
            let rd = instruction_data.rd();
            let imm_len = instruction_data
                .imm_len()
                .try_into()
                .expect("imm_len can not be negative");
            let offset_value = instruction_data.offset_value();
            let core = cpu.core(core_idx);
            let rd_val = core.register(rd);
            // NOTE(review): constant offset_select of 4 makes `add_offset_rd`
            // apply `4 * offset_value` (see `add_offset_impl`) — confirm the
            // 4x scaling is intended.
            let address = add_offset_rd(rd_val, 4, offset_value);
            let receiver: usize = core_idx.try_into().expect("core can not be negative");
            assert_ne!(receiver, 0, "Host can not use receive");
            let sender: usize = imm_core.try_into().expect("imm_core can not be negative");
            assert_ne!(sender, 0, "Host can not use send");
            send_recv.receiving[receiver] =
                Some(SendRecvInfo::new(receiver, sender, address, imm_len));
            transfer_memory(
                cpu,
                core_instructions,
                send_recv.sending[sender],
                send_recv.receiving[receiver],
            )
        }
        // Any other status carries no transfer information.
        _ => false,
    }
}

View File

@@ -0,0 +1,289 @@
use std::fs::File;
use crate::{
cpu::CPU,
instruction_set::instruction_data::InstructionData,
memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
},
tracing::Trace,
utility::{add_offset_r1, add_offset_rd},
};
use std::io::Write;
// No-op tracing stubs compiled when the `tracing` feature is OFF. Every hook
// the instruction set calls exists with the same signature here, so call
// sites need no cfg guards; presumably the real implementations live in
// `tracing_isa` (feature on) — confirm.
#[cfg(not(feature = "tracing"))]
impl Trace {
    ///////////////////////////////////////////////////////////////
    /////////////////Scalar/register Instructions//////////////////
    ///////////////////////////////////////////////////////////////
    pub fn pre_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_sld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_sld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_smul(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_smul(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    /////////////////////////////////////////////////////////////////
    ///////////////////Matrix/vector Instructions////////////////////
    /////////////////////////////////////////////////////////////////
    pub fn pre_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    // The generic bounds mirror the vector kernels' requirements so these
    // stubs stay drop-in compatible with the feature-on implementations.
    pub fn pre_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.mvm_impl::<F,M,T>(cores, data, "Pre");
    }
    pub fn post_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.mvm_impl::<F,M,T>(cores, data, "Post");
    }
    pub fn mvm_impl<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData, prefix : &'static str)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn post_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
    }
    pub fn pre_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn post_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn pre_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn post_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn pre_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    pub fn post_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
    }
    /////////////////////////////////////////////////////////////////
    /////Communication/synchronization Instructions/////////////////
    /////////////////////////////////////////////////////////////////
    pub fn pre_ld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_ld(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_st(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_st(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_send(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_send(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn pre_recv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
    pub fn post_recv(&mut self, cores: &mut CPU, data: InstructionData) {
    }
}

View File

@@ -0,0 +1,52 @@
mod tracing_isa;
mod disable;
mod pretty_print;
#[cfg(feature = "tracing")]
use std::fs::File;
use std::sync::{LazyLock, Mutex};
use crate::Executable;
// Real tracer, compiled only with the `tracing` feature: one output file per
// core, created lazily by `init`.
#[cfg(feature = "tracing")]
pub struct Trace {
    out_files : Vec<File>
}
#[cfg(feature = "tracing")]
impl Trace {
    fn new() -> Self {
        Self { out_files : Vec::new()}
    }
    /// Opens one `TraceCore{i}` file per core in the current directory.
    ///
    /// # Panics
    /// Panics if any trace file cannot be created.
    pub fn init(&mut self, executor : & Executable) {
        let cpu = executor.cpu();
        let num_core = cpu.num_core();
        for i in 0..num_core {
            let file = File::create(format!("TraceCore{}", i)).expect("Can not create file");
            self.out_files.push(file);
        }
    }
}
// Zero-sized stand-in compiled when the `tracing` feature is OFF; `init` is
// a no-op so the executor can call it unconditionally.
#[cfg(not(feature = "tracing"))]
pub struct Trace {
}
#[cfg(not(feature = "tracing"))]
impl Trace {
    fn new() -> Self {
        Self { }
    }
    pub fn init(&mut self, executor : & Executable) {
    }
}
pub static TRACER: LazyLock<Mutex<Trace>> = LazyLock::new(|| { Trace::new().into()});

View File

@@ -0,0 +1,19 @@
use std::{fmt::Debug, io::Write};
/// Debug-prints `slice`, reinterpreted as elements of type `F`, `size` items
/// per row, wrapped in indented brackets.
///
/// Output is best-effort: write errors are deliberately ignored (this is a
/// diagnostic dump), which the explicit `let _ =` now documents instead of
/// silently discarding `#[must_use]` results.
///
/// # Panics
/// Panics if `size` is 0, or if the byte view of `slice` is not an aligned,
/// whole number of `F` values.
pub fn print_slice<D: Debug, F: Sized + Debug>(writer: &mut impl Write, slice: &[D], size: usize) {
    // SAFETY: sound only when the bytes of `D` are valid values of `F`; the
    // asserts reject any misaligned head/tail so the view covers the slice
    // exactly (callers pass plain numeric types).
    let slice = unsafe {
        let (head, body, tail) = slice.align_to::<F>();
        assert_eq!(head.len(), 0);
        assert_eq!(tail.len(), 0);
        body
    };
    let _ = writeln!(writer, "\t\t[");
    for row in slice.chunks(size) {
        let _ = write!(writer, "\t\t");
        for element in row {
            let _ = write!(writer, "{:?} ", element);
        }
        let _ = writeln!(writer);
    }
    let _ = writeln!(writer, "\t\t]");
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
use std::{fmt::Debug, mem::transmute};
use crate::memory_manager::type_traits::TryToUsize;
/// Applies a register-selected offset to `address`.
///
/// `offset_select` is a one-hot bit field (r1 = 1, r2 = 2, rd = 4); the
/// offset only takes effect when it matches `id`.
/// NOTE(review): `(offset_select & id)` evaluates to the *bit value* (1, 2
/// or 4), so a matching rd bit scales the offset by 4 and r2 by 2 — confirm
/// this scaling is intended rather than a plain "bit set" test.
///
/// # Panics
/// Panics when `offset_select` is not one of 1/2/4 (unless the offset is 0),
/// or when a negative offset would move the address below 0.
fn add_offset_impl(address: usize, offset_select: i32, offset_value: i32, id: i32) -> usize {
    assert!(
        offset_select == 1 || offset_select == 2 || offset_select == 4 || offset_value == 0,
        "offset_select not a bit field"
    );
    let offset = (offset_select & id) * offset_value;
    // FIX: the old code computed `address - offset as usize` for negative
    // offsets; casting a negative i32 to usize wraps, so the subtraction
    // underflowed (debug panic) instead of moving the address backwards.
    if offset >= 0 {
        address + offset as usize
    } else {
        address - offset.unsigned_abs() as usize
    }
}
/// Applies the rd-selected offset (bit 4 of `offset_select`) to `address`.
pub fn add_offset_rd(address: impl TryToUsize, offset_select: i32, offset_value: i32) -> usize {
    add_offset_impl(
        address.try_into().expect("address can not be negative"),
        offset_select,
        offset_value,
        4,
    )
}
/// Applies the r1-selected offset (bit 1 of `offset_select`) to `address`.
pub fn add_offset_r1(address: impl TryToUsize, offset_select: i32, offset_value: i32) -> usize {
    add_offset_impl(
        address.try_into().expect("address can not be negative"),
        offset_select,
        offset_value,
        1,
    )
}
/// Applies the r2-selected offset (bit 2 of `offset_select`) to `address`.
pub fn add_offset_r2(address: impl TryToUsize, offset_select: i32, offset_value: i32) -> usize {
    add_offset_impl(
        address.try_into().expect("address can not be negative"),
        offset_select,
        offset_value,
        2,
    )
}
/// Packs the bit pattern of an `f32` into an `i32` so a float immediate can
/// travel through an integer instruction field (see `json_to_lldi`).
///
/// # Panics
/// Panics when `val` cannot be converted to `f32`.
pub fn pack_float_in_i32(val: impl TryInto<f32>) -> i32 {
    // `|_|` instead of the old `|x|`: the error value was never used.
    let val = val.try_into().unwrap_or_else(|_| panic!("Cannot parse into f32"));
    // Bit-preserving reinterpretation (same bits, not a numeric cast of the
    // value); equivalent to `u32::cast_signed`.
    f32::to_bits(val) as i32
}