This commit is contained in:
+2102
-8
File diff suppressed because it is too large
Load Diff
@@ -13,8 +13,9 @@ name = "pimcore"
|
||||
path = "src/lib/pimcore.rs"
|
||||
|
||||
[features]
|
||||
default = ["tracing"]
|
||||
default = []
|
||||
tracing = []
|
||||
profile_time = ["dep:plotly", "dep:comfy-table", "dep:statrs"]
|
||||
|
||||
|
||||
|
||||
@@ -27,3 +28,9 @@ hex = "0"
|
||||
paste = "1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
statrs = {version="0.16", optional=true}
|
||||
comfy-table = {version="7.1", optional=true}
|
||||
plotly = {version="0.8", optional=true}
|
||||
rayon = "1.12.0"
|
||||
faer = "0.24.0"
|
||||
faer-traits = "0.24.0"
|
||||
|
||||
@@ -1,14 +1,19 @@
|
||||
use crate::{
|
||||
cpu::{CPU, crossbar}, instruction_set::{
|
||||
cpu::{CPU, crossbar},
|
||||
instruction_set::{
|
||||
Instruction, InstructionData, InstructionStatus, InstructionType, VectorBitWith,
|
||||
helper::add_all,
|
||||
}, memory_manager::{
|
||||
},
|
||||
memory_manager::{
|
||||
MemoryStorable,
|
||||
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
|
||||
}, tracing::TRACER, utility::{add_offset_r1, add_offset_r2, add_offset_rd}
|
||||
},
|
||||
tracing::TRACER,
|
||||
utility::{add_offset_r1, add_offset_r2, add_offset_rd},
|
||||
};
|
||||
use aligned_vec::{AVec, ConstAlign};
|
||||
use anyhow::{Context, Result, ensure};
|
||||
use rayon::prelude::*;
|
||||
|
||||
use paste::paste;
|
||||
use std::{borrow::Cow, cell::OnceCell, collections::HashMap};
|
||||
@@ -76,8 +81,7 @@ pub fn functor_to_name(functor: usize) -> &'static str {
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////Scalar/register Instructions//////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
pub fn sldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
|
||||
{
|
||||
pub fn sldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
|
||||
TRACER.lock().unwrap().pre_sldi(cores, data);
|
||||
let (core_indx, rd, imm) = data.get_core_rd_imm();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -229,25 +233,30 @@ where
|
||||
[F]: UpcastSlice<T> + UpcastSlice<M>,
|
||||
[M]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
|
||||
// M additionally requires faer_traits::ComplexField (bound added on this function only) so it can be used in the faer matrix product below
|
||||
M: UpcastDestTraits<M> + MemoryStorable + FromFloat + faer_traits::ComplexField,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_mvm::<F,M,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_mvm::<F, M, T>(cores, data);
|
||||
|
||||
let (core_indx, rd, r1, mbiw, relu, group) = data.get_core_rd_r1_mbiw_immrelu_immgroup();
|
||||
let group: usize = group.try_into().context("group can not be negative")?;
|
||||
|
||||
let core = cores.core(core_indx);
|
||||
let r1_val = core.register(r1);
|
||||
let rd_val = core.register(rd);
|
||||
|
||||
let (memory, crossbars) = core.get_memory_crossbar();
|
||||
let crossbar = crossbars.get_mut(group).unwrap();
|
||||
let crossbar_stored_bytes = crossbar.stored_bytes();
|
||||
let crossbar_byte_width = crossbar.width();
|
||||
//Fix this
|
||||
|
||||
let crossbar_elem_width = crossbar_byte_width / size_of::<M>();
|
||||
ensure!(
|
||||
crossbar_byte_width & size_of::<M>() == 0,
|
||||
crossbar_byte_width % size_of::<M>() == 0,
|
||||
"M not divisor of the crosbbar size"
|
||||
);
|
||||
|
||||
let crossbar_height = crossbar.height();
|
||||
let crossbar_byte_size = crossbar_byte_width * crossbar_height;
|
||||
|
||||
@@ -257,19 +266,29 @@ where
|
||||
let load = loads[0];
|
||||
let vec: Cow<[M]> = load.up();
|
||||
let matrix = crossbar.load::<M>(crossbar_byte_size)?[0];
|
||||
let mut res = Vec::with_capacity(crossbar_elem_width);
|
||||
let mut partial :AVec<M, _> = AVec::<M, ConstAlign<64>>::with_capacity(64, vec.len());
|
||||
partial.resize(vec.len(), M::from_f32(0.0));
|
||||
|
||||
for x in 0..crossbar_elem_width {
|
||||
partial[0] = vec[0] * matrix[x];
|
||||
for y in 1..crossbar_height {
|
||||
partial[y] = vec[y] * matrix[y * crossbar_elem_width + x];
|
||||
}
|
||||
// --- FAER IMPLEMENTATION ---
|
||||
|
||||
// 1. Explicitly create a Matrix Reference (MatRef)
|
||||
let matrix_view = faer::mat::MatRef::from_row_major_slice(
|
||||
matrix.as_ref(),
|
||||
crossbar_height,
|
||||
crossbar_elem_width,
|
||||
);
|
||||
|
||||
// 2. Explicitly create a Column Vector Reference (ColRef)
|
||||
// Using `ColRef` here guarantees we don't accidentally get a RowRef (Fixes E0277)
|
||||
let vec_view = faer::col::ColRef::from_slice(vec.as_ref());
|
||||
|
||||
let res_col: faer::col::Col<M> = matrix_view.transpose() * vec_view;
|
||||
|
||||
// 4. Convert back to a standard Rust Vec
|
||||
// by copying the first `crossbar_elem_width` entries of `res_col` one by one.
|
||||
// The owned Col is indexed directly, so no try_as_slice()/unwrap() is involved here.
|
||||
let mut res: Vec<M> = (0..crossbar_elem_width).map(|i| res_col[i]).collect();
|
||||
|
||||
// --- END FAER ---
|
||||
|
||||
let mut acc = add_all(partial.as_slice());
|
||||
res.push(acc);
|
||||
}
|
||||
if relu != 0 {
|
||||
res.iter_mut().for_each(|x| {
|
||||
if *x < M::from_f32(0.0) {
|
||||
@@ -277,13 +296,16 @@ where
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
ensure!(
|
||||
res.len() == crossbar_elem_width,
|
||||
"mvm generate a vector bigger thant it's requested elements"
|
||||
"mvm generate a vector bigger thant it's requested elements"
|
||||
);
|
||||
|
||||
let res_up: Cow<[T]> = res.as_slice().up();
|
||||
core.execute_store(rd_val, res_up.as_ref());
|
||||
TRACER.lock().unwrap().post_mvm::<F,M,T>(cores, data);
|
||||
|
||||
TRACER.lock().unwrap().post_mvm::<F, M, T>(cores, data);
|
||||
Ok(InstructionStatus::Completed)
|
||||
}
|
||||
|
||||
@@ -317,7 +339,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vvadd::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vvadd::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -345,7 +367,7 @@ where
|
||||
);
|
||||
let res_up: Cow<[T]> = res.as_slice().up();
|
||||
core.execute_store(rd_val, res_up.as_ref());
|
||||
TRACER.lock().unwrap().post_vvadd::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().post_vvadd::<F, T>(cores, data);
|
||||
Ok(InstructionStatus::Completed)
|
||||
}
|
||||
|
||||
@@ -359,7 +381,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vvsub::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vvsub::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -400,7 +422,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vvmul::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vvmul::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -440,7 +462,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vvdmul::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vvdmul::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -476,7 +498,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vvmax::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vvmax::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -525,7 +547,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vavg::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vavg::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -533,7 +555,10 @@ where
|
||||
let r2_val = r2;
|
||||
ensure!(r2_val == 1, "Stride different than 1 not supported");
|
||||
let rd_val = core.register(rd);
|
||||
ensure!(offset_select == 1, "Offset select cannot be different from 1");
|
||||
ensure!(
|
||||
offset_select == 1,
|
||||
"Offset select cannot be different from 1"
|
||||
);
|
||||
let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
|
||||
let loads = core.reserve_load(r1_val, imm_len)?.execute_load::<F>()?;
|
||||
let load1 = loads[0];
|
||||
@@ -555,7 +580,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vrelu::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vrelu::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -585,7 +610,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vtanh::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vtanh::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -613,7 +638,7 @@ where
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vsigm::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vsigm::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -633,13 +658,16 @@ pub fn vsoftmax(cores: &mut CPU, data: InstructionData) -> Result<InstructionSta
|
||||
panic!("You are calling a placeholder, the real call is the generic version");
|
||||
}
|
||||
|
||||
pub(super) fn vsoftmax_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
|
||||
pub(super) fn vsoftmax_impl<F, T>(
|
||||
cores: &mut CPU,
|
||||
data: InstructionData,
|
||||
) -> Result<InstructionStatus>
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
TRACER.lock().unwrap().pre_vsoftmax::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().pre_vsoftmax::<F, T>(cores, data);
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let core = cores.core(core_indx);
|
||||
@@ -656,16 +684,15 @@ where
|
||||
.reduce(|a, b| if a > b { a } else { b })
|
||||
.unwrap();
|
||||
let exp_values: Vec<F> = load1.iter().map(|&a| (a - max_val).exp()).collect();
|
||||
let sum = exp_values
|
||||
.iter()
|
||||
.copied()
|
||||
.reduce(|a, b| a + b)
|
||||
.unwrap();
|
||||
ensure!(sum > 0.0.into(), "vsoftmax normalization sum must be positive");
|
||||
let sum = exp_values.iter().copied().reduce(|a, b| a + b).unwrap();
|
||||
ensure!(
|
||||
sum > 0.0.into(),
|
||||
"vsoftmax normalization sum must be positive"
|
||||
);
|
||||
let res: Vec<F> = exp_values.iter().map(|&a| a / sum).collect();
|
||||
let res_up: Cow<[T]> = res.as_slice().up();
|
||||
core.execute_store(rd_val, res_up.as_ref());
|
||||
TRACER.lock().unwrap().post_vsoftmax::<F,T>(cores, data);
|
||||
TRACER.lock().unwrap().post_vsoftmax::<F, T>(cores, data);
|
||||
Ok(InstructionStatus::Completed)
|
||||
}
|
||||
|
||||
@@ -749,12 +776,10 @@ pub fn lmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
|
||||
}
|
||||
|
||||
pub fn send(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
|
||||
TRACER.lock().unwrap().pre_send(cores, data);
|
||||
Ok(InstructionStatus::Sending(data))
|
||||
}
|
||||
|
||||
pub fn recv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
|
||||
TRACER.lock().unwrap().pre_recv(cores, data);
|
||||
Ok(InstructionStatus::Reciving(data))
|
||||
}
|
||||
|
||||
|
||||
@@ -169,6 +169,9 @@ impl<'a> Executable<'a> {
|
||||
}
|
||||
}
|
||||
print_status(cores_instructions);
|
||||
|
||||
#[cfg(feature = "profile_time")]
|
||||
TRACER.lock().unwrap().report();
|
||||
}
|
||||
|
||||
pub fn cpu(&self) -> &CPU<'a> {
|
||||
|
||||
@@ -58,6 +58,20 @@ where 'a : 'b
|
||||
&& sender.internal_core == receiver.external_core
|
||||
&& receiver.internal_core == sender.external_core
|
||||
{
|
||||
{
|
||||
let sender = &mut core_instructions[sender.internal_core];
|
||||
let pc = sender.program_counter;
|
||||
let inst = sender.instructions.get(pc).unwrap();
|
||||
let data = inst.data;
|
||||
TRACER.lock().unwrap().pre_send(cpu, data);
|
||||
}
|
||||
{
|
||||
let recv = &mut core_instructions[receiver.internal_core];
|
||||
let pc = recv.program_counter;
|
||||
let inst = recv.instructions.get(pc).unwrap();
|
||||
let data = inst.data;
|
||||
TRACER.lock().unwrap().pre_recv(cpu, data);
|
||||
}
|
||||
let [sender_core, reciver_core] =
|
||||
cpu.get_multiple_cores([sender.internal_core, receiver.internal_core]);
|
||||
let memory = sender_core
|
||||
|
||||
@@ -13,7 +13,7 @@ use crate::{
|
||||
};
|
||||
use std::io::Write;
|
||||
|
||||
#[cfg(not(feature = "tracing"))]
|
||||
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
|
||||
impl Trace {
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////Scalar/register Instructions//////////////////
|
||||
|
||||
@@ -1,52 +1,32 @@
|
||||
mod tracing_isa;
|
||||
mod disable;
|
||||
mod pretty_print;
|
||||
use std::{fs::File, path::{ PathBuf}};
|
||||
#[cfg(feature = "profile_time")]
|
||||
mod profile;
|
||||
|
||||
#[cfg(feature = "profile_time")]
|
||||
use profile::Trace;
|
||||
|
||||
#[cfg(feature = "tracing")]
|
||||
mod trace;
|
||||
#[cfg(feature = "tracing")]
|
||||
use trace::Trace;
|
||||
|
||||
use crate::Executable;
|
||||
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{LazyLock, Mutex};
|
||||
|
||||
|
||||
use crate::Executable;
|
||||
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
|
||||
pub struct Trace {}
|
||||
|
||||
#[cfg(feature = "tracing")]
|
||||
pub struct Trace {
|
||||
out_files : Vec<File>
|
||||
}
|
||||
|
||||
|
||||
#[cfg(feature = "tracing")]
|
||||
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
|
||||
impl Trace {
|
||||
fn new() -> Self {
|
||||
Self { out_files : Vec::new()}
|
||||
Self {}
|
||||
}
|
||||
|
||||
|
||||
pub fn init(&mut self, num_core : usize , mut path : PathBuf) {
|
||||
path.pop();
|
||||
for i in 0..num_core {
|
||||
path.push(format!("TraceCore{}", i));
|
||||
let file = File::create(&path).expect("Can not create file");
|
||||
self.out_files.push(file);
|
||||
path.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "tracing"))]
|
||||
pub struct Trace {
|
||||
pub fn init(&mut self, num_core: usize, path: PathBuf) {}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(not(feature = "tracing"))]
|
||||
impl Trace {
|
||||
fn new() -> Self {
|
||||
Self { }
|
||||
}
|
||||
|
||||
|
||||
pub fn init(&mut self, num_core : usize, path : PathBuf ) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub static TRACER: LazyLock<Mutex<Trace>> = LazyLock::new(|| { Trace::new().into()});
|
||||
|
||||
pub static TRACER: LazyLock<Mutex<Trace>> = LazyLock::new(|| Trace::new().into());
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
use std::{collections::HashMap, path::PathBuf, time::Instant};
|
||||
|
||||
use crate::tracing::profile::profile_analysis::{
|
||||
analyze_timings, generate_interactive_report, print_textual_report,
|
||||
};
|
||||
|
||||
pub mod profile_analysis;
|
||||
pub mod profile_isa;
|
||||
|
||||
/// Timing collector behind the `profile_time` variant of the tracer.
pub struct Trace {
    /// Samples grouped by instruction name. Each sample is a pair of
    /// nanosecond counts: `(time since start_time, time since the core's
    /// recorded start instant)`.
    instruction_times: HashMap<String, Vec<(u128, u128)>>,
    /// Start instant of the measurement in progress for each core index;
    /// `None` while no measurement is running on that core.
    core_start_time: HashMap<usize, Option<Instant>>,
    /// Instant captured when the collector was constructed.
    start_time: Instant,
}
|
||||
|
||||
impl Trace {
|
||||
pub fn new() -> Self {
|
||||
let mut instruction_times = HashMap::new();
|
||||
instruction_times.insert("sldi".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("sld".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("sadd".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("ssub".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("smul".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("saddi".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("smuli".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("setbw".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("mvmul".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvadd".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvsub".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvmul".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvdmul".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvmax".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvsll".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vvsra".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vavg".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vrelu".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vtanh".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vsigm".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vsoftmax".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vmv".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vrsu".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("vrsl".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("ld".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("st".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("lldi".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("lmv".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("send".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("recv".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("wait".to_string(), Vec::with_capacity(20000));
|
||||
instruction_times.insert("sync".to_string(), Vec::with_capacity(20000));
|
||||
Self {
|
||||
instruction_times,
|
||||
core_start_time: HashMap::new(),
|
||||
start_time: Instant::now()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn init(&mut self, num_core: usize, path: PathBuf) {
|
||||
for i in 0..num_core {
|
||||
self.core_start_time.insert(i, None);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn report(&self) {
|
||||
let res = analyze_timings(&self.instruction_times);
|
||||
print_textual_report(&res);
|
||||
generate_interactive_report(
|
||||
&self.instruction_times,
|
||||
&["mvmul", "recv"],
|
||||
"/tmp/report.html",
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,192 @@
|
||||
use comfy_table::{Cell, Table, modifiers::UTF8_ROUND_CORNERS, presets::UTF8_FULL};
|
||||
use statrs::statistics::{Data, Distribution, Max, Min, OrderStatistics};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Summary statistics for the recorded durations of one instruction.
///
/// Time-valued fields are in the unit of the recorded samples (the report
/// formatter treats them as nanoseconds).
#[derive(Debug)]
pub struct InstructionStats {
    /// Instruction name (the key of the timing table).
    pub name: String,
    /// Number of recorded samples.
    pub count: usize,
    /// Sum of all recorded durations.
    pub total_time: u128,
    /// Shortest recorded duration.
    pub min: f64,
    /// Longest recorded duration.
    pub max: f64,
    /// Mean duration.
    pub mean: f64,
    /// Median duration.
    pub median: f64,
    /// Standard deviation of the durations.
    pub std_dev: f64,
    /// Coefficient of variation (`std_dev / mean`, or `0.0` when the mean is not positive).
    pub cv: f64,
    /// 95th percentile of the durations.
    pub p95: f64,
    /// 99th percentile of the durations.
    pub p99: f64,
    /// Skewness of the durations (`NaN` when it cannot be computed).
    pub skewness: f64,
    /// Excess kurtosis of the durations (`NaN` when it cannot be computed).
    pub kurtosis: f64,
}
|
||||
|
||||
/// Renders a duration given in nanoseconds with two decimals, using the
/// largest unit (s, ms, µs, ns) whose threshold the value reaches.
///
/// `NaN` is rendered as the literal text `NaN`; values below 1000 (including
/// negative ones) are printed in nanoseconds.
fn format_time(ns: f64) -> String {
    match ns {
        value if value.is_nan() => "NaN".to_string(),
        value if value >= 1_000_000_000.0 => format!("{:.2} s", value / 1_000_000_000.0),
        value if value >= 1_000_000.0 => format!("{:.2} ms", value / 1_000_000.0),
        value if value >= 1_000.0 => format!("{:.2} µs", value / 1_000.0),
        value => format!("{:.2} ns", value),
    }
}
|
||||
|
||||
/// Computes `(skewness, excess kurtosis)` of `times` around `mean`.
///
/// The third and fourth central moments are averaged over the number of
/// samples and normalised by the supplied `std_dev` raised to the third and
/// fourth power; `3.0` is subtracted from the kurtosis.
///
/// Returns `(NaN, NaN)` when there are fewer than four samples or when
/// `std_dev` is exactly zero.
fn calculate_skewness_kurtosis(times: &[f64], mean: f64, std_dev: f64) -> (f64, f64) {
    let sample_count = times.len() as f64;
    let too_few_samples = sample_count < 4.0;
    if too_few_samples || std_dev == 0.0 {
        return (f64::NAN, f64::NAN);
    }

    // Accumulate the cubed and fourth-power deviations in a single pass.
    let (cubed_total, fourth_total) =
        times
            .iter()
            .fold((0.0_f64, 0.0_f64), |(cubes, fourths), &sample| {
                let offset = sample - mean;
                (cubes + offset.powi(3), fourths + offset.powi(4))
            });

    let third_moment = cubed_total / sample_count;
    let fourth_moment = fourth_total / sample_count;

    (
        third_moment / std_dev.powi(3),
        (fourth_moment / std_dev.powi(4)) - 3.0,
    )
}
|
||||
|
||||
pub fn analyze_timings(timings: &HashMap<String, Vec<(u128, u128)>>) -> Vec<InstructionStats> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for (instruction, times) in timings {
|
||||
let count = times.len();
|
||||
if count == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Extract ONLY the duration (the second element of the tuple) for stats
|
||||
let durations: Vec<u128> = times.iter().map(|&(_, duration)| duration).collect();
|
||||
let total_time: u128 = durations.iter().sum();
|
||||
|
||||
let f64_times: Vec<f64> = durations.iter().map(|&t| t as f64).collect();
|
||||
let mut data = Data::new(f64_times.clone());
|
||||
|
||||
let mean = data.mean().unwrap_or(0.0);
|
||||
let std_dev = data.std_dev().unwrap_or(0.0);
|
||||
let cv = if mean > 0.0 { std_dev / mean } else { 0.0 };
|
||||
|
||||
let (skewness, kurtosis) = calculate_skewness_kurtosis(&f64_times, mean, std_dev);
|
||||
|
||||
results.push(InstructionStats {
|
||||
name: instruction.clone(),
|
||||
count,
|
||||
total_time,
|
||||
min: data.min(),
|
||||
max: data.max(),
|
||||
mean,
|
||||
median: data.median(),
|
||||
std_dev,
|
||||
cv,
|
||||
p95: data.percentile(95),
|
||||
p99: data.percentile(99),
|
||||
skewness,
|
||||
kurtosis,
|
||||
});
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| b.mean.partial_cmp(&a.mean).unwrap());
|
||||
results
|
||||
}
|
||||
|
||||
pub fn print_textual_report(stats: &[InstructionStats]) {
|
||||
let mut table = Table::new();
|
||||
table
|
||||
.load_preset(UTF8_FULL)
|
||||
.apply_modifier(UTF8_ROUND_CORNERS)
|
||||
.set_header(vec![
|
||||
"Instruction",
|
||||
"Count",
|
||||
"Total Time",
|
||||
"Mean",
|
||||
"Median",
|
||||
"Min",
|
||||
"Max",
|
||||
"P95",
|
||||
"P99",
|
||||
"StdDev",
|
||||
"CV",
|
||||
"Skewness",
|
||||
"Kurtosis",
|
||||
]);
|
||||
|
||||
for stat in stats {
|
||||
table.add_row(vec![
|
||||
Cell::new(&stat.name),
|
||||
Cell::new(stat.count.to_string()),
|
||||
Cell::new(format_time(stat.total_time as f64)), // Cast u128 to f64 for formatting
|
||||
Cell::new(format_time(stat.mean)),
|
||||
Cell::new(format_time(stat.median)),
|
||||
Cell::new(format_time(stat.min)),
|
||||
Cell::new(format_time(stat.max)),
|
||||
Cell::new(format_time(stat.p95)),
|
||||
Cell::new(format_time(stat.p99)),
|
||||
Cell::new(format_time(stat.std_dev)),
|
||||
Cell::new(format!("{:.3}", stat.cv)),
|
||||
Cell::new(format!("{:.2}", stat.skewness)),
|
||||
Cell::new(format!("{:.2}", stat.kurtosis)),
|
||||
]);
|
||||
}
|
||||
|
||||
println!("{table}");
|
||||
}
|
||||
|
||||
|
||||
pub fn generate_interactive_report(
|
||||
timings: &HashMap<String, Vec<(u128, u128)>>,
|
||||
instructions_to_plot: &[&str], // <-- NEW: Only plot these
|
||||
file_path: &str,
|
||||
) {
|
||||
|
||||
use plotly::common::{Mode, Marker, Line};
|
||||
use plotly::layout::{Axis, Layout};
|
||||
use plotly::{Plot, Scatter};
|
||||
use std::collections::HashMap;
|
||||
let mut plot = Plot::new();
|
||||
|
||||
for &instruction_name in instructions_to_plot {
|
||||
// Only proceed if the instruction exists in our timings map
|
||||
if let Some(times) = timings.get(instruction_name) {
|
||||
let x_axis: Vec<f64> = times.iter().map(|&(ts, _)| ts as f64).collect();
|
||||
let y_axis: Vec<f64> = times.iter().map(|&(_, dur)| dur as f64).collect();
|
||||
|
||||
let text_array: Vec<String> = times.iter()
|
||||
.map(|&(_, dur)| format_time(dur as f64))
|
||||
.collect();
|
||||
|
||||
let trace = Scatter::new(x_axis, y_axis)
|
||||
.name(instruction_name)
|
||||
.mode(Mode::LinesMarkers)
|
||||
.marker(Marker::new().size(4).opacity(0.6))
|
||||
.line(Line::new().width(1.0))
|
||||
.text_array(text_array)
|
||||
.hover_info(plotly::common::HoverInfo::All);
|
||||
|
||||
plot.add_trace(trace);
|
||||
}
|
||||
}
|
||||
|
||||
let layout = Layout::new()
|
||||
.title(plotly::common::Title::new("Simulator Timeline: Top Offenders"))
|
||||
.x_axis(Axis::new().title(plotly::common::Title::new("Absolute Time (ns)")))
|
||||
.y_axis(Axis::new().title(plotly::common::Title::new("Execution Duration")));
|
||||
|
||||
plot.set_layout(layout);
|
||||
plot.write_html(file_path);
|
||||
println!("🌐 Interactive timeline saved to {}", file_path);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,364 @@
|
||||
use crate::{
|
||||
cpu::CPU,
|
||||
instruction_set::instruction_data::InstructionData,
|
||||
memory_manager::{
|
||||
MemoryStorable,
|
||||
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
|
||||
},
|
||||
tracing::Trace,
|
||||
utility::{add_offset_r1, add_offset_rd},
|
||||
};
|
||||
use std::io::Write;
|
||||
use std::time::Instant;
|
||||
|
||||
#[cfg(feature = "profile_time")]
|
||||
impl Trace {
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////Scalar/register Instructions//////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
fn pre_impl(&mut self, cores: &mut CPU, data: InstructionData) {
|
||||
let (core_indx, rd, imm) = data.get_core_rd_imm();
|
||||
let core_indx = core_indx as usize;
|
||||
if self.core_start_time.get(&core_indx).unwrap().is_none() {
|
||||
self.core_start_time.insert(core_indx, Some(Instant::now()));
|
||||
}
|
||||
}
|
||||
|
||||
fn post_impl(&mut self, cores: &mut CPU, data: InstructionData, name: &'static str) {
|
||||
let (core_indx, rd, imm) = data.get_core_rd_imm();
|
||||
let core_indx = core_indx as usize;
|
||||
let Self {
|
||||
instruction_times,
|
||||
core_start_time,
|
||||
start_time,
|
||||
} = self;
|
||||
let now = Instant::now();
|
||||
instruction_times
|
||||
.get_mut(name)
|
||||
.unwrap()
|
||||
.push((now.duration_since(*start_time).as_nanos(), now.duration_since(core_start_time[&core_indx].unwrap()).as_nanos()));
|
||||
self.core_start_time.insert(core_indx, None);
|
||||
}
|
||||
|
||||
/// Pre-execution hook for `sldi`: starts the timer of the issuing core.
pub fn pre_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `sldi`: records the elapsed time under `"sldi"`.
pub fn post_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "sldi")
}
|
||||
|
||||
/// Pre-execution hook for `sld`: starts the timer of the issuing core.
pub fn pre_sld(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `sld`: records the elapsed time under `"sld"`.
pub fn post_sld(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "sld")
}
|
||||
|
||||
/// Pre-execution hook for `sadd`: starts the timer of the issuing core.
pub fn pre_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `sadd`: records the elapsed time under `"sadd"`.
pub fn post_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "sadd")
}
|
||||
|
||||
/// Pre-execution hook for `ssub`: starts the timer of the issuing core.
pub fn pre_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `ssub`: records the elapsed time under `"ssub"`.
pub fn post_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "ssub")
}
|
||||
|
||||
/// Pre-execution hook for `smul`: starts the timer of the issuing core.
pub fn pre_smul(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `smul`: records the elapsed time under `"smul"`.
pub fn post_smul(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "smul")
}
|
||||
|
||||
/// Pre-execution hook for `saddi`: starts the timer of the issuing core.
pub fn pre_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `saddi`: records the elapsed time under `"saddi"`.
pub fn post_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "saddi")
}
|
||||
|
||||
/// Pre-execution hook for `smuli`: starts the timer of the issuing core.
pub fn pre_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data)
}
|
||||
|
||||
/// Post-execution hook for `smuli`: records the elapsed time under `"smuli"`.
pub fn post_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
    self.post_impl(cores, data, "smuli")
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
///////////////////Matrix/vector Instructions////////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
/// `pre_` entry point for the `setbw` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
|
||||
/// `post_` entry point for the `setbw` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"setbw"`.
pub fn post_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "setbw";
    self.post_impl(cores, data, LABEL);
}
|
||||
|
||||
pub fn pre_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T> + UpcastSlice<M>,
|
||||
[M]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T> + UpcastSlice<M>,
|
||||
[M]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "mvmul");
|
||||
}
|
||||
|
||||
pub fn pre_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "vvadd");
|
||||
}
|
||||
|
||||
pub fn pre_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "vvsub");
|
||||
}
|
||||
|
||||
pub fn pre_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "vvmul");
|
||||
}
|
||||
|
||||
pub fn pre_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "vvdmul");
|
||||
}
|
||||
|
||||
pub fn pre_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "vvmax");
|
||||
}
|
||||
|
||||
pub fn pre_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
self.post_impl(cores, data, "vavg");
|
||||
}
|
||||
|
||||
pub fn pre_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.post_impl(cores, data, "vrelu");
|
||||
}
|
||||
|
||||
pub fn pre_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.post_impl(cores, data, "vtanh");
|
||||
}
|
||||
|
||||
pub fn pre_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.post_impl(cores, data, "vsigm");
|
||||
}
|
||||
|
||||
pub fn pre_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.pre_impl(cores, data);
|
||||
}
|
||||
|
||||
pub fn post_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
|
||||
where
|
||||
[F]: UpcastSlice<T>,
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
|
||||
{
|
||||
self.post_impl(cores, data, "vsoftmax");
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
/////Communication/synchronization Instructions/////////////////
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
/// `pre_` entry point for the `ld` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_ld(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
/// `post_` entry point for the `ld` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"ld"`.
pub fn post_ld(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "ld";
    self.post_impl(cores, data, LABEL);
}
|
||||
|
||||
/// `pre_` entry point for the `st` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_st(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
|
||||
/// `post_` entry point for the `st` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"st"`.
pub fn post_st(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "st";
    self.post_impl(cores, data, LABEL);
}
|
||||
|
||||
/// `pre_` entry point for the `lldi` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
|
||||
/// `post_` entry point for the `lldi` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"lldi"`.
pub fn post_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "lldi";
    self.post_impl(cores, data, LABEL);
}
|
||||
|
||||
/// `pre_` entry point for the `lmv` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
|
||||
/// `post_` entry point for the `lmv` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"lmv"`.
pub fn post_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "lmv";
    self.post_impl(cores, data, LABEL);
}
|
||||
|
||||
/// `pre_` entry point for the `send` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_send(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
|
||||
/// `post_` entry point for the `send` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"send"`.
pub fn post_send(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "send";
    self.post_impl(cores, data, LABEL);
}
|
||||
|
||||
/// `pre_` entry point for the `recv` instruction.
///
/// Hands `cores` and `data` unchanged to `pre_impl`.
pub fn pre_recv(&mut self, cores: &mut CPU, data: InstructionData) {
    self.pre_impl(cores, data);
}
|
||||
|
||||
/// `post_` entry point for the `recv` instruction.
///
/// Hands `cores` and `data` to `post_impl` together with the label `"recv"`.
pub fn post_recv(&mut self, cores: &mut CPU, data: InstructionData) {
    const LABEL: &str = "recv";
    self.post_impl(cores, data, LABEL);
}
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
use std::{fs::File, path::PathBuf};
|
||||
|
||||
pub mod pretty_print;
|
||||
pub mod tracing_isa;
|
||||
|
||||
/// Owns the per-core output files created by [`Trace::init`].
pub struct Trace {
    /// One file handle per core, pushed in core-index order by `init`.
    out_files: Vec<File>,
}

impl Default for Trace {
    /// Equivalent to [`Trace::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl Trace {
    /// Creates a `Trace` that holds no output files yet.
    pub fn new() -> Self {
        Self {
            out_files: Vec::new(),
        }
    }

    /// Creates the files `TraceCore0` .. `TraceCore{num_core - 1}` and keeps
    /// their handles in core-index order.
    ///
    /// The last component of `path` is dropped first, so the files are created
    /// next to whatever `path` pointed at. Handles are appended to any that a
    /// previous call already stored.
    ///
    /// # Panics
    ///
    /// Panics if any of the files cannot be created; the message names the
    /// offending path and the underlying I/O error.
    pub fn init(&mut self, num_core: usize, mut path: PathBuf) {
        // Only the directory part is needed; the file names are generated below.
        path.pop();
        for i in 0..num_core {
            let target = path.join(format!("TraceCore{}", i));
            let file = File::create(&target).unwrap_or_else(|err| {
                panic!("Can not create file {}: {}", target.display(), err)
            });
            self.out_files.push(file);
        }
    }
}
|
||||
|
||||
+1
-10
@@ -1,4 +1,4 @@
|
||||
use crate::tracing::pretty_print;
|
||||
use crate::{tracing::trace::pretty_print, utility::add_offset_r2};
|
||||
use std::fs::File;
|
||||
|
||||
use crate::{
|
||||
@@ -13,7 +13,6 @@ use crate::{
|
||||
};
|
||||
use std::io::Write;
|
||||
|
||||
#[cfg(feature = "tracing")]
|
||||
impl Trace {
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////Scalar/register Instructions//////////////////
|
||||
@@ -284,7 +283,6 @@ impl Trace {
|
||||
M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
use crate::tracing::pretty_print;
|
||||
|
||||
let (core_indx, rd, r1, mbiw, relu, group) = data.get_core_rd_r1_mbiw_immrelu_immgroup();
|
||||
let file: &mut File = self
|
||||
@@ -358,8 +356,6 @@ impl Trace {
|
||||
T: UpcastDestTraits<T> + MemoryStorable,
|
||||
F: UpcastDestTraits<F> + MemoryStorable,
|
||||
{
|
||||
use crate::{tracing::pretty_print, utility::add_offset_r2};
|
||||
|
||||
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let file: &mut File = self
|
||||
@@ -990,8 +986,6 @@ impl Trace {
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
pub fn ld_impl(&mut self, cores: &mut CPU, data: InstructionData, prefix: &'static str) {
|
||||
use crate::tracing::pretty_print;
|
||||
|
||||
let (core, rd, r1, _, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let file: &mut File = self
|
||||
@@ -1044,8 +1038,6 @@ impl Trace {
|
||||
}
|
||||
|
||||
pub fn st_impl(&mut self, cores: &mut CPU, data: InstructionData, prefix: &'static str) {
|
||||
use crate::tracing::pretty_print;
|
||||
|
||||
let (core, rd, r1, _, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
let file: &mut File = self
|
||||
@@ -1138,7 +1130,6 @@ impl Trace {
|
||||
}
|
||||
|
||||
fn lmv_impl (&mut self, cores: &mut CPU, data: InstructionData, prefix: &'static str) {
|
||||
use crate::tracing::pretty_print;
|
||||
|
||||
let (core, rd, r1, _, imm_len, offset_select, offset_value) =
|
||||
data.get_core_rd_r1_r2_immlen_offset();
|
||||
Reference in New Issue
Block a user