pim-sim TraceTime + faer
Validate Operations / validate-operations (push) Has been cancelled

This commit is contained in:
ilgeco
2026-05-11 18:19:30 +02:00
parent 1e279ae9bb
commit feaff820e1
13 changed files with 2875 additions and 104 deletions
File diff suppressed because it is too large Load Diff
@@ -13,8 +13,9 @@ name = "pimcore"
path = "src/lib/pimcore.rs"
[features]
default = ["tracing"]
default = []
tracing = []
profile_time = ["dep:plotly", "dep:comfy-table", "dep:statrs"]
@@ -27,3 +28,9 @@ hex = "0"
paste = "1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
statrs = {version="0.16", optional=true}
comfy-table = {version="7.1", optional=true}
plotly = {version="0.8", optional=true}
rayon = "1.12.0"
faer = "0.24.0"
faer-traits = "0.24.0"
@@ -1,14 +1,19 @@
use crate::{
cpu::{CPU, crossbar}, instruction_set::{
cpu::{CPU, crossbar},
instruction_set::{
Instruction, InstructionData, InstructionStatus, InstructionType, VectorBitWith,
helper::add_all,
}, memory_manager::{
},
memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
}, tracing::TRACER, utility::{add_offset_r1, add_offset_r2, add_offset_rd}
},
tracing::TRACER,
utility::{add_offset_r1, add_offset_r2, add_offset_rd},
};
use aligned_vec::{AVec, ConstAlign};
use anyhow::{Context, Result, ensure};
use rayon::prelude::*;
use paste::paste;
use std::{borrow::Cow, cell::OnceCell, collections::HashMap};
@@ -76,8 +81,7 @@ pub fn functor_to_name(functor: usize) -> &'static str {
///////////////////////////////////////////////////////////////
/////////////////Scalar/register Instructions//////////////////
///////////////////////////////////////////////////////////////
pub fn sldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
{
pub fn sldi(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
TRACER.lock().unwrap().pre_sldi(cores, data);
let (core_indx, rd, imm) = data.get_core_rd_imm();
let core = cores.core(core_indx);
@@ -229,25 +233,30 @@ where
[F]: UpcastSlice<T> + UpcastSlice<M>,
[M]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
// Add faer::ComplexField HERE, directly bounding M for this function only
M: UpcastDestTraits<M> + MemoryStorable + FromFloat + faer_traits::ComplexField,
F: UpcastDestTraits<F> + MemoryStorable,
{
TRACER.lock().unwrap().pre_mvm::<F, M, T>(cores, data);
let (core_indx, rd, r1, mbiw, relu, group) = data.get_core_rd_r1_mbiw_immrelu_immgroup();
let group: usize = group.try_into().context("group can not be negative")?;
let core = cores.core(core_indx);
let r1_val = core.register(r1);
let rd_val = core.register(rd);
let (memory, crossbars) = core.get_memory_crossbar();
let crossbar = crossbars.get_mut(group).unwrap();
let crossbar_stored_bytes = crossbar.stored_bytes();
let crossbar_byte_width = crossbar.width();
//Fix this
let crossbar_elem_width = crossbar_byte_width / size_of::<M>();
ensure!(
crossbar_byte_width & size_of::<M>() == 0,
crossbar_byte_width % size_of::<M>() == 0,
"M not divisor of the crosbbar size"
);
let crossbar_height = crossbar.height();
let crossbar_byte_size = crossbar_byte_width * crossbar_height;
@@ -257,19 +266,29 @@ where
let load = loads[0];
let vec: Cow<[M]> = load.up();
let matrix = crossbar.load::<M>(crossbar_byte_size)?[0];
let mut res = Vec::with_capacity(crossbar_elem_width);
let mut partial :AVec<M, _> = AVec::<M, ConstAlign<64>>::with_capacity(64, vec.len());
partial.resize(vec.len(), M::from_f32(0.0));
for x in 0..crossbar_elem_width {
partial[0] = vec[0] * matrix[x];
for y in 1..crossbar_height {
partial[y] = vec[y] * matrix[y * crossbar_elem_width + x];
}
// --- FAER IMPLEMENTATION ---
// 1. Explicitly create a Matrix Reference (MatRef)
let matrix_view = faer::mat::MatRef::from_row_major_slice(
matrix.as_ref(),
crossbar_height,
crossbar_elem_width,
);
// 2. Explicitly create a Column Vector Reference (ColRef)
// Using `ColRef` here guarantees we don't accidentally get a RowRef (Fixes E0277)
let vec_view = faer::col::ColRef::from_slice(vec.as_ref());
let res_col: faer::col::Col<M> = matrix_view.transpose() * vec_view;
// 3. Copy the owned result column, element by element, into a standard Rust Vec.
let mut res: Vec<M> = (0..crossbar_elem_width).map(|i| res_col[i]).collect();
// --- END FAER ---
let mut acc = add_all(partial.as_slice());
res.push(acc);
}
if relu != 0 {
res.iter_mut().for_each(|x| {
if *x < M::from_f32(0.0) {
@@ -277,12 +296,15 @@ where
}
});
}
ensure!(
res.len() == crossbar_elem_width,
"mvm generate a vector bigger thant it's requested elements"
);
let res_up: Cow<[T]> = res.as_slice().up();
core.execute_store(rd_val, res_up.as_ref());
TRACER.lock().unwrap().post_mvm::<F, M, T>(cores, data);
Ok(InstructionStatus::Completed)
}
@@ -533,7 +555,10 @@ where
let r2_val = r2;
ensure!(r2_val == 1, "Stride different than 1 not supported");
let rd_val = core.register(rd);
ensure!(offset_select == 1, "Offset select cannot be different from 1");
ensure!(
offset_select == 1,
"Offset select cannot be different from 1"
);
let r1_val = add_offset_r1(r1_val, offset_select, offset_value);
let loads = core.reserve_load(r1_val, imm_len)?.execute_load::<F>()?;
let load1 = loads[0];
@@ -633,7 +658,10 @@ pub fn vsoftmax(cores: &mut CPU, data: InstructionData) -> Result<InstructionSta
panic!("You are calling a placeholder, the real call is the generic version");
}
pub(super) fn vsoftmax_impl<F, T>(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
pub(super) fn vsoftmax_impl<F, T>(
cores: &mut CPU,
data: InstructionData,
) -> Result<InstructionStatus>
where
[F]: UpcastSlice<T>,
T: UpcastDestTraits<T> + MemoryStorable,
@@ -656,12 +684,11 @@ where
.reduce(|a, b| if a > b { a } else { b })
.unwrap();
let exp_values: Vec<F> = load1.iter().map(|&a| (a - max_val).exp()).collect();
let sum = exp_values
.iter()
.copied()
.reduce(|a, b| a + b)
.unwrap();
ensure!(sum > 0.0.into(), "vsoftmax normalization sum must be positive");
let sum = exp_values.iter().copied().reduce(|a, b| a + b).unwrap();
ensure!(
sum > 0.0.into(),
"vsoftmax normalization sum must be positive"
);
let res: Vec<F> = exp_values.iter().map(|&a| a / sum).collect();
let res_up: Cow<[T]> = res.as_slice().up();
core.execute_store(rd_val, res_up.as_ref());
@@ -749,12 +776,10 @@ pub fn lmv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus>
}
pub fn send(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
TRACER.lock().unwrap().pre_send(cores, data);
Ok(InstructionStatus::Sending(data))
}
pub fn recv(cores: &mut CPU, data: InstructionData) -> Result<InstructionStatus> {
TRACER.lock().unwrap().pre_recv(cores, data);
Ok(InstructionStatus::Reciving(data))
}
@@ -169,6 +169,9 @@ impl<'a> Executable<'a> {
}
}
print_status(cores_instructions);
#[cfg(feature = "profile_time")]
TRACER.lock().unwrap().report();
}
pub fn cpu(&self) -> &CPU<'a> {
@@ -58,6 +58,20 @@ where 'a : 'b
&& sender.internal_core == receiver.external_core
&& receiver.internal_core == sender.external_core
{
{
let sender = &mut core_instructions[sender.internal_core];
let pc = sender.program_counter;
let inst = sender.instructions.get(pc).unwrap();
let data = inst.data;
TRACER.lock().unwrap().pre_send(cpu, data);
}
{
let recv = &mut core_instructions[receiver.internal_core];
let pc = recv.program_counter;
let inst = recv.instructions.get(pc).unwrap();
let data = inst.data;
TRACER.lock().unwrap().pre_recv(cpu, data);
}
let [sender_core, reciver_core] =
cpu.get_multiple_cores([sender.internal_core, receiver.internal_core]);
let memory = sender_core
@@ -13,7 +13,7 @@ use crate::{
};
use std::io::Write;
#[cfg(not(feature = "tracing"))]
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
impl Trace {
///////////////////////////////////////////////////////////////
/////////////////Scalar/register Instructions//////////////////
@@ -1,52 +1,32 @@
mod tracing_isa;
mod disable;
mod pretty_print;
use std::{fs::File, path::{ PathBuf}};
#[cfg(feature = "profile_time")]
mod profile;
#[cfg(feature = "profile_time")]
use profile::Trace;
#[cfg(feature = "tracing")]
mod trace;
#[cfg(feature = "tracing")]
use trace::Trace;
use crate::Executable;
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
use std::path::PathBuf;
use std::sync::{LazyLock, Mutex};
use crate::Executable;
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
pub struct Trace {}
#[cfg(feature = "tracing")]
pub struct Trace {
out_files : Vec<File>
}
#[cfg(feature = "tracing")]
impl Trace {
fn new() -> Self {
Self { out_files : Vec::new()}
}
pub fn init(&mut self, num_core : usize , mut path : PathBuf) {
path.pop();
for i in 0..num_core {
path.push(format!("TraceCore{}", i));
let file = File::create(&path).expect("Can not create file");
self.out_files.push(file);
path.pop();
}
}
}
#[cfg(not(feature = "tracing"))]
pub struct Trace {
}
#[cfg(not(feature = "tracing"))]
#[cfg(not(any(feature = "tracing", feature = "profile_time")))]
impl Trace {
fn new() -> Self {
Self {}
}
pub fn init(&mut self, num_core : usize, path : PathBuf ) {
}
pub fn init(&mut self, num_core: usize, path: PathBuf) {}
}
pub static TRACER: LazyLock<Mutex<Trace>> = LazyLock::new(|| { Trace::new().into()});
pub static TRACER: LazyLock<Mutex<Trace>> = LazyLock::new(|| Trace::new().into());
@@ -0,0 +1,73 @@
use std::{collections::HashMap, path::PathBuf, time::Instant};
use crate::tracing::profile::profile_analysis::{
analyze_timings, generate_interactive_report, print_textual_report,
};
pub mod profile_analysis;
pub mod profile_isa;
/// Profiler state: collects wall-clock timing samples per instruction mnemonic.
pub struct Trace {
// Samples keyed by instruction mnemonic. Each sample is
// (nanoseconds elapsed since `start_time`, nanoseconds since the core's pending start instant).
instruction_times: HashMap<String, Vec<(u128,u128)>>,
// Per-core start instant of the instruction currently being timed;
// `None` while no instruction is pending on that core.
core_start_time: HashMap<usize, Option<Instant>>,
// Instant at which this profiler was constructed; origin of the absolute timestamps.
start_time: Instant,
}
impl Trace {
    /// Creates a profiler whose sample table already holds an empty,
    /// pre-sized buffer for every known instruction mnemonic.
    pub fn new() -> Self {
        // Every mnemonic that may be reported through a `post_*` hook.
        const MNEMONICS: &[&str] = &[
            "sldi", "sld", "sadd", "ssub", "smul", "saddi", "smuli", "setbw", "mvmul", "vvadd",
            "vvsub", "vvmul", "vvdmul", "vvmax", "vvsll", "vvsra", "vavg", "vrelu", "vtanh",
            "vsigm", "vsoftmax", "vmv", "vrsu", "vrsl", "ld", "st", "lldi", "lmv", "send", "recv",
            "wait", "sync",
        ];

        let instruction_times: HashMap<String, Vec<(u128, u128)>> = MNEMONICS
            .iter()
            .map(|&mnemonic| (mnemonic.to_string(), Vec::with_capacity(20000)))
            .collect();

        Self {
            instruction_times,
            core_start_time: HashMap::new(),
            start_time: Instant::now(),
        }
    }

    /// Registers cores `0..num_core` with no pending start instant.
    ///
    /// `path` is accepted for signature parity with the other `Trace`
    /// variants and is not used here.
    pub fn init(&mut self, num_core: usize, path: PathBuf) {
        self.core_start_time
            .extend((0..num_core).map(|core| (core, None)));
    }

    /// Prints the textual statistics table and writes the interactive
    /// timeline for `mvmul` and `recv` to `/tmp/report.html`.
    pub fn report(&self) {
        let samples = &self.instruction_times;
        let stats = analyze_timings(samples);
        print_textual_report(&stats);
        generate_interactive_report(samples, &["mvmul", "recv"], "/tmp/report.html");
    }
}
@@ -0,0 +1,192 @@
use comfy_table::{Cell, Table, modifiers::UTF8_ROUND_CORNERS, presets::UTF8_FULL};
use statrs::statistics::{Data, Distribution, Max, Min, OrderStatistics};
use std::collections::HashMap;
/// Summary statistics for the recorded durations of one instruction.
///
/// Durations are formatted as nanoseconds by `format_time`.
#[derive(Debug)]
pub struct InstructionStats {
// Instruction mnemonic (the key of the timings map).
pub name: String,
// Number of recorded samples.
pub count: usize,
// Sum of all sample durations.
pub total_time: u128,
// Smallest and largest sample duration.
pub min: f64,
pub max: f64,
// Central tendency of the sample durations.
pub mean: f64,
pub median: f64,
// Standard deviation as reported by `statrs`.
pub std_dev: f64,
// Coefficient of variation (`std_dev / mean`); 0.0 when the mean is not positive.
pub cv: f64,
// 95th and 99th percentile of the sample durations.
pub p95: f64,
pub p99: f64,
// Third and fourth standardized moments (kurtosis is reported as excess, i.e. minus 3);
// NaN when there are fewer than four samples or the standard deviation is zero.
pub skewness: f64,
pub kurtosis: f64,
}
/// Renders a duration given in nanoseconds with the largest unit
/// (s, ms, µs, ns) whose threshold it reaches, using two decimals.
///
/// NaN is rendered as the literal string `"NaN"`.
fn format_time(ns: f64) -> String {
    match ns {
        value if value.is_nan() => "NaN".to_string(),
        value if value >= 1_000_000_000.0 => format!("{:.2} s", value / 1_000_000_000.0),
        value if value >= 1_000_000.0 => format!("{:.2} ms", value / 1_000_000.0),
        value if value >= 1_000.0 => format!("{:.2} µs", value / 1_000.0),
        // Anything below one microsecond (including negatives) stays in ns.
        value => format!("{:.2} ns", value),
    }
}
/// Computes `(skewness, excess kurtosis)` of `times` around `mean`,
/// normalised by the supplied `std_dev`.
///
/// Returns `(NaN, NaN)` when there are fewer than four samples or when
/// `std_dev` is exactly zero.
fn calculate_skewness_kurtosis(times: &[f64], mean: f64, std_dev: f64) -> (f64, f64) {
    if times.len() < 4 || std_dev == 0.0 {
        return (f64::NAN, f64::NAN);
    }

    // Accumulate the third and fourth central moment sums in one pass.
    let (cubed_sum, fourth_sum) = times.iter().fold((0.0, 0.0), |(cubed, fourth), &sample| {
        let delta = sample - mean;
        (cubed + delta.powi(3), fourth + delta.powi(4))
    });

    let sample_count = times.len() as f64;
    let third_moment = cubed_sum / sample_count;
    let fourth_moment = fourth_sum / sample_count;

    (
        third_moment / std_dev.powi(3),
        // Subtract 3 so that a normal distribution yields 0.
        (fourth_moment / std_dev.powi(4)) - 3.0,
    )
}
/// Computes summary statistics for every instruction that has at least one sample.
///
/// `timings` maps an instruction mnemonic to its samples; only the second
/// tuple element (the duration) is used here. Instructions with no samples
/// are omitted. The result is sorted by mean duration, largest first.
pub fn analyze_timings(timings: &HashMap<String, Vec<(u128, u128)>>) -> Vec<InstructionStats> {
    let mut results = Vec::with_capacity(timings.len());

    for (instruction, times) in timings {
        let count = times.len();
        if count == 0 {
            continue;
        }

        // Only the duration (second tuple element) feeds the statistics.
        let total_time: u128 = times.iter().map(|&(_, duration)| duration).sum();
        let f64_times: Vec<f64> = times.iter().map(|&(_, duration)| duration as f64).collect();

        // `Data` takes ownership of its buffer; the skewness helper below still
        // needs the original values, so a copy is handed over.
        let mut data = Data::new(f64_times.clone());

        let mean = data.mean().unwrap_or(0.0);
        let std_dev = data.std_dev().unwrap_or(0.0);
        let cv = if mean > 0.0 { std_dev / mean } else { 0.0 };

        let (skewness, kurtosis) = calculate_skewness_kurtosis(&f64_times, mean, std_dev);

        results.push(InstructionStats {
            name: instruction.clone(),
            count,
            total_time,
            min: data.min(),
            max: data.max(),
            mean,
            median: data.median(),
            std_dev,
            cv,
            p95: data.percentile(95),
            p99: data.percentile(99),
            skewness,
            kurtosis,
        });
    }

    // `total_cmp` is a total order on f64, so the comparator cannot panic
    // even if a mean ever turned out to be NaN.
    results.sort_by(|a, b| b.mean.total_cmp(&a.mean));
    results
}
/// Prints one table row per entry of `stats` to stdout, using rounded
/// UTF-8 borders. Time-valued columns go through `format_time`.
pub fn print_textual_report(stats: &[InstructionStats]) {
    let header = vec![
        "Instruction",
        "Count",
        "Total Time",
        "Mean",
        "Median",
        "Min",
        "Max",
        "P95",
        "P99",
        "StdDev",
        "CV",
        "Skewness",
        "Kurtosis",
    ];

    let mut table = Table::new();
    table
        .load_preset(UTF8_FULL)
        .apply_modifier(UTF8_ROUND_CORNERS)
        .set_header(header);

    for entry in stats {
        let row = vec![
            Cell::new(&entry.name),
            Cell::new(entry.count.to_string()),
            // The total is stored as u128; it is cast to f64 for formatting only.
            Cell::new(format_time(entry.total_time as f64)),
            Cell::new(format_time(entry.mean)),
            Cell::new(format_time(entry.median)),
            Cell::new(format_time(entry.min)),
            Cell::new(format_time(entry.max)),
            Cell::new(format_time(entry.p95)),
            Cell::new(format_time(entry.p99)),
            Cell::new(format_time(entry.std_dev)),
            Cell::new(format!("{:.3}", entry.cv)),
            Cell::new(format!("{:.2}", entry.skewness)),
            Cell::new(format!("{:.2}", entry.kurtosis)),
        ];
        table.add_row(row);
    }

    println!("{table}");
}
pub fn generate_interactive_report(
timings: &HashMap<String, Vec<(u128, u128)>>,
instructions_to_plot: &[&str], // <-- NEW: Only plot these
file_path: &str,
) {
use plotly::common::{Mode, Marker, Line};
use plotly::layout::{Axis, Layout};
use plotly::{Plot, Scatter};
use std::collections::HashMap;
let mut plot = Plot::new();
for &instruction_name in instructions_to_plot {
// Only proceed if the instruction exists in our timings map
if let Some(times) = timings.get(instruction_name) {
let x_axis: Vec<f64> = times.iter().map(|&(ts, _)| ts as f64).collect();
let y_axis: Vec<f64> = times.iter().map(|&(_, dur)| dur as f64).collect();
let text_array: Vec<String> = times.iter()
.map(|&(_, dur)| format_time(dur as f64))
.collect();
let trace = Scatter::new(x_axis, y_axis)
.name(instruction_name)
.mode(Mode::LinesMarkers)
.marker(Marker::new().size(4).opacity(0.6))
.line(Line::new().width(1.0))
.text_array(text_array)
.hover_info(plotly::common::HoverInfo::All);
plot.add_trace(trace);
}
}
let layout = Layout::new()
.title(plotly::common::Title::new("Simulator Timeline: Top Offenders"))
.x_axis(Axis::new().title(plotly::common::Title::new("Absolute Time (ns)")))
.y_axis(Axis::new().title(plotly::common::Title::new("Execution Duration")));
plot.set_layout(layout);
plot.write_html(file_path);
println!("🌐 Interactive timeline saved to {}", file_path);
}
@@ -0,0 +1,364 @@
use crate::{
cpu::CPU,
instruction_set::instruction_data::InstructionData,
memory_manager::{
MemoryStorable,
type_traits::{FromFloat, UpcastDestTraits, UpcastSlice},
},
tracing::Trace,
utility::{add_offset_r1, add_offset_rd},
};
use std::io::Write;
use std::time::Instant;
/// Timing hooks for every instruction.
///
/// Each `pre_*` hook records when an instruction started on its core and the
/// matching `post_*` hook stores one `(timestamp, duration)` sample under the
/// instruction's mnemonic. All hooks delegate to `pre_impl` / `post_impl`.
#[cfg(feature = "profile_time")]
impl Trace {
    ///////////////////////////////////////////////////////////////
    /////////////////Scalar/register Instructions//////////////////
    ///////////////////////////////////////////////////////////////

    /// Marks the start of an instruction on the core encoded in `data`.
    ///
    /// If a start instant is already pending for that core it is kept, so the
    /// sample measures from the first `pre_*` call. A core that was not
    /// registered through `init` is registered on the fly.
    fn pre_impl(&mut self, _cores: &mut CPU, data: InstructionData) {
        let (core_indx, _, _) = data.get_core_rd_imm();
        let core_indx = core_indx as usize;
        // Single lookup through the entry API instead of get-then-insert.
        let pending = self.core_start_time.entry(core_indx).or_default();
        if pending.is_none() {
            *pending = Some(Instant::now());
        }
    }

    /// Records one sample for `name` and clears the pending start of the core.
    ///
    /// The sample is `(ns since the profiler was created, ns since the
    /// pending start instant)`.
    ///
    /// # Panics
    ///
    /// Panics if no start instant is pending for the core, or if `name` was
    /// not pre-registered in the sample table.
    fn post_impl(&mut self, _cores: &mut CPU, data: InstructionData, name: &'static str) {
        let (core_indx, _, _) = data.get_core_rd_imm();
        let core_indx = core_indx as usize;
        let now = Instant::now();

        // `take` both reads the start instant and resets the slot to `None`.
        let started = self
            .core_start_time
            .get_mut(&core_indx)
            .and_then(|pending| pending.take())
            .expect("post hook called without a pending pre hook for this core");

        let sample = (
            now.duration_since(self.start_time).as_nanos(),
            now.duration_since(started).as_nanos(),
        );

        self.instruction_times
            .get_mut(name)
            .expect("instruction mnemonic is not registered in the sample table")
            .push(sample);
    }

    pub fn pre_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_sldi(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "sldi");
    }

    pub fn pre_sld(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_sld(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "sld");
    }

    pub fn pre_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_sadd(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "sadd");
    }

    pub fn pre_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_ssub(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "ssub");
    }

    pub fn pre_smul(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_smul(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "smul");
    }

    pub fn pre_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_saddi(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "saddi");
    }

    pub fn pre_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_smuli(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "smuli");
    }

    /////////////////////////////////////////////////////////////////
    ///////////////////Matrix/vector Instructions////////////////////
    /////////////////////////////////////////////////////////////////

    pub fn pre_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_setbw(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "setbw");
    }

    pub fn pre_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_mvm<F, M, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T> + UpcastSlice<M>,
        [M]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        // Matrix-vector multiply samples are stored under "mvmul".
        self.post_impl(cores, data, "mvmul");
    }

    pub fn pre_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vvadd<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.post_impl(cores, data, "vvadd");
    }

    pub fn pre_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vvsub<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.post_impl(cores, data, "vvsub");
    }

    pub fn pre_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vvmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.post_impl(cores, data, "vvmul");
    }

    pub fn pre_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vvdmul<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.post_impl(cores, data, "vvdmul");
    }

    pub fn pre_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vvmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.post_impl(cores, data, "vvmax");
    }

    pub fn pre_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vavg<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable,
    {
        self.post_impl(cores, data, "vavg");
    }

    pub fn pre_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vrelu<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.post_impl(cores, data, "vrelu");
    }

    pub fn pre_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vtanh<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.post_impl(cores, data, "vtanh");
    }

    pub fn pre_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vsigm<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.post_impl(cores, data, "vsigm");
    }

    pub fn pre_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.pre_impl(cores, data);
    }

    pub fn post_vsoftmax<F, T>(&mut self, cores: &mut CPU, data: InstructionData)
    where
        [F]: UpcastSlice<T>,
        T: UpcastDestTraits<T> + MemoryStorable,
        F: UpcastDestTraits<F> + MemoryStorable + From<f32>,
    {
        self.post_impl(cores, data, "vsoftmax");
    }

    /////////////////////////////////////////////////////////////////
    /////Communication/synchronization Instructions/////////////////
    /////////////////////////////////////////////////////////////////

    pub fn pre_ld(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_ld(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "ld");
    }

    pub fn pre_st(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_st(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "st");
    }

    pub fn pre_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_lldi(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "lldi");
    }

    pub fn pre_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_lmv(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "lmv");
    }

    pub fn pre_send(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_send(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "send");
    }

    pub fn pre_recv(&mut self, cores: &mut CPU, data: InstructionData) {
        self.pre_impl(cores, data);
    }

    pub fn post_recv(&mut self, cores: &mut CPU, data: InstructionData) {
        self.post_impl(cores, data, "recv");
    }
}
@@ -0,0 +1,28 @@
use std::{fs::File, path::PathBuf};
pub mod pretty_print;
pub mod tracing_isa;
/// File-backed tracer state.
pub struct Trace {
// Output files in creation order; `init` appends one `TraceCore{i}` file per core.
out_files: Vec<File>,
}
impl Trace {
    /// Creates a tracer with no output files opened yet.
    pub fn new() -> Self {
        Self {
            out_files: Vec::new(),
        }
    }

    /// Creates one `TraceCore{i}` file per core next to `path` and keeps the
    /// handles in creation order.
    ///
    /// The last component of `path` is dropped first, so the files are placed
    /// in its parent directory.
    ///
    /// # Panics
    ///
    /// Panics with "Can not create file" if any file cannot be created.
    pub fn init(&mut self, num_core: usize, mut path: PathBuf) {
        path.pop();
        let directory = path;
        self.out_files.extend((0..num_core).map(|i| {
            let target = directory.join(format!("TraceCore{}", i));
            File::create(&target).expect("Can not create file")
        }));
    }
}
@@ -1,4 +1,4 @@
use crate::tracing::pretty_print;
use crate::{tracing::trace::pretty_print, utility::add_offset_r2};
use std::fs::File;
use crate::{
@@ -13,7 +13,6 @@ use crate::{
};
use std::io::Write;
#[cfg(feature = "tracing")]
impl Trace {
///////////////////////////////////////////////////////////////
/////////////////Scalar/register Instructions//////////////////
@@ -284,7 +283,6 @@ impl Trace {
M: UpcastDestTraits<M> + MemoryStorable + FromFloat,
F: UpcastDestTraits<F> + MemoryStorable,
{
use crate::tracing::pretty_print;
let (core_indx, rd, r1, mbiw, relu, group) = data.get_core_rd_r1_mbiw_immrelu_immgroup();
let file: &mut File = self
@@ -358,8 +356,6 @@ impl Trace {
T: UpcastDestTraits<T> + MemoryStorable,
F: UpcastDestTraits<F> + MemoryStorable,
{
use crate::{tracing::pretty_print, utility::add_offset_r2};
let (core_indx, rd, r1, r2, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let file: &mut File = self
@@ -990,8 +986,6 @@ impl Trace {
/////////////////////////////////////////////////////////////////
pub fn ld_impl(&mut self, cores: &mut CPU, data: InstructionData, prefix: &'static str) {
use crate::tracing::pretty_print;
let (core, rd, r1, _, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let file: &mut File = self
@@ -1044,8 +1038,6 @@ impl Trace {
}
pub fn st_impl(&mut self, cores: &mut CPU, data: InstructionData, prefix: &'static str) {
use crate::tracing::pretty_print;
let (core, rd, r1, _, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();
let file: &mut File = self
@@ -1138,7 +1130,6 @@ impl Trace {
}
fn lmv_impl (&mut self, cores: &mut CPU, data: InstructionData, prefix: &'static str) {
use crate::tracing::pretty_print;
let (core, rd, r1, _, imm_len, offset_select, offset_value) =
data.get_core_rd_r1_r2_immlen_offset();