Skip to content

Commit

Permalink
Print more tok/sec metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
EricLBuehler committed Mar 6, 2024
1 parent d8e986e commit 1835461
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 3 deletions.
18 changes: 16 additions & 2 deletions mistralrs-core/src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,15 @@ impl Engine {
get_mut_arcmutex!(self.pipeline).forward(scheduled.prompt.clone(), true);
for seq in scheduled.prompt.iter() {
deref_mut_refcell!(seq).set_state(SequenceState::RunningCompletion);
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.expect("Time travel has occurred!")
.as_secs();
#[allow(clippy::cast_precision_loss)]
let prompt_tok_per_sec = deref_refcell!(seq).len() as f32
/ (now - deref_refcell!(seq).timestamp()) as f32;
deref_mut_refcell!(seq).prompt_tok_per_sec = prompt_tok_per_sec;
deref_mut_refcell!(seq).prompt_timestamp = Some(now);
}
self.sample_seqs(&scheduled.prompt, logits);
if !self.no_kv_cache {
Expand Down Expand Up @@ -158,7 +167,10 @@ impl Engine {
.expect("Time travel has occurred!")
.as_secs();
#[allow(clippy::cast_precision_loss)]
let tok_per_sec = deref_refcell!(seq).len() as f32
let total_tok_per_sec = deref_refcell!(seq).len() as f32
/ (now - deref_refcell!(seq).timestamp()) as f32;
#[allow(clippy::cast_precision_loss)]
let compl_tok_per_sec = deref_refcell!(seq).len() as f32
/ (now - deref_refcell!(seq).timestamp()) as f32;

// NOTE(EricLBuehler): Unwrap reasoning: the receiver is expected to still be alive; if it is gone, that is the fault of whoever dropped it.
Expand All @@ -175,7 +187,9 @@ impl Engine {
completion_tokens: deref_refcell!(seq).logprobs().len(),
prompt_tokens: deref_refcell!(seq).prompt_tokens(),
total_tokens: deref_refcell!(seq).len(),
tok_per_sec,
total_tok_per_sec,
compl_tok_per_sec,
prompt_tok_per_sec: deref_refcell!(seq).prompt_tok_per_sec,
},
}))
.unwrap();
Expand Down
4 changes: 3 additions & 1 deletion mistralrs-core/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ pub struct ChatCompletionUsage {
pub completion_tokens: usize,
pub prompt_tokens: usize,
pub total_tokens: usize,
pub tok_per_sec: f32,
pub total_tok_per_sec: f32,
pub prompt_tok_per_sec: f32,
pub compl_tok_per_sec: f32,
}

#[derive(Debug, Clone, Serialize)]
Expand Down
8 changes: 8 additions & 0 deletions mistralrs-core/src/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ pub struct Sequence {
stop_tokens: Vec<u32>,
max_len: Option<usize>,
return_logprobs: bool,
pub prompt_tok_per_sec: f32,
pub prompt_timestamp: Option<u64>,
}

impl Sequence {
Expand Down Expand Up @@ -71,6 +73,8 @@ impl Sequence {
stop_tokens,
max_len,
return_logprobs,
prompt_tok_per_sec: 0.,
prompt_timestamp: None,
}
}

Expand Down Expand Up @@ -164,4 +168,8 @@ impl Sequence {
/// Returns the timestamp stored on this sequence.
///
/// NOTE(review): the engine subtracts this value from the current time in
/// whole seconds since `UNIX_EPOCH`, so it is presumably the sequence's
/// creation time in the same unit — confirm where the field is assigned.
pub fn timestamp(&self) -> u64 {
self.timestamp
}

/// Returns the stored prompt timestamp, or `None` if it has not been set yet.
///
/// The value is a copy of the public `prompt_timestamp` field (an
/// `Option<u64>`), so reading it does not modify the sequence.
/// NOTE(review): the engine appears to store whole seconds since
/// `UNIX_EPOCH` here once the prompt has been run — confirm the unit
/// against the code that assigns the field.
pub fn prompt_timestamp(&self) -> Option<u64> {
self.prompt_timestamp
}
}

0 comments on commit 1835461

Please sign in to comment.