Skip to content

Commit 9427382

Browse files
committed
feat: add user turn metrics, and more info in request metadata
1 parent f9448b5 commit 9427382

File tree

9 files changed

+500
-133
lines changed

9 files changed

+500
-133
lines changed

crates/chat-cli/src/cli/chat/cli/compact.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ impl CompactArgs {
5757
Some(self.prompt.join(" "))
5858
};
5959

60+
// Compact interrupts the current conversation so this will always result in a new user
61+
// turn.
62+
session.reset_user_turn();
63+
6064
session
6165
.compact_history(os, prompt, self.show_summary, CompactStrategy {
6266
messages_to_exclude: self.messages_to_exclude.unwrap_or(default.messages_to_exclude),

crates/chat-cli/src/cli/chat/mod.rs

Lines changed: 168 additions & 94 deletions
Large diffs are not rendered by default.

crates/chat-cli/src/cli/chat/parser.rs

Lines changed: 128 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use std::sync::Arc;
22
use std::time::{
33
Duration,
44
Instant,
5+
SystemTime,
6+
UNIX_EPOCH,
57
};
68

79
use eyre::Result;
@@ -37,8 +39,43 @@ use crate::api_client::{
3739
ApiClientError,
3840
};
3941
use crate::telemetry::ReasonCode;
40-
use crate::telemetry::core::ChatConversationType;
42+
use crate::telemetry::core::{
43+
ChatConversationType,
44+
MessageMetaTag,
45+
};
46+
47+
/// Error from sending a SendMessage request.
48+
#[derive(Debug, Error)]
49+
pub struct SendMessageError {
50+
#[source]
51+
pub source: ApiClientError,
52+
pub request_metadata: RequestMetadata,
53+
}
54+
55+
impl SendMessageError {
56+
pub fn status_code(&self) -> Option<u16> {
57+
self.source.status_code()
58+
}
59+
}
60+
61+
impl ReasonCode for SendMessageError {
62+
fn reason_code(&self) -> String {
63+
self.source.reason_code()
64+
}
65+
}
66+
67+
impl std::fmt::Display for SendMessageError {
68+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69+
write!(f, "Failed to send the request: ")?;
70+
if let Some(request_id) = self.request_metadata.request_id.as_ref() {
71+
write!(f, "request_id: {}, error: ", request_id)?;
72+
}
73+
write!(f, "{}", self.source)?;
74+
Ok(())
75+
}
76+
}
4177

78+
/// Errors associated with consuming the response stream.
4279
#[derive(Debug, Error)]
4380
pub struct RecvError {
4481
#[source]
@@ -163,16 +200,36 @@ impl SendMessageStream {
163200
client: &ApiClient,
164201
conversation_state: ConversationState,
165202
request_metadata_lock: Arc<Mutex<Option<RequestMetadata>>>,
166-
) -> Result<Self, ApiClientError> {
203+
message_meta_tags: Option<Vec<MessageMetaTag>>,
204+
) -> Result<Self, SendMessageError> {
167205
let message_id = uuid::Uuid::new_v4().to_string();
168206
info!(?message_id, "Generated new message id");
207+
let user_prompt_length = conversation_state.user_input_message.content.len();
208+
let model_id = conversation_state.user_input_message.model_id.clone();
209+
let message_meta_tags = message_meta_tags.unwrap_or_default();
169210

170211
let cancel_token = CancellationToken::new();
171212
let cancel_token_clone = cancel_token.clone();
172213

173214
let start_time = Instant::now();
215+
let start_time_sys = SystemTime::now();
174216
debug!(?start_time, "sending send_message request");
175-
let response = client.send_message(conversation_state).await?;
217+
let response = client
218+
.send_message(conversation_state)
219+
.await
220+
.map_err(|err| SendMessageError {
221+
source: err,
222+
request_metadata: RequestMetadata {
223+
message_id: message_id.clone(),
224+
request_start_timestamp_ms: system_time_to_unix_ms(start_time_sys),
225+
stream_end_timestamp_ms: system_time_to_unix_ms(SystemTime::now()),
226+
model_id: model_id.clone(),
227+
user_prompt_length,
228+
message_meta_tags: message_meta_tags.clone(),
229+
// Other fields are irrelevant if we can't get a successful response
230+
..Default::default()
231+
},
232+
})?;
176233
let elapsed = start_time.elapsed();
177234
debug!(?elapsed, "send_message succeeded");
178235

@@ -182,8 +239,12 @@ impl SendMessageStream {
182239
ResponseParser::new(
183240
response,
184241
message_id,
242+
model_id,
243+
user_prompt_length,
244+
message_meta_tags,
185245
ev_tx,
186246
start_time,
247+
start_time_sys,
187248
cancel_token_clone,
188249
request_metadata_lock,
189250
)
@@ -221,9 +282,8 @@ struct ResponseParser {
221282

222283
/// Message identifier for the assistant's response. Randomly generated on creation.
223284
message_id: String,
224-
285+
/// Whether or not the stream has completed.
225286
ended: bool,
226-
227287
/// Buffer to hold the next event in [SendMessageOutput].
228288
peek: Option<ChatResponseStream>,
229289
/// Buffer for holding the accumulated assistant response.
@@ -238,33 +298,50 @@ struct ResponseParser {
238298
cancel_token: CancellationToken,
239299

240300
// metadata fields
241-
/// Time immediately after sending the request.
242-
start_time: Instant,
301+
/// Id of the model used with this request.
302+
model_id: Option<String>,
303+
/// Length of the user prompt for the initial request.
304+
user_prompt_length: usize,
305+
/// Meta tags for the initial request.
306+
message_meta_tags: Vec<MessageMetaTag>,
307+
/// Time immediately before sending the request.
308+
request_start_time: Instant,
309+
/// Time immediately before sending the request, as a [SystemTime].
310+
request_start_time_sys: SystemTime,
243311
/// Total size (in bytes) of the response received so far.
244312
received_response_size: usize,
245313
time_to_first_chunk: Option<Duration>,
246314
time_between_chunks: Vec<Duration>,
247315
}
248316

249317
impl ResponseParser {
318+
#[allow(clippy::too_many_arguments)]
250319
fn new(
251320
response: SendMessageOutput,
252321
message_id: String,
322+
model_id: Option<String>,
323+
user_prompt_length: usize,
324+
message_meta_tags: Vec<MessageMetaTag>,
253325
event_tx: mpsc::Sender<Result<ResponseEvent, RecvError>>,
254-
start_time: Instant,
326+
request_start_time: Instant,
327+
request_start_time_sys: SystemTime,
255328
cancel_token: CancellationToken,
256329
request_metadata: Arc<Mutex<Option<RequestMetadata>>>,
257330
) -> Self {
258331
Self {
259332
response,
260333
message_id,
334+
model_id,
335+
user_prompt_length,
336+
message_meta_tags,
261337
ended: false,
262338
event_tx,
263339
peek: None,
264340
assistant_text: String::new(),
265341
tool_uses: Vec::new(),
266342
parsing_tool_use: None,
267-
start_time,
343+
request_start_time,
344+
request_start_time_sys,
268345
received_response_size: 0,
269346
time_to_first_chunk: None,
270347
time_between_chunks: Vec::new(),
@@ -481,7 +558,7 @@ impl ResponseParser {
481558

482559
// Track metadata about the chunk.
483560
self.time_to_first_chunk
484-
.get_or_insert_with(|| self.start_time.elapsed());
561+
.get_or_insert_with(|| self.request_start_time.elapsed());
485562
self.time_between_chunks.push(duration);
486563
if let Some(r) = ev.as_ref() {
487564
match r {
@@ -510,10 +587,6 @@ impl ResponseParser {
510587
}
511588
}
512589

513-
fn request_id(&self) -> Option<&str> {
514-
self.response.request_id()
515-
}
516-
517590
/// Helper to create a new [RecvError] populated with the associated request id for the stream.
518591
fn error(&self, source: impl Into<RecvErrorKind>) -> RecvError {
519592
RecvError {
@@ -524,11 +597,24 @@ impl ResponseParser {
524597

525598
fn make_metadata(&self, chat_conversation_type: Option<ChatConversationType>) -> RequestMetadata {
526599
RequestMetadata {
527-
request_id: self.request_id().map(String::from),
600+
request_id: self.response.request_id().map(String::from),
601+
message_id: self.message_id.clone(),
528602
time_to_first_chunk: self.time_to_first_chunk,
529603
time_between_chunks: self.time_between_chunks.clone(),
530604
response_size: self.received_response_size,
531605
chat_conversation_type,
606+
request_start_timestamp_ms: system_time_to_unix_ms(self.request_start_time_sys),
607+
// We always end the stream when this method is called, so just set the end timestamp
608+
// here.
609+
stream_end_timestamp_ms: system_time_to_unix_ms(SystemTime::now()),
610+
user_prompt_length: self.user_prompt_length,
611+
message_meta_tags: self.message_meta_tags.clone(),
612+
tool_use_ids_and_names: self
613+
.tool_uses
614+
.iter()
615+
.map(|t| (t.id.clone(), t.name.clone()))
616+
.collect::<_>(),
617+
model_id: self.model_id.clone(),
532618
}
533619
}
534620
}
@@ -554,18 +640,40 @@ pub enum ResponseEvent {
554640
}
555641

556642
/// Metadata about the sent request and associated response stream.
///
/// Populated by the response parser once a stream ends (or by the send path on
/// failure) and consumed for telemetry. NOTE(review): field order is preserved
/// as-is since this type derives Serialize/Deserialize.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RequestMetadata {
    /// The request id associated with the [SendMessageOutput] stream.
    pub request_id: Option<String>,
    /// The randomly-generated id associated with the request. Equivalent to utterance id.
    pub message_id: String,
    /// Unix timestamp (milliseconds) immediately before sending the request.
    pub request_start_timestamp_ms: u64,
    /// Unix timestamp (milliseconds) once the stream has either completed or ended in an error.
    pub stream_end_timestamp_ms: u64,
    /// Time until the first chunk was received, if any chunk arrived.
    pub time_to_first_chunk: Option<Duration>,
    /// Time between each received chunk in the stream.
    pub time_between_chunks: Vec<Duration>,
    /// Total size (in bytes) of the user prompt associated with the request.
    pub user_prompt_length: usize,
    /// Total size (in bytes) of the response received so far.
    pub response_size: usize,
    /// [ChatConversationType] for the returned assistant message.
    pub chat_conversation_type: Option<ChatConversationType>,
    /// (id, name) pairs of the tool uses returned by the assistant for this request.
    pub tool_use_ids_and_names: Vec<(String, String)>,
    /// Id of the model used with this request, when one was set.
    pub model_id: Option<String>,
    /// Meta tags for the request.
    pub message_meta_tags: Vec<MessageMetaTag>,
}
670+
671+
/// Converts a [SystemTime] to a unix timestamp in milliseconds.
///
/// Returns 0 when `time` is before the unix epoch (e.g. a badly skewed system
/// clock) instead of panicking — a telemetry helper should never abort the
/// process — and saturates at `u64::MAX` in the (theoretical) far future.
/// Uses `Duration::as_millis` directly rather than a lossy f64 round-trip.
fn system_time_to_unix_ms(time: SystemTime) -> u64 {
    time.duration_since(UNIX_EPOCH)
        .map(|d| u64::try_from(d.as_millis()).unwrap_or(u64::MAX))
        .unwrap_or(0)
}
570678

571679
#[cfg(test)]
@@ -623,8 +731,12 @@ mod tests {
623731
let mut parser = ResponseParser::new(
624732
mock,
625733
"".to_string(),
734+
None,
735+
1,
736+
vec![],
626737
mpsc::channel(32).0,
627738
Instant::now(),
739+
SystemTime::now(),
628740
CancellationToken::new(),
629741
Arc::new(Mutex::new(None)),
630742
);

crates/chat-cli/src/cli/chat/tool_manager.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,7 @@ impl ToolManagerBuilder {
611611
Err(e) => {
612612
error!("Error initializing mcp client for server {}: {:?}", name, &e);
613613
os.telemetry
614-
.send_mcp_server_init(conversation_id.clone(), Some(e.to_string()), 0)
614+
.send_mcp_server_init(conversation_id.clone(), name, Some(e.to_string()), 0)
615615
.ok();
616616
let _ = messenger.send_tools_list_result(Err(e)).await;
617617
},
@@ -1401,7 +1401,7 @@ fn process_tool_specs(
14011401
specs.retain(|spec| !matches!(spec.tool_origin, ToolOrigin::Native));
14021402
// Send server load success metric datum
14031403
let conversation_id = conversation_id.to_string();
1404-
let _ = telemetry.send_mcp_server_init(conversation_id, None, number_of_tools);
1404+
let _ = telemetry.send_mcp_server_init(conversation_id, server_name.to_string(), None, number_of_tools);
14051405
// Tool name translation. This is beyond of the scope of what is
14061406
// considered a "server load". Reasoning being:
14071407
// - Failures here are not related to server load

crates/chat-cli/src/cli/chat/tools/mod.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@ pub mod knowledge;
77
pub mod thinking;
88
pub mod use_aws;
99

10-
use std::borrow::Borrow;
10+
use std::borrow::{
11+
Borrow,
12+
Cow,
13+
};
1114
use std::io::Write;
1215
use std::path::{
1316
Path,
@@ -31,6 +34,7 @@ use serde::{
3134
Serialize,
3235
};
3336
use thinking::Thinking;
37+
use tracing::error;
3438
use use_aws::UseAws;
3539

3640
use super::consts::MAX_TOOL_RESPONSE_SIZE;
@@ -239,12 +243,15 @@ pub struct InvokeOutput {
239243
}
240244

241245
impl InvokeOutput {
242-
pub fn as_str(&self) -> &str {
246+
pub fn as_str(&self) -> Cow<'_, str> {
243247
match &self.output {
244-
OutputKind::Text(s) => s.as_str(),
245-
OutputKind::Json(j) => j.as_str().unwrap_or_default(),
246-
OutputKind::Images(_) => "",
247-
OutputKind::Mixed { text, .. } => text.as_str(), // Return the text part
248+
OutputKind::Text(s) => s.as_str().into(),
249+
OutputKind::Json(j) => serde_json::to_string(j)
250+
.map_err(|err| error!(?err, "failed to serialize tool to json"))
251+
.unwrap_or_default()
252+
.into(),
253+
OutputKind::Images(_) => "".into(),
254+
OutputKind::Mixed { text, .. } => text.as_str().into(), // Return the text part
248255
}
249256
}
250257
}

0 commit comments

Comments
 (0)