diff --git a/async-openai/src/types/realtime/client_event.rs b/async-openai/src/types/realtime/client_event.rs
index 87ff7010..9fb3d2a4 100644
--- a/async-openai/src/types/realtime/client_event.rs
+++ b/async-openai/src/types/realtime/client_event.rs
@@ -1,7 +1,76 @@
 use serde::{Deserialize, Serialize};
 use tokio_tungstenite::tungstenite::Message;
 
-use super::{item::Item, session_resource::SessionResource};
+use super::{
+    item::Item,
+    session_resource::{
+        AudioFormat, MaxResponseOutputTokens, Modality, RealtimeVoice, SessionResource, ToolChoice,
+        ToolDefinition,
+    },
+};
+
+/// Configuration for a response in the OpenAI Realtime API.
+/// This is used in the `response.create` event.
+#[derive(Debug, Serialize, Deserialize, Clone, Default)]
+pub struct ResponseConfig {
+    /// Controls which conversation the response is added to. Currently supports "auto" and "none",
+    /// with "auto" as the default value. The "auto" value means that the contents of the response
+    /// will be added to the default conversation. Set this to "none" to create an out-of-band response
+    /// which will not add items to default conversation.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub conversation: Option<String>,
+
+    /// Input items to include in the prompt for the model. Using this field creates a new context
+    /// for this Response instead of using the default conversation. An empty array [] will clear
+    /// the context for this Response. Note that this can include references to items from the default conversation.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input: Option<Vec<Item>>,
+
+    /// The default system instructions (i.e. system message) prepended to model calls.
+    /// This field allows the client to guide the model on desired responses.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
+    /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model.
+    /// Defaults to "inf".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_response_output_tokens: Option<MaxResponseOutputTokens>,
+
+    /// Set of 16 key-value pairs that can be attached to an object.
+    /// This can be useful for storing additional information about the object in a structured format.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<std::collections::HashMap<String, String>>,
+
+    /// The set of modalities the model can respond with. To disable audio, set this to ["text"].
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub modalities: Option<Vec<Modality>>,
+
+    /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_audio_format: Option<AudioFormat>,
+
+    /// Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// How the model chooses tools.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoice>,
+
+    /// Tools (functions) available to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<ToolDefinition>>,
+
+    /// The voice the model uses to respond. Cannot be changed once the model has responded with audio at least once.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub voice: Option<RealtimeVoice>,
+
+    /// The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+    /// This value can only be changed in between model turns, not while a response is in progress.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub speed: Option<f32>,
+}
 
 #[derive(Debug, Serialize, Deserialize, Clone, Default)]
 pub struct SessionUpdateEvent {
@@ -35,6 +104,13 @@ pub struct InputAudioBufferClearEvent {
     pub event_id: Option<String>,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone, Default)]
+pub struct OutputAudioBufferClearEvent {
+    /// Optional client-generated ID used to identify this event.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub event_id: Option<String>,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct ConversationItemCreateEvent {
     /// Optional client-generated ID used to identify this event.
@@ -75,6 +151,16 @@ pub struct ConversationItemDeleteEvent {
     pub item_id: String,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone, Default)]
+pub struct ConversationItemRetrieveEvent {
+    /// Optional client-generated ID used to identify this event.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub event_id: Option<String>,
+
+    /// The ID of the item to retrieve.
+    pub item_id: String,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone, Default)]
 pub struct ResponseCreateEvent {
     /// Optional client-generated ID used to identify this event.
@@ -82,7 +168,7 @@ pub struct ResponseCreateEvent {
     pub event_id: Option<String>,
 
     /// Configuration for the response.
-    pub response: Option<SessionResource>,
+    pub response: Option<ResponseConfig>,
 }
 
 #[derive(Debug, Serialize, Deserialize, Clone, Default)]
@@ -90,6 +176,9 @@ pub struct ResponseCancelEvent {
     /// Optional client-generated ID used to identify this event.
    #[serde(skip_serializing_if = "Option::is_none")]
     pub event_id: Option<String>,
+
+    /// A specific response ID to cancel - if not provided, will cancel an in-progress response in the default conversation.
+    pub response_id: String,
 }
 
 /// These are events that the OpenAI Realtime WebSocket server will accept from the client.
@@ -112,6 +201,10 @@ pub enum ClientEvent {
     #[serde(rename = "input_audio_buffer.clear")]
     InputAudioBufferClear(InputAudioBufferClearEvent),
 
+    /// WebRTC Only: Send this event to cut off the current audio response.
+    #[serde(rename = "output_audio_buffer.clear")]
+    OutputAudioBufferClear(OutputAudioBufferClearEvent),
+
     /// Send this event when adding an item to the conversation.
     #[serde(rename = "conversation.item.create")]
     ConversationItemCreate(ConversationItemCreateEvent),
@@ -124,6 +217,10 @@ pub enum ClientEvent {
     #[serde(rename = "conversation.item.delete")]
     ConversationItemDelete(ConversationItemDeleteEvent),
 
+    /// Send this event when you want to retrieve the server's representation of a specific item in the conversation history.
+    #[serde(rename = "conversation.item.retrieve")]
+    ConversationItemRetrieve(ConversationItemRetrieveEvent),
+
     /// Send this event to trigger a response generation.
     #[serde(rename = "response.create")]
     ResponseCreate(ResponseCreateEvent),
 
@@ -181,6 +278,11 @@ event_from!(
     ClientEvent,
     InputAudioBufferClear
 );
+event_from!(
+    OutputAudioBufferClearEvent,
+    ClientEvent,
+    OutputAudioBufferClear
+);
 event_from!(
     ConversationItemCreateEvent,
     ClientEvent,
@@ -198,14 +300,21 @@ event_from!(
 );
 event_from!(ResponseCreateEvent, ClientEvent, ResponseCreate);
 event_from!(ResponseCancelEvent, ClientEvent, ResponseCancel);
+event_from!(
+    ConversationItemRetrieveEvent,
+    ClientEvent,
+    ConversationItemRetrieve
+);
 
 message_from_event!(SessionUpdateEvent, ClientEvent);
 message_from_event!(InputAudioBufferAppendEvent, ClientEvent);
 message_from_event!(InputAudioBufferCommitEvent, ClientEvent);
 message_from_event!(InputAudioBufferClearEvent, ClientEvent);
+message_from_event!(OutputAudioBufferClearEvent, ClientEvent);
 message_from_event!(ConversationItemCreateEvent, ClientEvent);
 message_from_event!(ConversationItemTruncateEvent, ClientEvent);
 message_from_event!(ConversationItemDeleteEvent, ClientEvent);
+message_from_event!(ConversationItemRetrieveEvent, ClientEvent);
 message_from_event!(ResponseCreateEvent, ClientEvent);
 message_from_event!(ResponseCancelEvent, ClientEvent);
diff --git a/async-openai/src/types/realtime/response_resource.rs b/async-openai/src/types/realtime/response_resource.rs
index a6c6c32f..922c1fad 100644
--- a/async-openai/src/types/realtime/response_resource.rs
+++ b/async-openai/src/types/realtime/response_resource.rs
@@ -31,6 +31,17 @@ pub enum IncompleteReason {
     Interruption,
     MaxOutputTokens,
     ContentFilter,
+    TokenLimit,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum FinishReason {
+    Stop,
+    Length,
+    ToolCalls,
+    ContentFilter,
+    FunctionCall,
 }
 
 #[derive(Debug, Serialize, Deserialize, Clone)]
@@ -58,4 +69,10 @@ pub struct ResponseResource {
     pub output: Vec<Item>,
     /// Usage statistics for the response.
     pub usage: Option<Usage>,
+    /// The Unix timestamp (in seconds) for when the response was created.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_at: Option<u64>,
+    /// The reason the model stopped generating tokens, if applicable.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub finish_reason: Option<FinishReason>,
 }
diff --git a/async-openai/src/types/realtime/server_event.rs b/async-openai/src/types/realtime/server_event.rs
index 8795f6e4..fadeaa2d 100644
--- a/async-openai/src/types/realtime/server_event.rs
+++ b/async-openai/src/types/realtime/server_event.rs
@@ -38,7 +38,7 @@ pub struct ConversationCreatedEvent {
 }
 
 #[derive(Debug, Serialize, Deserialize, Clone)]
-pub struct InputAudioBufferCommitedEvent {
+pub struct InputAudioBufferCommittedEvent {
     /// The unique ID of the server event.
     pub event_id: String,
     /// The ID of the preceding item after which the new item will be inserted.
@@ -53,6 +53,12 @@ pub struct InputAudioBufferClearedEvent {
     pub event_id: String,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct OutputAudioBufferClearedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct InputAudioBufferSpeechStartedEvent {
     /// The unique ID of the server event.
@@ -154,6 +160,14 @@ pub struct ConversationItemDeletedEvent {
     pub item_id: String,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemRetrievedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The item that was retrieved.
+    pub item: Item,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct ResponseCreatedEvent {
     /// The unique ID of the server event.
@@ -381,12 +395,16 @@ pub enum ServerEvent {
 
     /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode.
     #[serde(rename = "input_audio_buffer.committed")]
-    InputAudioBufferCommited(InputAudioBufferCommitedEvent),
+    InputAudioBufferCommitted(InputAudioBufferCommittedEvent),
 
     /// Returned when the input audio buffer is cleared by the client.
     #[serde(rename = "input_audio_buffer.cleared")]
     InputAudioBufferCleared(InputAudioBufferClearedEvent),
 
+    /// Returned when the output audio buffer is cleared by the client (WebRTC specific).
+    #[serde(rename = "output_audio_buffer.cleared")]
+    OutputAudioBufferCleared(OutputAudioBufferClearedEvent),
+
     /// Returned in server turn detection mode when speech is detected.
     #[serde(rename = "input_audio_buffer.speech_started")]
     InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent),
@@ -422,6 +440,10 @@ pub enum ServerEvent {
     #[serde(rename = "conversation.item.deleted")]
     ConversationItemDeleted(ConversationItemDeletedEvent),
 
+    /// Returned when an item in the conversation is retrieved.
+    #[serde(rename = "conversation.item.retrieved")]
+    ConversationItemRetrieved(ConversationItemRetrievedEvent),
+
     /// Returned when a new Response is created. The first event of response creation, where the response is in an initial state of "in_progress".
     #[serde(rename = "response.created")]
     ResponseCreated(ResponseCreatedEvent),
diff --git a/async-openai/src/types/realtime/session_resource.rs b/async-openai/src/types/realtime/session_resource.rs
index 2fe1e5b1..f84ecff9 100644
--- a/async-openai/src/types/realtime/session_resource.rs
+++ b/async-openai/src/types/realtime/session_resource.rs
@@ -10,6 +10,22 @@ pub enum AudioFormat {
     G711ALAW,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum NoiseReductionType {
+    NearField,
+    FarField,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct InputAudioNoiseReduction {
+    /// Type of noise reduction. `near_field` is for close-talking microphones such as
+    /// headphones, `far_field` is for far-field microphones such as laptop or
+    /// conference room microphones.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub r#type: Option<NoiseReductionType>,
+}
+
 #[derive(Debug, Default, Serialize, Deserialize, Clone)]
 pub struct AudioTranscription {
     /// The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
@@ -74,6 +90,32 @@ pub enum MaxResponseOutputTokens {
     Num(u16),
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct TracingConfiguration {
+    /// The group id to attach to this trace to enable filtering and grouping in the traces dashboard.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub group_id: Option<String>,
+
+    /// The arbitrary metadata to attach to this trace to enable filtering in the traces dashboard.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<serde_json::Value>,
+
+    /// The name of the workflow to attach to this trace. This is used to name the trace in the traces dashboard.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub workflow_name: Option<String>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(untagged)]
+pub enum TracingOption {
+    /// Auto tracing with default values
+    #[serde(rename = "auto")]
+    Auto,
+    /// Granular tracing configuration
+    #[serde(rename = "config")]
+    Config(TracingConfiguration),
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone)]
 #[serde(tag = "type")]
 pub enum ToolDefinition {
@@ -118,19 +160,43 @@ pub enum RealtimeVoice {
     Fable,
     Onyx,
     Nova,
+    Sage,
     Shimmer,
     Verse,
 }
 
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum Modality {
+    Text,
+    Audio,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub enum RealtimeModel {
+    #[serde(rename = "gpt-4o-realtime-preview")]
+    GPT4ORealtimePreview,
+    #[serde(rename = "gpt-4o-realtime-preview-2024-10-01")]
+    GPT4ORealtimePreview20241001,
+    #[serde(rename = "gpt-4o-realtime-preview-2024-12-17")]
+    GPT4ORealtimePreview20241217,
+    #[serde(rename = "gpt-4o-realtime-preview-2025-06-03")]
+    GPT4ORealtimePreview20250603,
+    #[serde(rename = "gpt-4o-mini-realtime-preview")]
+    GPT4OMiniRealtimePreview,
+    #[serde(rename = "gpt-4o-mini-realtime-preview-2024-12-17")]
+    GPT4OMiniRealtimePreview20241217,
+}
+
 #[derive(Debug, Serialize, Deserialize, Clone, Default)]
 pub struct SessionResource {
     /// The default model used for this session.
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub model: Option<String>,
+    pub model: Option<RealtimeModel>,
 
     /// The set of modalities the model can respond with. To disable audio, set this to ["text"].
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub modalities: Option<Vec<String>>,
+    pub modalities: Option<Vec<Modality>>,
 
     //// The default system instructions prepended to model calls.
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -140,10 +206,22 @@ pub struct SessionResource {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub voice: Option<RealtimeVoice>,
 
+    /// The speed of the model's spoken response. 1.0 is the default speed. 0.25 is the minimum speed. 1.5 is the maximum speed.
+    /// This value can only be changed in between model turns, not while a response is in progress.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub speed: Option<f32>,
+
     /// The format of input audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
     #[serde(skip_serializing_if = "Option::is_none")]
     pub input_audio_format: Option<AudioFormat>,
 
+    /// Configuration for input audio noise reduction. This can be set to `null` to turn off.
+    /// Noise reduction filters audio added to the input audio buffer before it is sent to VAD and the model.
+    /// Filtering the audio can improve VAD and turn detection accuracy (reducing false positives)
+    /// and model performance by improving perception of the input audio.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_audio_noise_reduction: Option<InputAudioNoiseReduction>,
+
     /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
     #[serde(skip_serializing_if = "Option::is_none")]
     pub output_audio_format: Option<AudioFormat>,
@@ -168,6 +246,9 @@ pub struct SessionResource {
     /// Sampling temperature for the model.
     pub temperature: Option<f32>,
 
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tracing: Option<TracingOption>,
+
     /// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
     /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model.
     /// Defaults to "inf".
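
For reviewers, a minimal usage sketch of the new `ResponseConfig` on a `response.create` event. It assumes the realtime types are re-exported under `async_openai::types::realtime` (matching how the existing client events are exposed) and that `serde_json` is available; the field values are illustrative only.

```rust
// Sketch only: exercises the new ResponseConfig added in this diff.
use async_openai::types::realtime::{ClientEvent, Modality, ResponseConfig, ResponseCreateEvent};

fn main() {
    // Request an out-of-band response with its own instructions and text-only output.
    let event: ClientEvent = ResponseCreateEvent {
        event_id: None,
        response: Some(ResponseConfig {
            conversation: Some("none".to_string()),
            modalities: Some(vec![Modality::Text]),
            instructions: Some("Answer in one short sentence.".to_string()),
            temperature: Some(0.8),
            ..Default::default()
        }),
    }
    .into(); // From<ResponseCreateEvent> for ClientEvent, provided by event_from!

    // Serialize to the JSON payload sent over the realtime WebSocket.
    println!("{}", serde_json::to_string_pretty(&event).unwrap());
}
```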
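
A second sketch covers the new session fields (`speed`, `input_audio_noise_reduction`, `tracing`, and the typed `model`). The `session` field name on `SessionUpdateEvent` follows the existing struct and is not shown in this diff, so treat it as an assumption.

```rust
// Sketch only: configures the new SessionResource fields on a session.update event.
use async_openai::types::realtime::{
    ClientEvent, InputAudioNoiseReduction, NoiseReductionType, RealtimeModel, SessionResource,
    SessionUpdateEvent, TracingOption,
};

fn main() {
    let session = SessionResource {
        model: Some(RealtimeModel::GPT4OMiniRealtimePreview),
        speed: Some(1.25),
        input_audio_noise_reduction: Some(InputAudioNoiseReduction {
            r#type: Some(NoiseReductionType::NearField),
        }),
        tracing: Some(TracingOption::Auto),
        ..Default::default()
    };

    let event: ClientEvent = SessionUpdateEvent {
        event_id: None,
        session, // assumed field name on the existing SessionUpdateEvent struct
    }
    .into();

    println!("{}", serde_json::to_string(&event).unwrap());
}
```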