From ceeda7649761b6a5c77f8cfcac4c5a621b329048 Mon Sep 17 00:00:00 2001 From: Jose Arrillaga Date: Wed, 25 Dec 2024 14:38:31 -0500 Subject: [PATCH 1/5] use URL lib to form url with query --- lib/api.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/api.js b/lib/api.js index cc8f904..8db02fa 100644 --- a/lib/api.js +++ b/lib/api.js @@ -73,7 +73,10 @@ export class RealtimeAPI extends RealtimeEventHandler { ); } const WebSocket = globalThis.WebSocket; - const ws = new WebSocket(`${this.url}${model ? `?model=${model}` : ''}`, [ + const url = new URL(this.url); + url.searchParams.set('model', model); + + const ws = new WebSocket(url.toString(), [ 'realtime', `openai-insecure-api-key.${this.apiKey}`, 'openai-beta.realtime-v1', From 716b24542c2ee3b457b98a65d319f80d74dd5653 Mon Sep 17 00:00:00 2001 From: Jose Arrillaga Date: Sat, 5 Apr 2025 22:22:45 -0400 Subject: [PATCH 2/5] add types for semantic_vad support --- README.md | 2 +- dist/lib/client.d.ts | 17 +++++++++++++++-- lib/client.js | 38 ++++++++++++++++++++++++++++++++++---- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 1ebbfc2..34c8aff 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ const client = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY }); client.updateSession({ instructions: 'You are a great, upbeat friend.' 
}); client.updateSession({ voice: 'alloy' }); client.updateSession({ - turn_detection: { type: 'none' }, // or 'server_vad' + turn_detection: { type: 'none' }, // or 'server_vad' or 'semantic_vad' input_audio_transcription: { model: 'whisper-1' }, }); diff --git a/dist/lib/client.d.ts b/dist/lib/client.d.ts index 7ea2201..35f29ea 100644 --- a/dist/lib/client.d.ts +++ b/dist/lib/client.d.ts @@ -13,6 +13,13 @@ * @property {number} [prefix_padding_ms] * @property {number} [silence_duration_ms] */ +/** + * @typedef {Object} TurnDetectionSemanticVadType + * @property {"semantic_vad"} type + * @property {boolean} [create_response] + * @property {boolean} [interrupt_response] + * @property {"auto"|"low"|"medium"|"high"} [eagerness] + */ /** * Tool definitions * @typedef {Object} ToolDefinitionType @@ -235,9 +242,9 @@ export class RealtimeClient extends RealtimeEventHandler { disconnect(): void; /** * Gets the active turn detection mode - * @returns {"server_vad"|null} + * @returns {"server_vad"|"semantic_vad"|null} */ - getTurnDetectionType(): "server_vad" | null; + getTurnDetectionType(): "server_vad" | "semantic_vad" | null; /** * Add a tool and handler * @param {ToolDefinitionType} definition @@ -321,6 +328,12 @@ export type TurnDetectionServerVadType = { prefix_padding_ms?: number; silence_duration_ms?: number; }; +export type TurnDetectionSemanticVadType = { + type: "semantic_vad"; + create_response?: boolean; + interrupt_response?: boolean; + eagerness?: "auto" | "low" | "medium" | "high"; +}; /** * Tool definitions */ diff --git a/lib/client.js b/lib/client.js index 2c48d7f..ec34813 100644 --- a/lib/client.js +++ b/lib/client.js @@ -21,6 +21,14 @@ import { RealtimeUtils } from './utils.js'; * @property {number} [silence_duration_ms] */ +/** + * @typedef {Object} TurnDetectionSemanticVadType + * @property {"semantic_vad"} type + * @property {boolean} [create_response] + * @property {boolean} [interrupt_response] + * @property {"auto"|"low"|"medium"|"high"} 
[eagerness] + */ + /** * Tool definitions * @typedef {Object} ToolDefinitionType @@ -39,7 +47,7 @@ import { RealtimeUtils } from './utils.js'; * @property {AudioFormatType} [input_audio_format] * @property {AudioFormatType} [output_audio_format] * @property {AudioTranscriptionType|null} [input_audio_transcription] - * @property {TurnDetectionServerVadType|null} [turn_detection] + * @property {TurnDetectionServerVadType|TurnDetectionSemanticVadType|null} [turn_detection] * @property {ToolDefinitionType[]} [tools] * @property {"auto"|"none"|"required"|{type:"function",name:string}} [tool_choice] * @property {number} [temperature] @@ -218,6 +226,12 @@ export class RealtimeClient extends RealtimeEventHandler { prefix_padding_ms: 300, // How much audio to include in the audio stream before the speech starts. silence_duration_ms: 200, // How long to wait to mark the speech as stopped. }; + this.defaultSemanticVadConfig = { + type: 'semantic_vad', + create_response: true, + interrupt_response: false, + eagerness: 'auto', + }; this.realtime = new RealtimeAPI({ url, apiKey, @@ -423,7 +437,7 @@ export class RealtimeClient extends RealtimeEventHandler { /** * Gets the active turn detection mode - * @returns {"server_vad"|null} + * @returns {"server_vad"|"semantic_vad"|null} */ getTurnDetectionType() { return this.sessionConfig.turn_detection?.type || null; @@ -504,8 +518,24 @@ export class RealtimeClient extends RealtimeEventHandler { input_audio_transcription !== void 0 && (this.sessionConfig.input_audio_transcription = input_audio_transcription); - turn_detection !== void 0 && - (this.sessionConfig.turn_detection = turn_detection); + + // Apply turn detection config with defaults if needed + if (turn_detection !== void 0) { + if (turn_detection?.type === 'semantic_vad') { + this.sessionConfig.turn_detection = { + ...this.defaultSemanticVadConfig, + ...turn_detection, + }; + } else if (turn_detection?.type === 'server_vad') { + this.sessionConfig.turn_detection = { + 
...this.defaultServerVadConfig, + ...turn_detection, + }; + } else { + this.sessionConfig.turn_detection = turn_detection; + } + } + tools !== void 0 && (this.sessionConfig.tools = tools); tool_choice !== void 0 && (this.sessionConfig.tool_choice = tool_choice); temperature !== void 0 && (this.sessionConfig.temperature = temperature); From b5b4166e564202b8f6e39fcd3e766cee529d603e Mon Sep 17 00:00:00 2001 From: Jose Arrillaga Date: Tue, 8 Apr 2025 22:12:20 -0400 Subject: [PATCH 3/5] add TurnDetectionSemanticVadType to turn_detection sig --- dist/lib/client.d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/lib/client.d.ts b/dist/lib/client.d.ts index 35f29ea..ff816e2 100644 --- a/dist/lib/client.d.ts +++ b/dist/lib/client.d.ts @@ -37,7 +37,7 @@ * @property {AudioFormatType} [input_audio_format] * @property {AudioFormatType} [output_audio_format] * @property {AudioTranscriptionType|null} [input_audio_transcription] - * @property {TurnDetectionServerVadType|null} [turn_detection] + * @property {TurnDetectionServerVadType|TurnDetectionSemanticVadType|null} [turn_detection] * @property {ToolDefinitionType[]} [tools] * @property {"auto"|"none"|"required"|{type:"function",name:string}} [tool_choice] * @property {number} [temperature] From 3bbc7ea4689a08c2b40472b62cd46b14f3d0ecc7 Mon Sep 17 00:00:00 2001 From: Jose Arrillaga Date: Tue, 8 Apr 2025 22:19:11 -0400 Subject: [PATCH 4/5] add TurnDetectionSemanticVadType to turn_detection sig --- dist/lib/client.d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/lib/client.d.ts b/dist/lib/client.d.ts index ff816e2..24e1a3a 100644 --- a/dist/lib/client.d.ts +++ b/dist/lib/client.d.ts @@ -354,7 +354,7 @@ export type SessionResourceType = { input_audio_format?: AudioFormatType; output_audio_format?: AudioFormatType; input_audio_transcription?: AudioTranscriptionType | null; - turn_detection?: TurnDetectionServerVadType | null; + turn_detection?: TurnDetectionServerVadType | 
TurnDetectionSemanticVadType | null; tools?: ToolDefinitionType[]; tool_choice?: "auto" | "none" | "required" | { type: "function"; From 592e0554b2a70d33c10dc8be5a4e169532ebab22 Mon Sep 17 00:00:00 2001 From: Jose Arrillaga Date: Tue, 8 Apr 2025 22:49:33 -0400 Subject: [PATCH 5/5] update gpt model --- lib/api.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/api.js b/lib/api.js index 8db02fa..7681c69 100644 --- a/lib/api.js +++ b/lib/api.js @@ -56,7 +56,7 @@ export class RealtimeAPI extends RealtimeEventHandler { * @param {{model?: string}} [settings] * @returns {Promise} */ - async connect({ model } = { model: 'gpt-4o-realtime-preview-2024-10-01' }) { + async connect({ model } = { model: 'gpt-4o-realtime-preview-2024-12-17' }) { if (!this.apiKey && this.url === this.defaultUrl) { console.warn(`No apiKey provided for connection to "${this.url}"`); } @@ -116,7 +116,7 @@ export class RealtimeAPI extends RealtimeEventHandler { const wsModule = await import(/* webpackIgnore: true */ moduleName); const WebSocket = wsModule.default; const ws = new WebSocket( - 'wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01', + `wss://api.openai.com/v1/realtime?model=${model}`, [], { finishRequest: (request) => {