Skip to content

Commit 197fe3f

Browse files
authored
Merge pull request #11 from speechmatics/bugfix/linux-playback-issue
Fix latency issue on linux
2 parents 22c68af + 3629088 commit 197fe3f

File tree

8 files changed

+77
-74
lines changed

8 files changed

+77
-74
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

7+
## [0.0.9] - 2025-01-24
8+
9+
### Fixed
10+
11+
- Issue with Flow latency on Linux
12+
13+
### Changed
14+
15+
- Examples load env vars from .env file
16+
- Examples now print user/agent transcripts
17+
718
## [0.0.8] - 2024-11-29
819

920
### Added

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ key can be found here: https://docs.speechmatics.com/flow/getting-started#set-up
3333
*Note: Requires access to microphone
3434

3535
```bash
36-
speechmatics-flow --auth-token $TOKEN --ssl-mode insecure
36+
speechmatics-flow --auth-token $TOKEN
3737
```
3838

3939
### Change Assistant (Amelia → Humphrey)
4040

4141
To set the assistant to *Humphrey* instead of *Amelia* run this command:
4242

4343
```bash
44-
speechmatics-flow --auth-token $TOKEN --ssl-mode insecure --assistant humphrey
44+
speechmatics-flow --auth-token $TOKEN --assistant humphrey
4545
```
4646

4747
### Load conversation_config from a config file
@@ -63,7 +63,7 @@ using the `--config-file` option
6363
```
6464

6565
```bash
66-
speechmatics-flow --auth-token $TOKEN --ssl-mode insecure --config-file conversation_config.json
66+
speechmatics-flow --auth-token $TOKEN --config-file conversation_config.json
6767
```
6868

6969
> **Hint**: Why limit Humphrey? Try changing the template_variables to see what happens if he’s not a butler but

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.8
1+
0.0.9

examples/client_side_function_calling.py

Lines changed: 14 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,21 @@
99
import sys
1010
from typing import Any, Dict
1111

12-
import pyaudio
12+
from dotenv import load_dotenv
1313

14+
from speechmatics_flow.cli import Transcripts, add_printing_handlers
1415
from speechmatics_flow.client import WebsocketClient
1516
from speechmatics_flow.models import (
16-
ConnectionSettings,
17-
Interaction,
1817
AudioSettings,
18+
ClientMessageType,
19+
ConnectionSettings,
1920
ConversationConfig,
21+
Interaction,
2022
ServerMessageType,
21-
ClientMessageType,
2223
)
24+
from speechmatics_flow.playback import audio_playback
25+
26+
load_dotenv()
2327

2428
# Create a websocket client
2529
client = WebsocketClient(
@@ -185,34 +189,13 @@ async def send_response(response: Dict[str, Any]) -> None:
185189
print(f"Failed to send response: {e}")
186190

187191

188-
client.add_event_handler(ServerMessageType.AddAudio, binary_msg_callback)
189-
client.add_event_handler(ServerMessageType.ToolInvoke, order_callback)
190-
191-
192-
async def audio_playback():
193-
"""Continuously read from the audio queue and play audio back to the user."""
194-
p = pyaudio.PyAudio()
195-
player_stream = p.open(
196-
format=pyaudio.paInt16,
197-
channels=1,
198-
rate=16000,
199-
frames_per_buffer=128,
200-
output=True,
201-
)
202-
try:
203-
while True:
204-
audio = await audio_queue.get()
205-
player_stream.write(audio)
206-
# read from buffer at a constant rate
207-
await asyncio.sleep(0.005)
208-
finally:
209-
player_stream.stop_stream()
210-
player_stream.close()
211-
p.terminate()
212-
213-
214192
async def main():
215193
"""Main function to run both the WebSocket client and audio playback."""
194+
transcripts = Transcripts()
195+
client.add_event_handler(ServerMessageType.AddAudio, binary_msg_callback)
196+
client.add_event_handler(ServerMessageType.ToolInvoke, order_callback)
197+
add_printing_handlers(client, transcripts, False)
198+
216199
tasks = [
217200
# Start the WebSocket client and conversation
218201
asyncio.create_task(
@@ -294,7 +277,7 @@ async def main():
294277
)
295278
),
296279
# Start the audio playback handler
297-
asyncio.create_task(audio_playback()),
280+
asyncio.create_task(audio_playback(audio_queue)),
298281
]
299282

300283
(done, pending) = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)

examples/stream_from_microphone.py

Lines changed: 13 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,20 @@
22
import os
33
import sys
44

5-
import pyaudio
5+
from dotenv import load_dotenv
66

7+
from speechmatics_flow.cli import Transcripts, add_printing_handlers
78
from speechmatics_flow.client import WebsocketClient
89
from speechmatics_flow.models import (
9-
ConnectionSettings,
10-
Interaction,
1110
AudioSettings,
11+
ConnectionSettings,
1212
ConversationConfig,
13+
Interaction,
1314
ServerMessageType,
1415
)
16+
from speechmatics_flow.playback import audio_playback
17+
18+
load_dotenv()
1519

1620
# Create a websocket client
1721
client = WebsocketClient(
@@ -31,34 +35,13 @@ async def binary_msg_callback(msg: bytes):
3135
await audio_queue.put(msg)
3236

3337

34-
# Register the callback to be called when the client receives an audio message
35-
client.add_event_handler(ServerMessageType.AddAudio, binary_msg_callback)
36-
37-
38-
async def audio_playback():
39-
"""Continuously read from the audio queue and play audio back to the user."""
40-
p = pyaudio.PyAudio()
41-
player_stream = p.open(
42-
format=pyaudio.paInt16,
43-
channels=1,
44-
rate=16000,
45-
frames_per_buffer=128,
46-
output=True,
47-
)
48-
try:
49-
while True:
50-
audio = await audio_queue.get()
51-
player_stream.write(audio)
52-
# read from buffer at a constant rate
53-
await asyncio.sleep(0.005)
54-
finally:
55-
player_stream.stop_stream()
56-
player_stream.close()
57-
p.terminate()
58-
59-
6038
async def main():
6139
"""Main function to run both the WebSocket client and audio playback."""
40+
transcripts = Transcripts()
41+
# Register callbacks
42+
client.add_event_handler(ServerMessageType.AddAudio, binary_msg_callback)
43+
add_printing_handlers(client, transcripts, False)
44+
6245
tasks = [
6346
# Start the WebSocket client and conversation
6447
asyncio.create_task(
@@ -69,7 +52,7 @@ async def main():
6952
)
7053
),
7154
# Start the audio playback handler
72-
asyncio.create_task(audio_playback()),
55+
asyncio.create_task(audio_playback(audio_queue)),
7356
]
7457

7558
(done, pending) = await asyncio.wait(tasks, return_when=asyncio.FIRST_EXCEPTION)

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ httpx==0.27.1
22
pyaudio==0.2.14
33
setuptools
44
websockets>=10,<=13.1
5+
python-dotenv~=1.0

speechmatics_flow/client.py

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def __init__(
8888
# Semaphore used to ensure that we don't send too much audio data to
8989
# the server too quickly and burst any buffers downstream.
9090
self._buffer_semaphore = asyncio.BoundedSemaphore
91+
self._loop = None
9192

9293
async def _init_synchronization_primitives(self):
9394
"""
@@ -100,6 +101,7 @@ async def _init_synchronization_primitives(self):
100101
self._buffer_semaphore = asyncio.BoundedSemaphore(
101102
self.connection_settings.message_buffer_size
102103
)
104+
self._loop = asyncio.get_running_loop()
103105

104106
def _flag_conversation_started(self):
105107
"""
@@ -233,8 +235,7 @@ async def _consumer(self, message, from_cli=False):
233235
if inspect.iscoroutinefunction(handler):
234236
await handler(copy.deepcopy(message))
235237
else:
236-
loop = asyncio.get_event_loop()
237-
await loop.run_in_executor(
238+
await self._loop.run_in_executor(
238239
self._executor, handler, copy.deepcopy(message)
239240
)
240241
except ForceEndSession:
@@ -277,6 +278,13 @@ async def _read_from_microphone(self):
277278
rate=self.audio_settings.sample_rate,
278279
input=True,
279280
)
281+
282+
async def async_stream_read():
283+
# audio_chunk size is 128 * 2 = 256 bytes which is about 8ms
284+
return await self._loop.run_in_executor(
285+
self._executor, stream.read, 128, False
286+
)
287+
280288
try:
281289
while True:
282290
if self._session_needs_closing or self._conversation_ended.is_set():
@@ -287,14 +295,11 @@ async def _read_from_microphone(self):
287295
timeout=self.connection_settings.semaphore_timeout_seconds,
288296
)
289297

290-
# audio_chunk size is 128 * 2 = 256 bytes which is about 8ms
291-
audio_chunk = stream.read(num_frames=128, exception_on_overflow=False)
298+
audio_chunk = await async_stream_read()
292299

293300
self.client_seq_no += 1
294301
self._call_middleware(ClientMessageType.AddAudio, audio_chunk, True)
295302
await self.websocket.send(audio_chunk)
296-
# send audio at a constant rate
297-
await asyncio.sleep(0.01)
298303
except KeyboardInterrupt:
299304
await self.websocket.send(self._end_of_audio())
300305
finally:
@@ -377,11 +382,13 @@ async def _playback_handler(self):
377382
format=pyaudio.paInt16,
378383
channels=1,
379384
rate=self.playback_settings.sample_rate,
380-
frames_per_buffer=self.playback_settings.chunk_size,
381385
output=True,
382386
)
383387
chunk_size = self.playback_settings.chunk_size
384388

389+
async def async_stream_write(chunk):
390+
return await self._loop.run_in_executor(self._executor, stream.write, chunk)
391+
385392
try:
386393
while not self._session_needs_closing or self._conversation_ended.is_set():
387394
# Wait for the server to start sending audio
@@ -397,8 +404,7 @@ async def _playback_handler(self):
397404
async with self._audio_buffer_lock:
398405
audio_chunk = bytes(self._audio_buffer[:chunk_size])
399406
self._audio_buffer = self._audio_buffer[chunk_size:]
400-
stream.write(audio_chunk)
401-
await asyncio.sleep(0.005)
407+
await async_stream_write(audio_chunk)
402408
except Exception as e:
403409
LOGGER.error(f"Error during audio playback: {e}", exc_info=True)
404410
raise e

speechmatics_flow/playback.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import asyncio
2+
3+
import pyaudio
4+
5+
6+
async def audio_playback(audio_queue: asyncio.Queue):
7+
"""Continuously read from the audio queue and play audio back to the user."""
8+
9+
p = pyaudio.PyAudio()
10+
player_stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
11+
try:
12+
while True:
13+
audio = await audio_queue.get()
14+
player_stream.write(audio)
15+
await asyncio.sleep(0.005)
16+
finally:
17+
player_stream.stop_stream()
18+
player_stream.close()
19+
p.terminate()

0 commit comments

Comments
 (0)