Skip to content

Commit 84076ef

Browse files
authored
Merge pull request #3 from speechmatics/v0.0.3
Fix latency and improve audio playback on systems running Python 3.12+
2 parents 23bf49e + 08be7bc commit 84076ef

File tree

4 files changed

+51
-32
lines changed

4 files changed

+51
-32
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,18 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66

7+
## [0.0.3] - 2024-10-23
8+
9+
### Changed
10+
11+
- PyAudio class is instantiated only when the client is started directly from the CLI.
12+
- Simplified the microphone example.
13+
14+
### Fixed
15+
16+
- Choppy audio playback on some systems using Python 3.12+
17+
- Latency issues on some systems using Python 3.12+
18+
719
## [0.0.2] - 2024-10-17
820

921
### Added

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.0.2
1+
0.0.3

examples/stream_from_microphone.py

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
AUTH_TOKEN = "YOUR TOKEN HERE"
1818

19-
2019
# Create a websocket client
2120
ssl_context = ssl.create_default_context()
2221
ssl_context.check_hostname = False
@@ -29,57 +28,61 @@
2928
)
3029
)
3130

32-
# Create a buffer to store binary messages sent from the server
33-
audio_buffer = io.BytesIO()
31+
32+
# Create an asyncio queue to store audio data
33+
audio_queue = asyncio.Queue()
3434

3535

36-
# Create callback function which adds binary messages to audio buffer
36+
# Create a callback function to add binary messages to the audio queue
3737
def binary_msg_handler(msg: bytes):
3838
if isinstance(msg, (bytes, bytearray)):
39-
audio_buffer.write(msg)
39+
audio_queue.put_nowait(msg)
4040

4141

42-
# Register the callback to be called when the client receives an audio message from the server
42+
# Register the callback to be called when the client receives an audio message
4343
client.add_event_handler(ServerMessageType.audio, binary_msg_handler)
4444

4545

4646
async def audio_playback():
47-
"""Read from buffer and play audio back to the user"""
47+
"""Continuously read from the audio queue and play audio back to the user."""
4848
p = pyaudio.PyAudio()
49-
stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
49+
chunk_size = 1024
50+
player_stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, output=True)
51+
5052
try:
5153
while True:
52-
# Get the current value from the buffer
53-
audio_to_play = audio_buffer.getvalue()
54-
# Only proceed if there is audio data to play
55-
if audio_to_play:
56-
# Write the audio to the stream
57-
stream.write(audio_to_play)
58-
audio_buffer.seek(0)
59-
audio_buffer.truncate(0)
60-
# Pause briefly before checking the buffer again
61-
await asyncio.sleep(0.05)
54+
# Create a new playback buffer for each iteration
55+
playback_buffer = io.BytesIO()
56+
57+
# Fill the buffer until it has enough data
58+
while playback_buffer.tell() < chunk_size:
59+
playback_buffer.write(await audio_queue.get())
60+
61+
# Write the full buffer contents to the player stream
62+
player_stream.write(playback_buffer.getvalue())
6263
finally:
63-
stream.close()
64-
stream.stop_stream()
64+
player_stream.stop_stream()
65+
player_stream.close()
6566
p.terminate()
6667

6768

6869
async def main():
70+
"""Main function to run both the WebSocket client and audio playback."""
6971
tasks = [
70-
# Use the websocket to connect to Flow Service and start a conversation
72+
# Start the WebSocket client and conversation
7173
asyncio.create_task(
7274
client.run(
7375
interactions=[Interaction(sys.stdin.buffer)],
7476
audio_settings=AudioSettings(),
7577
conversation_config=ConversationConfig(),
7678
)
7779
),
78-
# Run audio playback handler which streams audio from audio buffer
80+
# Start the audio playback handler
7981
asyncio.create_task(audio_playback()),
8082
]
8183

8284
await asyncio.gather(*tasks)
8385

8486

87+
# Run the main event loop
8588
asyncio.run(main())

speechmatics_flow/client.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ def __init__(
6969
self.conversation_ended_wait_timeout = 5
7070
self._session_needs_closing = False
7171
self._audio_buffer = None
72-
self._pyaudio = pyaudio.PyAudio
7372

7473
# The following asyncio fields are fully instantiated in
7574
# _init_synchronization_primitives
@@ -86,7 +85,6 @@ async def _init_synchronization_primitives(self):
8685
"""
8786
self._conversation_started = asyncio.Event()
8887
self._conversation_ended = asyncio.Event()
89-
self._pyaudio = pyaudio.PyAudio()
9088
self._buffer_semaphore = asyncio.BoundedSemaphore(
9189
self.connection_settings.message_buffer_size
9290
)
@@ -165,7 +163,6 @@ async def _consumer(self, message, from_cli: False):
165163
:raises ForceEndSession: If this was raised by the user's event
166164
handler.
167165
"""
168-
LOGGER.debug(message)
169166
if isinstance(message, (bytes, bytearray)):
170167
# add an audio message to local buffer only when running from cli
171168
if from_cli:
@@ -174,6 +171,7 @@ async def _consumer(self, message, from_cli: False):
174171
# so we need to set it here for event_handler to work
175172
message_type = ServerMessageType.audio
176173
else:
174+
LOGGER.debug(message)
177175
message = json.loads(message)
178176
message_type = message.get("message")
179177

@@ -200,14 +198,15 @@ async def _consumer(self, message, from_cli: False):
200198
raise ConversationError(message["reason"])
201199

202200
async def _read_from_microphone(self):
201+
_pyaudio = pyaudio.PyAudio()
203202
print(
204-
f"Default input device: {self._pyaudio.get_default_input_device_info()['name']}"
203+
f"Default input device: {_pyaudio.get_default_input_device_info()['name']}"
205204
)
206205
print(
207-
f"Default output device: {self._pyaudio.get_default_output_device_info()['name']}"
206+
f"Default output device: {_pyaudio.get_default_output_device_info()['name']}"
208207
)
209208
print("Start speaking...")
210-
stream = self._pyaudio.open(
209+
stream = _pyaudio.open(
211210
format=pyaudio.paInt16,
212211
channels=1,
213212
rate=self.audio_settings.sample_rate,
@@ -229,13 +228,15 @@ async def _read_from_microphone(self):
229228
self.seq_no += 1
230229
self._call_middleware(ClientMessageType.AddAudio, audio_chunk, True)
231230
await self.websocket.send(audio_chunk)
231+
# send audio at a constant rate
232+
await asyncio.sleep(0.01)
232233
except KeyboardInterrupt:
233234
await self.websocket.send(self._end_of_audio())
234235
finally:
235236
await self._wait_for_conversation_ended()
236237
stream.stop_stream()
237238
stream.close()
238-
self._pyaudio.terminate()
239+
_pyaudio.terminate()
239240

240241
async def _consumer_handler(self, from_cli: False):
241242
"""
@@ -295,7 +296,8 @@ async def _playback_handler(self):
295296
"""
296297
Reads audio binary messages from the playback buffer and plays them to the user.
297298
"""
298-
stream = self._pyaudio.open(
299+
_pyaudio = pyaudio.PyAudio()
300+
stream = _pyaudio.open(
299301
format=pyaudio.paInt16,
300302
channels=1,
301303
rate=self.audio_settings.sample_rate,
@@ -309,13 +311,15 @@ async def _playback_handler(self):
309311
audio_message = await self._audio_buffer.get()
310312
stream.write(audio_message)
311313
self._audio_buffer.task_done()
314+
# read from buffer at a constant rate
315+
await asyncio.sleep(0.005)
312316
except Exception as e:
313317
LOGGER.error(f"Error during audio playback: {e}")
314318
raise e
315319
finally:
316320
stream.close()
317321
stream.stop_stream()
318-
self._pyaudio.terminate()
322+
_pyaudio.terminate()
319323

320324
def _call_middleware(self, event_name, *args):
321325
"""

0 commit comments

Comments
 (0)