
Commit 035b29d

feat(stt): remove interim_results and low_latency wss params
1 parent d026ab2 commit 035b29d

File tree

4 files changed: 11 additions, 110 deletions


examples/microphone-speech-to-text.py

Lines changed: 1 addition & 2 deletions
@@ -72,8 +72,7 @@ def recognize_using_weboscket(*args):
     mycallback = MyRecognizeCallback()
     speech_to_text.recognize_using_websocket(audio=audio_source,
                                              content_type='audio/l16; rate=44100',
-                                             recognize_callback=mycallback,
-                                             interim_results=True)
+                                             recognize_callback=mycallback)
 
     ###############################################
     #### Prepare the for recording using Pyaudio ##
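For context, a minimal usage sketch of how a caller invokes recognize_using_websocket after this change (this sketch is not part of the commit; the API key, service URL, and audio file name are placeholders):

# Minimal usage sketch; credentials, service URL, and file name are placeholders.
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

authenticator = IAMAuthenticator('<your-api-key>')            # placeholder credential
speech_to_text = SpeechToTextV1(authenticator=authenticator)
speech_to_text.set_service_url('<your-service-url>')          # placeholder endpoint

class MyRecognizeCallback(RecognizeCallback):
    def on_transcription(self, transcript):
        # Final transcription results are delivered here.
        print(transcript)

    def on_error(self, error):
        print('Error received: {}'.format(error))

with open('audio-file.wav', 'rb') as audio_file:              # placeholder audio file
    audio_source = AudioSource(audio_file)
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/wav',
        recognize_callback=MyRecognizeCallback())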

ibm_watson/speech_to_text_v1_adapter.py

Lines changed: 1 addition & 21 deletions
@@ -1,6 +1,6 @@
 # coding: utf-8
 
-# (C) Copyright IBM Corp. 2018, 2021.
+# (C) Copyright IBM Corp. 2018, 2024.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,7 +33,6 @@ def recognize_using_websocket(self,
                                   customization_weight=None,
                                   base_model_version=None,
                                   inactivity_timeout=None,
-                                  interim_results=None,
                                   keywords=None,
                                   keywords_threshold=None,
                                   max_alternatives=None,
@@ -55,7 +54,6 @@ def recognize_using_websocket(self,
                                   split_transcript_at_phrase_end=None,
                                   speech_detector_sensitivity=None,
                                   background_audio_suppression=None,
-                                  low_latency=None,
                                   character_insertion_bias=None,
                                   **kwargs):
         """
@@ -271,22 +269,6 @@ def recognize_using_websocket(self,
           * 1.0 suppresses all audio (no audio is transcribed).
           The values increase on a monotonic curve. See [Background audio
           suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
-        :param bool low_latency: (optional) If `true` for next-generation
-          `Multimedia` and `Telephony` models that support low latency, directs the
-          service to produce results even more quickly than it usually does.
-          Next-generation models produce transcription results faster than
-          previous-generation models. The `low_latency` parameter causes the models
-          to produce results even more quickly, though the results might be less
-          accurate when the parameter is used.
-          **Note:** The parameter is beta functionality. It is not available for
-          previous-generation `Broadband` and `Narrowband` models. It is available
-          only for some next-generation models.
-          * For a list of next-generation models that support low latency, see
-          [Supported language
-          models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
-          for next-generation models.
-          * For more information about the `low_latency` parameter, see [Low
-          latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
         :param float character_insertion_bias: (optional) For next-generation
           `Multimedia` and `Telephony` models, an indication of whether the service
           is biased to recognize shorter or longer strings of characters when
@@ -355,7 +337,6 @@ def recognize_using_websocket(self,
             'customization_weight': customization_weight,
             'content_type': content_type,
             'inactivity_timeout': inactivity_timeout,
-            'interim_results': interim_results,
             'keywords': keywords,
             'keywords_threshold': keywords_threshold,
             'max_alternatives': max_alternatives,
@@ -375,7 +356,6 @@ def recognize_using_websocket(self,
             'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
             'speech_detector_sensitivity': speech_detector_sensitivity,
             'background_audio_suppression': background_audio_suppression,
-            'low_latency': low_latency,
             'character_insertion_bias': character_insertion_bias
         }
         options = {k: v for k, v in options.items() if v is not None}
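The last context line above is what keeps the removal transparent on the wire: the adapter drops every option that is still None before building the websocket open message, so callers that never set these parameters see no change in the request payload. A standalone illustration of that filtering idiom follows; the build_options helper and the option values are hypothetical, for illustration only:

# Standalone illustration of the None-filtering idiom from the adapter;
# build_options and the option values below are hypothetical.
def build_options(**kwargs):
    # Keep only the options the caller explicitly set.
    return {k: v for k, v in kwargs.items() if v is not None}

options = build_options(
    content_type='audio/l16; rate=44100',
    inactivity_timeout=None,               # dropped: never sent to the service
    background_audio_suppression=0.5,
)
print(options)
# {'content_type': 'audio/l16; rate=44100', 'background_audio_suppression': 0.5}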

ibm_watson/websocket/recognize_listener.py

Lines changed: 9 additions & 10 deletions
@@ -196,16 +196,15 @@ def on_data(self, ws, message, message_type, fin):
             # set of transcriptions and send them to the appropriate callbacks.
             results = json_object.get('results')
             if results:
-                if (self.options.get('interim_results') is True):
-                    b_final = (results[0].get('final') is True)
-                    alternatives = results[0].get('alternatives')
-                    if alternatives:
-                        hypothesis = alternatives[0].get('transcript')
-                        transcripts = self.extract_transcripts(alternatives)
-                        if b_final:
-                            self.callback.on_transcription(transcripts)
-                        if hypothesis:
-                            self.callback.on_hypothesis(hypothesis)
+                b_final = (results[0].get('final') is True)
+                alternatives = results[0].get('alternatives')
+                if alternatives:
+                    hypothesis = alternatives[0].get('transcript')
+                    transcripts = self.extract_transcripts(alternatives)
+                    if b_final:
+                        self.callback.on_transcription(transcripts)
+                    if hypothesis:
+                        self.callback.on_hypothesis(hypothesis)
                 else:
                     final_transcript = []
                     for result in results:
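With the interim_results gate gone, the listener applies the same dispatch to every results message: hypotheses go to on_hypothesis, and final results additionally go to on_transcription. A simplified, self-contained sketch of that branch follows; the dispatch function, the sample message, and the flat transcript format are illustrative stand-ins, not the SDK's own extract_transcripts output:

# Simplified sketch of the dispatch branch above; `dispatch`, the sample
# message, and the transcript format are illustrative stand-ins.
def dispatch(results, callback):
    b_final = (results[0].get('final') is True)
    alternatives = results[0].get('alternatives')
    if alternatives:
        hypothesis = alternatives[0].get('transcript')
        transcripts = [{'transcript': a.get('transcript'),
                        'confidence': a.get('confidence')} for a in alternatives]
        if b_final:
            callback.on_transcription(transcripts)   # final result
        if hypothesis:
            callback.on_hypothesis(hypothesis)       # best current guess

class PrintCallback:
    def on_transcription(self, transcripts):
        print('final:', transcripts)

    def on_hypothesis(self, hypothesis):
        print('hypothesis:', hypothesis)

sample_results = [{'final': True,
                   'alternatives': [{'transcript': 'and heavy rain ',
                                     'confidence': 0.9}]}]
dispatch(sample_results, PrintCallback())
# Prints the final transcript list first, then the hypothesis string.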

test/integration/test_speech_to_text_v1.py

Lines changed: 0 additions & 77 deletions
@@ -118,83 +118,6 @@ def on_data(self, data):
         assert test_callback.data['results'][0]['alternatives'][0]
             ['transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '
 
-    def test_on_transcription_interim_results_false(self):
-
-        class MyRecognizeCallback(RecognizeCallback):
-
-            def __init__(self):
-                RecognizeCallback.__init__(self)
-                self.error = None
-                self.transcript = None
-
-            def on_error(self, error):
-                self.error = error
-
-            def on_transcription(self, transcript):
-                self.transcript = transcript
-
-        test_callback = MyRecognizeCallback()
-        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
-            audio_source = AudioSource(audio_file, False)
-            self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
-                                                          interim_results=False, low_latency=False)
-        assert test_callback.error is None
-        assert test_callback.transcript is not None
-        assert test_callback.transcript[0][0]['transcript'] in ['isolated tornadoes ', 'isolated tornados ']
-        assert test_callback.transcript[1][0]['transcript'] == 'and heavy rain '
-
-    def test_on_transcription_interim_results_true(self):
-
-        class MyRecognizeCallback(RecognizeCallback):
-
-            def __init__(self):
-                RecognizeCallback.__init__(self)
-                self.error = None
-                self.transcript = None
-
-            def on_error(self, error):
-                self.error = error
-
-            def on_transcription(self, transcript):
-                self.transcript = transcript
-                assert transcript[0]['confidence'] is not None
-                assert transcript[0]['transcript'] is not None
-
-        test_callback = MyRecognizeCallback()
-        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
-            audio_source = AudioSource(audio_file, False)
-            self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
-                                                          interim_results=True, low_latency=True)
-        assert test_callback.error is None
-        assert test_callback.transcript is not None
-        assert test_callback.transcript[0]['transcript'] == 'and heavy rain '
-
-    def test_on_transcription_interim_results_true_low_latency_false(self):
-
-        class MyRecognizeCallback(RecognizeCallback):
-
-            def __init__(self):
-                RecognizeCallback.__init__(self)
-                self.error = None
-                self.transcript = None
-
-            def on_error(self, error):
-                self.error = error
-
-            def on_transcription(self, transcript):
-                self.transcript = transcript
-                assert transcript[0]['confidence'] is not None
-                assert transcript[0]['transcript'] is not None
-
-        test_callback = MyRecognizeCallback()
-        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
-            audio_source = AudioSource(audio_file, False)
-            self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
-                                                          interim_results=True, low_latency=False)
-        assert test_callback.error is None
-        assert test_callback.transcript is not None
-        assert test_callback.transcript[0]['transcript'] == 'and heavy rain '
-
     def test_custom_grammars(self):
         customization_id = None
         for custom_model in self.custom_models.get('customizations'):
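For comparison, a sketch of what an equivalent integration test looks like once the parameters are gone, adapted from the deleted tests above. It is meant as a method inside the existing integration-test class (which already imports os, RecognizeCallback, and AudioSource) and requires a live Speech to Text instance plus the repository's test resources, so it is illustrative rather than runnable on its own:

    # Sketch adapted from the deleted tests: same structure, minus the removed
    # interim_results/low_latency keyword arguments. Requires a live service.
    def test_on_transcription(self):

        class MyRecognizeCallback(RecognizeCallback):

            def __init__(self):
                RecognizeCallback.__init__(self)
                self.error = None
                self.transcript = None

            def on_error(self, error):
                self.error = error

            def on_transcription(self, transcript):
                self.transcript = transcript

        test_callback = MyRecognizeCallback()
        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
            audio_source = AudioSource(audio_file, False)
            self.speech_to_text.recognize_using_websocket(
                audio_source, "audio/wav", test_callback, model="en-US_Telephony")
        assert test_callback.error is None
        assert test_callback.transcript is not None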
