
Commit 035b29d

feat(stt): remove interim_results and low_latency wss params
1 parent d026ab2 commit 035b29d

File tree

4 files changed: 11 additions, 110 deletions


examples/microphone-speech-to-text.py

Lines changed: 1 addition & 2 deletions
@@ -72,8 +72,7 @@ def recognize_using_weboscket(*args):
     mycallback = MyRecognizeCallback()
     speech_to_text.recognize_using_websocket(audio=audio_source,
                                              content_type='audio/l16; rate=44100',
-                                             recognize_callback=mycallback,
-                                             interim_results=True)
+                                             recognize_callback=mycallback)
 
     ###############################################
     #### Prepare the for recording using Pyaudio ##
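For context, a minimal usage sketch of how a caller invokes recognize_using_websocket after this change (this sketch is not part of the commit; the API key, service URL, and audio file name are placeholders):

# Minimal usage sketch; credentials, service URL, and file name are placeholders.
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

authenticator = IAMAuthenticator('<your-api-key>')            # placeholder credential
speech_to_text = SpeechToTextV1(authenticator=authenticator)
speech_to_text.set_service_url('<your-service-url>')          # placeholder endpoint

class MyRecognizeCallback(RecognizeCallback):
    def on_transcription(self, transcript):
        # Final transcription results are delivered here.
        print(transcript)

    def on_error(self, error):
        print('Error received: {}'.format(error))

with open('audio-file.wav', 'rb') as audio_file:              # placeholder audio file
    audio_source = AudioSource(audio_file)
    speech_to_text.recognize_using_websocket(
        audio=audio_source,
        content_type='audio/wav',
        recognize_callback=MyRecognizeCallback())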

ibm_watson/speech_to_text_v1_adapter.py

Lines changed: 1 addition & 21 deletions
@@ -1,6 +1,6 @@
 # coding: utf-8
 
-# (C) Copyright IBM Corp. 2018, 2021.
+# (C) Copyright IBM Corp. 2018, 2024.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,7 +33,6 @@ def recognize_using_websocket(self,
                                   customization_weight=None,
                                   base_model_version=None,
                                   inactivity_timeout=None,
-                                  interim_results=None,
                                   keywords=None,
                                   keywords_threshold=None,
                                   max_alternatives=None,
@@ -55,7 +54,6 @@ def recognize_using_websocket(self,
                                   split_transcript_at_phrase_end=None,
                                   speech_detector_sensitivity=None,
                                   background_audio_suppression=None,
-                                  low_latency=None,
                                   character_insertion_bias=None,
                                   **kwargs):
         """
@@ -271,22 +269,6 @@ def recognize_using_websocket(self,
           * 1.0 suppresses all audio (no audio is transcribed).
           The values increase on a monotonic curve. See [Background audio
           suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression).
-        :param bool low_latency: (optional) If `true` for next-generation
-          `Multimedia` and `Telephony` models that support low latency, directs the
-          service to produce results even more quickly than it usually does.
-          Next-generation models produce transcription results faster than
-          previous-generation models. The `low_latency` parameter causes the models
-          to produce results even more quickly, though the results might be less
-          accurate when the parameter is used.
-          **Note:** The parameter is beta functionality. It is not available for
-          previous-generation `Broadband` and `Narrowband` models. It is available
-          only for some next-generation models.
-          * For a list of next-generation models that support low latency, see
-          [Supported language
-          models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported)
-          for next-generation models.
-          * For more information about the `low_latency` parameter, see [Low
-          latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
         :param float character_insertion_bias: (optional) For next-generation
           `Multimedia` and `Telephony` models, an indication of whether the service
           is biased to recognize shorter or longer strings of characters when
@@ -355,7 +337,6 @@ def recognize_using_websocket(self,
             'customization_weight': customization_weight,
             'content_type': content_type,
             'inactivity_timeout': inactivity_timeout,
-            'interim_results': interim_results,
             'keywords': keywords,
             'keywords_threshold': keywords_threshold,
             'max_alternatives': max_alternatives,
@@ -375,7 +356,6 @@ def recognize_using_websocket(self,
             'split_transcript_at_phrase_end': split_transcript_at_phrase_end,
             'speech_detector_sensitivity': speech_detector_sensitivity,
             'background_audio_suppression': background_audio_suppression,
-            'low_latency': low_latency,
             'character_insertion_bias': character_insertion_bias
         }
         options = {k: v for k, v in options.items() if v is not None}
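The last context line above is what keeps the removal transparent on the wire: the adapter drops every option that is still None before building the websocket open message, so callers that never set these parameters see no change in the request payload. A standalone illustration of that filtering idiom follows; the build_options helper and the option values are hypothetical, for illustration only:

# Standalone illustration of the None-filtering idiom from the adapter;
# build_options and the option values below are hypothetical.
def build_options(**kwargs):
    # Keep only the options the caller explicitly set.
    return {k: v for k, v in kwargs.items() if v is not None}

options = build_options(
    content_type='audio/l16; rate=44100',
    inactivity_timeout=None,               # dropped: never sent to the service
    background_audio_suppression=0.5,
)
print(options)
# {'content_type': 'audio/l16; rate=44100', 'background_audio_suppression': 0.5}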

ibm_watson/websocket/recognize_listener.py

Lines changed: 9 additions & 10 deletions
@@ -196,16 +196,15 @@ def on_data(self, ws, message, message_type, fin):
             # set of transcriptions and send them to the appropriate callbacks.
             results = json_object.get('results')
             if results:
-                if (self.options.get('interim_results') is True):
-                    b_final = (results[0].get('final') is True)
-                    alternatives = results[0].get('alternatives')
-                    if alternatives:
-                        hypothesis = alternatives[0].get('transcript')
-                        transcripts = self.extract_transcripts(alternatives)
-                        if b_final:
-                            self.callback.on_transcription(transcripts)
-                        if hypothesis:
-                            self.callback.on_hypothesis(hypothesis)
+                b_final = (results[0].get('final') is True)
+                alternatives = results[0].get('alternatives')
+                if alternatives:
+                    hypothesis = alternatives[0].get('transcript')
+                    transcripts = self.extract_transcripts(alternatives)
+                    if b_final:
+                        self.callback.on_transcription(transcripts)
+                    if hypothesis:
+                        self.callback.on_hypothesis(hypothesis)
                 else:
                     final_transcript = []
                     for result in results:
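With the interim_results gate gone, the listener applies the same dispatch to every results message: hypotheses go to on_hypothesis, and final results additionally go to on_transcription. A simplified, self-contained sketch of that branch follows; the dispatch function, the sample message, and the flat transcript format are illustrative stand-ins, not the SDK's own extract_transcripts output:

# Simplified sketch of the dispatch branch above; `dispatch`, the sample
# message, and the transcript format are illustrative stand-ins.
def dispatch(results, callback):
    b_final = (results[0].get('final') is True)
    alternatives = results[0].get('alternatives')
    if alternatives:
        hypothesis = alternatives[0].get('transcript')
        transcripts = [{'transcript': a.get('transcript'),
                        'confidence': a.get('confidence')} for a in alternatives]
        if b_final:
            callback.on_transcription(transcripts)   # final result
        if hypothesis:
            callback.on_hypothesis(hypothesis)       # best current guess

class PrintCallback:
    def on_transcription(self, transcripts):
        print('final:', transcripts)

    def on_hypothesis(self, hypothesis):
        print('hypothesis:', hypothesis)

sample_results = [{'final': True,
                   'alternatives': [{'transcript': 'and heavy rain ',
                                     'confidence': 0.9}]}]
dispatch(sample_results, PrintCallback())
# Prints the final transcript list first, then the hypothesis string.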

test/integration/test_speech_to_text_v1.py

Lines changed: 0 additions & 77 deletions
@@ -118,83 +118,6 @@ def on_data(self, data):
         assert test_callback.data['results'][0]['alternatives'][0]
             ['transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '
 
-    def test_on_transcription_interim_results_false(self):
-
-        class MyRecognizeCallback(RecognizeCallback):
-
-            def __init__(self):
-                RecognizeCallback.__init__(self)
-                self.error = None
-                self.transcript = None
-
-            def on_error(self, error):
-                self.error = error
-
-            def on_transcription(self, transcript):
-                self.transcript = transcript
-
-        test_callback = MyRecognizeCallback()
-        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
-            audio_source = AudioSource(audio_file, False)
-            self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
-                                                          interim_results=False, low_latency=False)
-        assert test_callback.error is None
-        assert test_callback.transcript is not None
-        assert test_callback.transcript[0][0]['transcript'] in ['isolated tornadoes ', 'isolated tornados ']
-        assert test_callback.transcript[1][0]['transcript'] == 'and heavy rain '
-
-    def test_on_transcription_interim_results_true(self):
-
-        class MyRecognizeCallback(RecognizeCallback):
-
-            def __init__(self):
-                RecognizeCallback.__init__(self)
-                self.error = None
-                self.transcript = None
-
-            def on_error(self, error):
-                self.error = error
-
-            def on_transcription(self, transcript):
-                self.transcript = transcript
-                assert transcript[0]['confidence'] is not None
-                assert transcript[0]['transcript'] is not None
-
-        test_callback = MyRecognizeCallback()
-        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
-            audio_source = AudioSource(audio_file, False)
-            self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
-                                                          interim_results=True, low_latency=True)
-        assert test_callback.error is None
-        assert test_callback.transcript is not None
-        assert test_callback.transcript[0]['transcript'] == 'and heavy rain '
-
-    def test_on_transcription_interim_results_true_low_latency_false(self):
-
-        class MyRecognizeCallback(RecognizeCallback):
-
-            def __init__(self):
-                RecognizeCallback.__init__(self)
-                self.error = None
-                self.transcript = None
-
-            def on_error(self, error):
-                self.error = error
-
-            def on_transcription(self, transcript):
-                self.transcript = transcript
-                assert transcript[0]['confidence'] is not None
-                assert transcript[0]['transcript'] is not None
-
-        test_callback = MyRecognizeCallback()
-        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
-            audio_source = AudioSource(audio_file, False)
-            self.speech_to_text.recognize_using_websocket(audio_source, "audio/wav", test_callback, model="en-US_Telephony",
-                                                          interim_results=True, low_latency=False)
-        assert test_callback.error is None
-        assert test_callback.transcript is not None
-        assert test_callback.transcript[0]['transcript'] == 'and heavy rain '
-
     def test_custom_grammars(self):
         customization_id = None
         for custom_model in self.custom_models.get('customizations'):
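For comparison, a sketch of what an equivalent integration test looks like once the parameters are gone, adapted from the deleted tests above. It is meant as a method inside the existing integration-test class (which already imports os, RecognizeCallback, and AudioSource) and requires a live Speech to Text instance plus the repository's test resources, so it is illustrative rather than runnable on its own:

    # Sketch adapted from the deleted tests: same structure, minus the removed
    # interim_results/low_latency keyword arguments. Requires a live service.
    def test_on_transcription(self):

        class MyRecognizeCallback(RecognizeCallback):

            def __init__(self):
                RecognizeCallback.__init__(self)
                self.error = None
                self.transcript = None

            def on_error(self, error):
                self.error = error

            def on_transcription(self, transcript):
                self.transcript = transcript

        test_callback = MyRecognizeCallback()
        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech_with_pause.wav'), 'rb') as audio_file:
            audio_source = AudioSource(audio_file, False)
            self.speech_to_text.recognize_using_websocket(
                audio_source, "audio/wav", test_callback, model="en-US_Telephony")
        assert test_callback.error is None
        assert test_callback.transcript is not None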
