@@ -51,6 +51,8 @@ def recognize_using_websocket(self,
                                   processing_metrics=None,
                                   processing_metrics_interval=None,
                                   audio_metrics=None,
+                                  end_of_phrase_silence_time=None,
+                                  split_transcript_at_phrase_end=None,
                                   **kwargs):
         """
         Sends audio for speech recognition using web sockets.
@@ -188,6 +190,31 @@ def recognize_using_websocket(self,
         :param bool audio_metrics: If `true`, requests detailed information about the
         signal characteristics of the input audio. The service returns audio metrics with
         the final transcription results. By default, the service returns no audio metrics.
+        :param float end_of_phrase_silence_time: (optional) Specifies
+        the duration of the pause interval at which the service splits a transcript
+        into multiple final results. If the service detects pauses or extended
+        silence before it reaches the end of the audio stream, its response can
+        include multiple final results. Silence indicates a point at which the
+        speaker pauses between spoken words or phrases.
+        Specify a value for the pause interval in the range of 0.0 to 120.0.
+        * A value greater than 0 specifies the interval that the service is to use
+        for speech recognition.
+        * A value of 0 indicates that the service is to use the default interval.
+        It is equivalent to omitting the parameter.
+        The default pause interval for most languages is 0.8 seconds; the default
+        for Chinese is 0.6 seconds.
+        See [End of phrase silence
+        time](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#silence_time).
+        :param bool split_transcript_at_phrase_end: (optional) If `true`, directs
+        the service to split the transcript into multiple final results based on
+        semantic features of the input, for example, at the conclusion of
+        meaningful phrases such as sentences. The service bases its understanding
+        of semantic features on the base language model that you use with a
+        request. Custom language models and grammars can also influence how and
+        where the service splits a transcript. By default, the service splits
+        transcripts based solely on the pause interval.
+        See [Split transcript at phrase
+        end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
         :param dict headers: A `dict` containing the request headers
         :return: A `dict` containing the `SpeechRecognitionResults` response.
         :rtype: dict
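
For reference, a minimal sketch of how the two new parameters would be passed to `recognize_using_websocket`, modeled on the SDK's other websocket examples. The API key, model choice, and audio file are placeholders, and `PhraseSplitCallback` is a hypothetical name for an ordinary `RecognizeCallback` subclass:

```python
import json

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import AudioSource, RecognizeCallback


class PhraseSplitCallback(RecognizeCallback):
    """Print each response as the service splits the transcript."""

    def on_data(self, data):
        # Each element of `results` can now be a separate final result,
        # split at the pause interval or at a phrase boundary.
        print(json.dumps(data, indent=2))

    def on_error(self, error):
        print('Error received: {}'.format(error))


speech_to_text = SpeechToTextV1(
    authenticator=IAMAuthenticator('{apikey}'))  # placeholder credentials

with open('audio.flac', 'rb') as audio_file:
    speech_to_text.recognize_using_websocket(
        audio=AudioSource(audio_file),
        content_type='audio/flac',
        recognize_callback=PhraseSplitCallback(),
        # Finalize a result after 0.4 s of silence instead of the
        # 0.8 s default ...
        end_of_phrase_silence_time=0.4,
        # ... and also split at semantic phrase boundaries.
        split_transcript_at_phrase_end=True)
```

With `end_of_phrase_silence_time=0.4`, the service closes out a final result after 0.4 seconds of silence rather than the 0.8-second default, and `split_transcript_at_phrase_end=True` additionally splits at sentence-like boundaries derived from the base language model.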