Skip to content

Commit e80da05

Browse files
refactor(speech): (set-5) Simplified next set of Speech Samples (#12533)
* The last set of refactored speech samples * Minor changes in naming * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 8bb38a1 commit e80da05

13 files changed

+139
-172
lines changed

speech/snippets/transcribe_streaming.py

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -13,28 +13,26 @@
1313
# limitations under the License.
1414

1515
"""Google Cloud Speech API sample application using the streaming API.
16-
17-
Example usage:
18-
python transcribe_streaming.py resources/audio.raw
1916
"""
2017

21-
import argparse
22-
2318
from google.cloud import speech
2419

2520

2621
# [START speech_transcribe_streaming]
2722
def transcribe_streaming(stream_file: str) -> speech.RecognitionConfig:
28-
"""Streams transcription of the given audio file."""
29-
23+
"""Streams transcription of the given audio file using Google Cloud Speech-to-Text API.
24+
Args:
25+
stream_file (str): Path to the local audio file to be transcribed.
26+
Example: "resources/audio.raw"
27+
"""
3028
client = speech.SpeechClient()
3129

3230
# [START speech_python_migration_streaming_request]
3331
with open(stream_file, "rb") as audio_file:
34-
content = audio_file.read()
32+
audio_content = audio_file.read()
3533

3634
# In practice, stream should be a generator yielding chunks of audio data.
37-
stream = [content]
35+
stream = [audio_content]
3836

3937
requests = (
4038
speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in stream
@@ -76,9 +74,4 @@ def transcribe_streaming(stream_file: str) -> speech.RecognitionConfig:
7674

7775

7876
if __name__ == "__main__":
79-
parser = argparse.ArgumentParser(
80-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
81-
)
82-
parser.add_argument("stream", help="File to stream to the API")
83-
args = parser.parse_args()
84-
transcribe_streaming(args.stream)
77+
transcribe_streaming("resources/audio.raw")

speech/snippets/transcribe_streaming_v2.py

Lines changed: 17 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -12,39 +12,38 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import argparse
17-
1815
# [START speech_transcribe_streaming_v2]
16+
import os
17+
1918
from google.cloud.speech_v2 import SpeechClient
2019
from google.cloud.speech_v2.types import cloud_speech as cloud_speech_types
2120

21+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
22+
2223

2324
def transcribe_streaming_v2(
24-
project_id: str,
25-
audio_file: str,
25+
stream_file: str,
2626
) -> cloud_speech_types.StreamingRecognizeResponse:
27-
"""Transcribes audio from audio file stream.
28-
27+
"""Transcribes audio from an audio file stream using Google Cloud Speech-to-Text API.
2928
Args:
30-
project_id: The GCP project ID.
31-
audio_file: The path to the audio file to transcribe.
32-
29+
stream_file (str): Path to the local audio file to be transcribed.
30+
Example: "resources/audio.wav"
3331
Returns:
34-
The response from the transcribe method.
32+
list[cloud_speech_types.StreamingRecognizeResponse]: A list of objects.
33+
Each response includes the transcription results for the corresponding audio segment.
3534
"""
3635
# Instantiates a client
3736
client = SpeechClient()
3837

3938
# Reads a file as bytes
40-
with open(audio_file, "rb") as f:
41-
content = f.read()
39+
with open(stream_file, "rb") as f:
40+
audio_content = f.read()
4241

4342
# In practice, stream should be a generator yielding chunks of audio data
44-
chunk_length = len(content) // 5
43+
chunk_length = len(audio_content) // 5
4544
stream = [
46-
content[start : start + chunk_length]
47-
for start in range(0, len(content), chunk_length)
45+
audio_content[start : start + chunk_length]
46+
for start in range(0, len(audio_content), chunk_length)
4847
]
4948
audio_requests = (
5049
cloud_speech_types.StreamingRecognizeRequest(audio=audio) for audio in stream
@@ -59,7 +58,7 @@ def transcribe_streaming_v2(
5958
config=recognition_config
6059
)
6160
config_request = cloud_speech_types.StreamingRecognizeRequest(
62-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
61+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
6362
streaming_config=streaming_config,
6463
)
6564

@@ -84,10 +83,4 @@ def requests(config: cloud_speech_types.RecognitionConfig, audio: list) -> list:
8483

8584

8685
if __name__ == "__main__":
87-
parser = argparse.ArgumentParser(
88-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
89-
)
90-
parser.add_argument("project_id", help="GCP Project ID")
91-
parser.add_argument("audio_file", help="Audio file to stream")
92-
args = parser.parse_args()
93-
transcribe_streaming_v2(args.project_id, args.audio_file)
86+
transcribe_streaming_v2("resources/audio.wav")

speech/snippets/transcribe_streaming_v2_test.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,8 @@
2525

2626
@Retry()
2727
def test_transcribe_streaming_v2(capsys: pytest.CaptureFixture) -> None:
28-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
29-
3028
responses = transcribe_streaming_v2.transcribe_streaming_v2(
31-
project_id, os.path.join(_RESOURCES, "audio.wav")
29+
os.path.join(_RESOURCES, "audio.wav")
3230
)
3331

3432
transcript = ""

speech/snippets/transcribe_streaming_voice_activity_events.py

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -12,38 +12,38 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import argparse
17-
1815
# [START speech_transcribe_streaming_voice_activity_events]
16+
import os
17+
1918
from google.cloud.speech_v2 import SpeechClient
2019
from google.cloud.speech_v2.types import cloud_speech
2120

21+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
22+
2223

2324
def transcribe_streaming_voice_activity_events(
24-
project_id: str, audio_file: str
25+
audio_file: str,
2526
) -> cloud_speech.StreamingRecognizeResponse:
26-
"""Transcribes audio from a file into text.
27-
27+
"""Transcribes audio from a file into text and detects voice activity
28+
events using Google Cloud Speech-to-Text API.
2829
Args:
29-
project_id: The GCP project ID to use.
30-
audio_file: The path to the audio file to transcribe.
31-
30+
audio_file (str): Path to the local audio file to be transcribed.
31+
Example: "resources/audio.wav"
3232
Returns:
33-
The streaming response containing the transcript.
33+
list[cloud_speech.StreamingRecognizeResponse]: A list of `StreamingRecognizeResponse` objects.
3434
"""
3535
# Instantiates a client
3636
client = SpeechClient()
3737

3838
# Reads a file as bytes
39-
with open(audio_file, "rb") as f:
40-
content = f.read()
39+
with open(audio_file, "rb") as file:
40+
audio_content = file.read()
4141

4242
# In practice, stream should be a generator yielding chunks of audio data
43-
chunk_length = len(content) // 5
43+
chunk_length = len(audio_content) // 5
4444
stream = [
45-
content[start : start + chunk_length]
46-
for start in range(0, len(content), chunk_length)
45+
audio_content[start : start + chunk_length]
46+
for start in range(0, len(audio_content), chunk_length)
4747
]
4848
audio_requests = (
4949
cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
@@ -64,7 +64,7 @@ def transcribe_streaming_voice_activity_events(
6464
)
6565

6666
config_request = cloud_speech.StreamingRecognizeRequest(
67-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
67+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
6868
streaming_config=streaming_config,
6969
)
7070

@@ -99,10 +99,4 @@ def requests(config: cloud_speech.RecognitionConfig, audio: list) -> list:
9999

100100

101101
if __name__ == "__main__":
102-
parser = argparse.ArgumentParser(
103-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
104-
)
105-
parser.add_argument("project_id", help="GCP Project ID")
106-
parser.add_argument("audio_file", help="Audio file to stream")
107-
args = parser.parse_args()
108-
transcribe_streaming_voice_activity_events(args.project_id, args.audio_file)
102+
transcribe_streaming_voice_activity_events("resources/audio.wav")

speech/snippets/transcribe_streaming_voice_activity_events_test.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,8 @@
2828
def test_transcribe_streaming_voice_activity_events(
2929
capsys: pytest.CaptureFixture,
3030
) -> None:
31-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
32-
3331
responses = transcribe_streaming_voice_activity_events.transcribe_streaming_voice_activity_events(
34-
project_id, os.path.join(_RESOURCES, "audio.wav")
32+
os.path.join(_RESOURCES, "audio.wav")
3533
)
3634

3735
transcript = ""

speech/snippets/transcribe_streaming_voice_activity_timeouts.py

Lines changed: 22 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -12,46 +12,43 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import argparse
17-
1815
# [START speech_transcribe_streaming_voice_activity_timeouts]
16+
import os
1917
from time import sleep
2018

2119
from google.cloud.speech_v2 import SpeechClient
2220
from google.cloud.speech_v2.types import cloud_speech
2321
from google.protobuf import duration_pb2 # type: ignore
2422

23+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
24+
2525

2626
def transcribe_streaming_voice_activity_timeouts(
27-
project_id: str,
2827
speech_start_timeout: int,
2928
speech_end_timeout: int,
3029
audio_file: str,
3130
) -> cloud_speech.StreamingRecognizeResponse:
3231
"""Transcribes audio from audio file to text.
33-
3432
Args:
35-
project_id: The GCP project ID to use.
3633
speech_start_timeout: The timeout in seconds for speech start.
3734
speech_end_timeout: The timeout in seconds for speech end.
38-
audio_file: The audio file to transcribe.
39-
35+
audio_file: Path to the local audio file to be transcribed.
36+
Example: "resources/audio_silence_padding.wav"
4037
Returns:
4138
The streaming response containing the transcript.
4239
"""
4340
# Instantiates a client
4441
client = SpeechClient()
4542

4643
# Reads a file as bytes
47-
with open(audio_file, "rb") as f:
48-
content = f.read()
44+
with open(audio_file, "rb") as file:
45+
audio_content = file.read()
4946

5047
# In practice, stream should be a generator yielding chunks of audio data
51-
chunk_length = len(content) // 20
48+
chunk_length = len(audio_content) // 20
5249
stream = [
53-
content[start : start + chunk_length]
54-
for start in range(0, len(content), chunk_length)
50+
audio_content[start : start + chunk_length]
51+
for start in range(0, len(audio_content), chunk_length)
5552
]
5653
audio_requests = (
5754
cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
@@ -81,7 +78,7 @@ def transcribe_streaming_voice_activity_timeouts(
8178
)
8279

8380
config_request = cloud_speech.StreamingRecognizeRequest(
84-
recognizer=f"projects/{project_id}/locations/global/recognizers/_",
81+
recognizer=f"projects/{PROJECT_ID}/locations/global/recognizers/_",
8582
streaming_config=streaming_config,
8683
)
8784

@@ -119,19 +116,16 @@ def requests(config: cloud_speech.RecognitionConfig, audio: list) -> list:
119116

120117

121118
if __name__ == "__main__":
122-
parser = argparse.ArgumentParser(
123-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
124-
)
125-
parser.add_argument("project_id", help="GCP Project ID")
126-
parser.add_argument(
127-
"speech_start_timeout", help="Timeout in seconds for speech start"
128-
)
129-
parser.add_argument("speech_end_timeout", help="Timeout in seconds for speech end")
130-
parser.add_argument("audio_file", help="Audio file to stream")
131-
args = parser.parse_args()
119+
# Define the timeout duration for detecting the start of speech
120+
# In this case this means the function will wait for up to 5 seconds to determine if speech has started
121+
# before it begins processing the audio stream.
122+
speech_start_timeout = 5
123+
# Define the timeout duration for detecting the end of speech
124+
# This indicates that the function will continue to listen for up to 1 second
125+
# after the last detected speech segment to determine if speech has ended.
126+
speech_end_timeout = 1
132127
transcribe_streaming_voice_activity_timeouts(
133-
args.project_id,
134-
args.speech_start_timeout,
135-
args.speech_end_timeout,
136-
args.audio_file,
128+
speech_start_timeout=speech_start_timeout,
129+
speech_end_timeout=speech_end_timeout,
130+
audio_file="resources/audio_silence_padding.wav",
137131
)

speech/snippets/transcribe_streaming_voice_activity_timeouts_test.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,10 +27,7 @@
2727

2828
@flaky(max_runs=3, min_passes=1)
2929
def test_transcribe_silence_padding_timeouts(capsys: pytest.CaptureFixture) -> None:
30-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
31-
3230
responses = transcribe_streaming_voice_activity_timeouts.transcribe_streaming_voice_activity_timeouts(
33-
project_id,
3431
1,
3532
5,
3633
os.path.join(_RESOURCES, "audio_silence_padding.wav"),
@@ -44,10 +41,7 @@ def test_transcribe_silence_padding_timeouts(capsys: pytest.CaptureFixture) -> N
4441
def test_transcribe_streaming_voice_activity_timeouts(
4542
capsys: pytest.CaptureFixture,
4643
) -> None:
47-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
48-
4944
responses = transcribe_streaming_voice_activity_timeouts.transcribe_streaming_voice_activity_timeouts(
50-
project_id,
5145
5,
5246
1,
5347
os.path.join(_RESOURCES, "audio_silence_padding.wav"),

speech/snippets/transcribe_word_level_confidence_gcs_beta.py

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,13 @@
1717
from google.cloud import speech_v1p1beta1 as speech
1818

1919

20-
def transcribe_file_with_word_level_confidence(gcs_uri: str) -> str:
20+
def transcribe_file_with_word_level_confidence(audio_uri: str) -> str:
2121
"""Transcribe a remote audio file with word level confidence.
22-
2322
Args:
24-
gcs_uri: The Google Cloud Storage path to an audio file.
25-
23+
audio_uri (str): The Cloud Storage URI of the input audio.
24+
E.g., gs://[BUCKET]/[FILE]
2625
Returns:
27-
The generated transcript from the audio file provided.
26+
The generated transcript from the audio file provided with word level confidence.
2827
"""
2928

3029
client = speech.SpeechClient()
@@ -34,11 +33,11 @@ def transcribe_file_with_word_level_confidence(gcs_uri: str) -> str:
3433
encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
3534
sample_rate_hertz=44100,
3635
language_code="en-US",
37-
enable_word_confidence=True,
36+
enable_word_confidence=True, # Enable word level confidence
3837
)
3938

4039
# Set the remote path for the audio file
41-
audio = speech.RecognitionAudio(uri=gcs_uri)
40+
audio = speech.RecognitionAudio(uri=audio_uri)
4241

4342
# Use non-blocking call for getting file transcription
4443
response = client.long_running_recognize(config=config, audio=audio).result(
@@ -64,3 +63,8 @@ def transcribe_file_with_word_level_confidence(gcs_uri: str) -> str:
6463

6564

6665
# [END speech_transcribe_word_level_confidence_gcs_beta]
66+
67+
if __name__ == "__main__":
68+
transcribe_file_with_word_level_confidence(
69+
"gs://cloud-samples-data/speech/brooklyn_bridge.flac"
70+
)

0 commit comments

Comments (0)