Skip to content

Commit de4ad98

Browse files
Simplified speech samples (set-2) (#12120)
* Simplified next set of Samples
* Fix nits and added test
* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 6d59d2d commit de4ad98

9 files changed

+102
-82
lines changed

speech/snippets/transcribe_async_gcs_test.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@
1818

1919
import transcribe_async_gcs
2020
import transcribe_diarization_gcs_beta
21-
import transcribe_multilanguage_gcs_beta
2221
import transcribe_word_level_confidence_gcs_beta
2322

2423
BUCKET = "cloud-samples-data"
2524
GCS_AUDIO_PATH = "gs://" + BUCKET + "/speech/brooklyn_bridge.flac"
2625
GCS_DIARIZATION_AUDIO_PATH = "gs://" + BUCKET + "/speech/commercial_mono.wav"
27-
GCS_MUTLILANGUAGE_PATH = "gs://" + BUCKET + "/speech/Google_Gnome.wav"
2826

2927

3028
@Retry()
@@ -40,15 +38,6 @@ def test_transcribe_diarization_gcs_beta() -> None:
4038
assert is_completed
4139

4240

43-
def test_transcribe_multilanguage_gcs_bets() -> None:
44-
transcript = (
45-
transcribe_multilanguage_gcs_beta.transcribe_file_with_multilanguage_gcs(
46-
GCS_MUTLILANGUAGE_PATH
47-
)
48-
)
49-
assert re.search("Transcript: OK Google", transcript)
50-
51-
5241
def test_transcribe_word_level_confidence_gcs_beta() -> None:
5342
transcript = transcribe_word_level_confidence_gcs_beta.transcribe_file_with_word_level_confidence(
5443
GCS_AUDIO_PATH

speech/snippets/transcribe_chirp.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,28 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
16-
import argparse
17-
1815
# [START speech_transcribe_chirp]
16+
import os
17+
1918
from google.api_core.client_options import ClientOptions
2019
from google.cloud.speech_v2 import SpeechClient
2120
from google.cloud.speech_v2.types import cloud_speech
2221

22+
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
23+
2324

2425
def transcribe_chirp(
25-
project_id: str,
2626
audio_file: str,
2727
) -> cloud_speech.RecognizeResponse:
28-
"""Transcribe an audio file using Chirp."""
28+
"""Transcribes an audio file using the Chirp model of Google Cloud Speech-to-Text API.
29+
Args:
30+
audio_file (str): Path to the local audio file to be transcribed.
31+
Example: "resources/audio.wav"
32+
Returns:
33+
cloud_speech.RecognizeResponse: The response from the Speech-to-Text API containing
34+
the transcription results.
35+
36+
"""
2937
# Instantiates a client
3038
client = SpeechClient(
3139
client_options=ClientOptions(
@@ -35,7 +43,7 @@ def transcribe_chirp(
3543

3644
# Reads a file as bytes
3745
with open(audio_file, "rb") as f:
38-
content = f.read()
46+
audio_content = f.read()
3947

4048
config = cloud_speech.RecognitionConfig(
4149
auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
@@ -44,9 +52,9 @@ def transcribe_chirp(
4452
)
4553

4654
request = cloud_speech.RecognizeRequest(
47-
recognizer=f"projects/{project_id}/locations/us-central1/recognizers/_",
55+
recognizer=f"projects/{PROJECT_ID}/locations/us-central1/recognizers/_",
4856
config=config,
49-
content=content,
57+
content=audio_content,
5058
)
5159

5260
# Transcribes the audio into text
@@ -62,10 +70,4 @@ def transcribe_chirp(
6270

6371

6472
if __name__ == "__main__":
65-
parser = argparse.ArgumentParser(
66-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
67-
)
68-
parser.add_argument("project_id", help="GCP Project ID")
69-
parser.add_argument("audio_file", help="Audio file to stream")
70-
args = parser.parse_args()
71-
transcribe_chirp(args.project_id, args.audio_file)
73+
transcribe_chirp("resources/audio.wav")

speech/snippets/transcribe_chirp_test.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,7 @@
2424

2525
@Retry()
2626
def test_transcribe_chirp() -> None:
27-
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
28-
29-
response = transcribe_chirp.transcribe_chirp(
30-
project_id, os.path.join(_RESOURCES, "audio.wav")
31-
)
32-
27+
response = transcribe_chirp.transcribe_chirp(os.path.join(_RESOURCES, "audio.wav"))
3328
assert re.search(
3429
r"how old is the Brooklyn Bridge",
3530
response.results[0].alternatives[0].transcript,

speech/snippets/transcribe_context_classes.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,19 @@
1616
from google.cloud import speech
1717

1818

19-
def transcribe_context_classes(storage_uri: str) -> speech.RecognizeResponse:
20-
"""Provides "hints" to the speech recognizer to
21-
favor specific classes of words in the results.
22-
19+
def transcribe_context_classes(audio_uri: str) -> speech.RecognizeResponse:
20+
"""Provides "hints" to the speech recognizer to favor
21+
specific classes of words in the results.
2322
Args:
24-
storage_uri: The URI of the audio file to transcribe.
25-
23+
audio_uri: The URI of the audio file to transcribe.
24+
E.g., gs://[BUCKET]/[FILE]
2625
Returns:
27-
The transcript of the audio file.
26+
speech.RecognizeResponse: The response containing the transcription results.
2827
"""
2928
client = speech.SpeechClient()
3029

31-
# storage_uri = 'gs://YOUR_BUCKET_ID/path/to/your/file.wav'
32-
audio = speech.RecognitionAudio(uri=storage_uri)
30+
# audio_uri = 'gs://YOUR_BUCKET_ID/path/to/your/file.wav'
31+
audio = speech.RecognitionAudio(uri=audio_uri)
3332

3433
# SpeechContext: to configure your speech_context see:
3534
# https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#speechcontext

speech/snippets/transcribe_diarization_gcs_beta.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,24 @@
1717
from google.cloud import speech
1818

1919

20-
def transcribe_diarization_gcs_beta(gcs_uri: str) -> bool:
20+
def transcribe_diarization_gcs_beta(audio_uri: str) -> bool:
2121
"""Transcribe a remote audio file (stored in Google Cloud Storage) using speaker diarization.
22-
2322
Args:
24-
gcs_uri: The Google Cloud Storage path to an audio file.
25-
23+
audio_uri (str): The Google Cloud Storage path to an audio file.
24+
E.g., gs://[BUCKET]/[FILE]
2625
Returns:
2726
True if the operation successfully completed, False otherwise.
2827
"""
2928

3029
client = speech.SpeechClient()
31-
30+
# Enable speaker diarization, expecting exactly two speakers in the audio
3231
speaker_diarization_config = speech.SpeakerDiarizationConfig(
3332
enable_speaker_diarization=True,
34-
min_speaker_count=2,
35-
max_speaker_count=2,
33+
min_speaker_count=2, # Set minimum number of speakers
34+
max_speaker_count=2, # Adjust max speakers based on expected number of speakers
3635
)
3736

38-
# Configure request to enable Speaker diarization
37+
# Configure the recognition request to enable speaker diarization
3938
recognition_config = speech.RecognitionConfig(
4039
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
4140
language_code="en-US",
@@ -45,7 +44,7 @@ def transcribe_diarization_gcs_beta(gcs_uri: str) -> bool:
4544

4645
# Set the remote path for the audio file
4746
audio = speech.RecognitionAudio(
48-
uri=gcs_uri,
47+
uri=audio_uri,
4948
)
5049

5150
# Use non-blocking call for getting file transcription
@@ -67,3 +66,8 @@ def transcribe_diarization_gcs_beta(gcs_uri: str) -> bool:
6766

6867

6968
# [END speech_transcribe_diarization_gcs_beta]
69+
70+
if __name__ == "__main__":
71+
transcribe_diarization_gcs_beta(
72+
audio_uri="gs://cloud-samples-data/speech/commercial_mono.wav"
73+
)

speech/snippets/transcribe_enhanced_model.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,27 +14,28 @@
1414

1515
"""Google Cloud Speech API sample that demonstrates enhanced models
1616
and recognition metadata.
17-
18-
Example usage:
19-
python transcribe_enhanced_model.py resources/commercial_mono.wav
2017
"""
2118
# [START speech_transcribe_enhanced_model]
2219

23-
import argparse
24-
2520
from google.cloud import speech
2621

2722

28-
def transcribe_file_with_enhanced_model(path: str) -> speech.RecognizeResponse:
29-
"""Transcribe the given audio file using an enhanced model."""
23+
def transcribe_file_with_enhanced_model(audio_file: str) -> speech.RecognizeResponse:
24+
"""Transcribe the given audio file using an enhanced model.
25+
Args:
26+
audio_file (str): Path to the local audio file to be transcribed.
27+
Example: "resources/commercial_mono.wav"
28+
Returns:
29+
speech.RecognizeResponse: The response containing the transcription results.
30+
"""
3031

3132
client = speech.SpeechClient()
3233

33-
# path = 'resources/commercial_mono.wav'
34-
with open(path, "rb") as audio_file:
35-
content = audio_file.read()
34+
# audio_file = 'resources/commercial_mono.wav'
35+
with open(audio_file, "rb") as f:
36+
audio_content = f.read()
3637

37-
audio = speech.RecognitionAudio(content=content)
38+
audio = speech.RecognitionAudio(content=audio_content)
3839
config = speech.RecognitionConfig(
3940
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
4041
sample_rate_hertz=8000,
@@ -57,11 +58,4 @@ def transcribe_file_with_enhanced_model(path: str) -> speech.RecognizeResponse:
5758

5859

5960
if __name__ == "__main__":
60-
parser = argparse.ArgumentParser(
61-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
62-
)
63-
parser.add_argument("path", help="File to stream to the API")
64-
65-
args = parser.parse_args()
66-
67-
transcribe_file_with_enhanced_model(args.path)
61+
transcribe_file_with_enhanced_model("resources/commercial_mono.wav")

speech/snippets/transcribe_enhanced_model_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,5 +29,7 @@ def test_transcribe_file_with_enhanced_model(capsys: pytest.CaptureFixture) -> N
2929
)
3030
out, _ = capsys.readouterr()
3131

32-
assert "Chrome" in out
3332
assert result is not None
33+
assert "Chrome" in out
34+
assert "First alternative" in out
35+
assert "result 7" in out

speech/snippets/transcribe_multilanguage_gcs_beta.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,31 +17,30 @@
1717
from google.cloud import speech_v1p1beta1 as speech
1818

1919

20-
def transcribe_file_with_multilanguage_gcs(gcs_uri: str) -> str:
20+
def transcribe_file_with_multilanguage_gcs(audio_uri: str) -> str:
2121
"""Transcribe a remote audio file with multi-language recognition
22-
2322
Args:
24-
gcs_uri: The Google Cloud Storage path to an audio file.
25-
23+
audio_uri (str): The Google Cloud Storage path to an audio file.
24+
E.g., gs://[BUCKET]/[FILE]
2625
Returns:
27-
The generated transcript from the audio file provided.
26+
str: The generated transcript from the audio file provided.
2827
"""
2928

3029
client = speech.SpeechClient()
3130

32-
first_language = "ja-JP"
33-
alternate_languages = ["es-ES", "en-US"]
31+
first_language = "es-ES"
32+
alternate_languages = ["en-US", "fr-FR"]
3433

3534
# Configure request to enable multiple languages
3635
recognition_config = speech.RecognitionConfig(
37-
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
38-
sample_rate_hertz=16000,
36+
encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
37+
sample_rate_hertz=44100,
3938
language_code=first_language,
4039
alternative_language_codes=alternate_languages,
4140
)
4241

4342
# Set the remote path for the audio file
44-
audio = speech.RecognitionAudio(uri=gcs_uri)
43+
audio = speech.RecognitionAudio(uri=audio_uri)
4544

4645
# Use non-blocking call for getting file transcription
4746
response = client.long_running_recognize(
@@ -51,9 +50,9 @@ def transcribe_file_with_multilanguage_gcs(gcs_uri: str) -> str:
5150
transcript_builder = []
5251
for i, result in enumerate(response.results):
5352
alternative = result.alternatives[0]
54-
transcript_builder.append("-" * 20)
53+
transcript_builder.append("-" * 20 + "\n")
5554
transcript_builder.append(f"First alternative of result {i}: {alternative}")
56-
transcript_builder.append(f"Transcript: {alternative.transcript}")
55+
transcript_builder.append(f"Transcript: {alternative.transcript} \n")
5756

5857
transcript = "".join(transcript_builder)
5958
print(transcript)
@@ -62,3 +61,8 @@ def transcribe_file_with_multilanguage_gcs(gcs_uri: str) -> str:
6261

6362

6463
# [END speech_transcribe_multilanguage_gcs_beta]
64+
65+
if __name__ == "__main__":
66+
transcribe_file_with_multilanguage_gcs(
67+
"gs://cloud-samples-data/speech/multi_es.flac"
68+
)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from google.api_core.retry import Retry
16+
17+
import pytest
18+
19+
import transcribe_multilanguage_gcs_beta
20+
21+
22+
@Retry()
23+
def test_transcribe_file_with_multilanguage_gcs(capsys: pytest.CaptureFixture) -> None:
24+
audio = "gs://cloud-samples-data/speech/multi_es.flac"
25+
response = transcribe_multilanguage_gcs_beta.transcribe_file_with_multilanguage_gcs(
26+
audio
27+
)
28+
out, err = capsys.readouterr()
29+
30+
assert response is not None
31+
assert "estoy" in out

0 commit comments

Comments
 (0)