diff --git a/speech/resources/brooklyn_bridge.wav b/speech/resources/brooklyn_bridge.wav
new file mode 100644
index 00000000000..140a3022e96
Binary files /dev/null and b/speech/resources/brooklyn_bridge.wav differ
diff --git a/speech/src/main/java/com/example/speech/TranscribeStreamingV2.java b/speech/src/main/java/com/example/speech/TranscribeStreamingV2.java
new file mode 100644
index 00000000000..b56027169ab
--- /dev/null
+++ b/speech/src/main/java/com/example/speech/TranscribeStreamingV2.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_to_text_transcribe_streaming_v2]
+
+import com.google.api.gax.rpc.BidiStream;
+import com.google.cloud.speech.v2.AutoDetectDecodingConfig;
+import com.google.cloud.speech.v2.RecognitionConfig;
+import com.google.cloud.speech.v2.SpeechClient;
+import com.google.cloud.speech.v2.StreamingRecognitionConfig;
+import com.google.cloud.speech.v2.StreamingRecognizeRequest;
+import com.google.cloud.speech.v2.StreamingRecognizeResponse;
+import com.google.protobuf.ByteString;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+public class TranscribeStreamingV2 {
+  private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
+
+  // The audio is sent in pieces of (file length / CHUNK_COUNT) bytes to imitate a stream.
+  private static final int CHUNK_COUNT = 5;
+
+  /**
+   * Transcribes a local audio file through the Speech-to-Text V2 streaming API.
+   *
+   * <p>The file is read fully into memory and cut into chunks. A configuration request is sent
+   * first, then one request per chunk; every response is printed and collected.
+   *
+   * @param streamFile path to the local audio file, for example {@code "resources/audio.wav"}
+   * @return the {@link StreamingRecognizeResponse} messages in the order they were received
+   * @throws IOException if reading the audio file or creating the client fails
+   * @throws IllegalStateException if {@code GOOGLE_CLOUD_PROJECT} is not set in the environment
+   */
+  public static List<StreamingRecognizeResponse> transcribeStreamingV2(String streamFile)
+      throws IOException {
+    if (PROJECT_ID == null || PROJECT_ID.isEmpty()) {
+      throw new IllegalStateException("Environment variable GOOGLE_CLOUD_PROJECT is not set");
+    }
+
+    Path audioPath = Paths.get(streamFile);
+    byte[] audioContent = Files.readAllBytes(audioPath);
+
+    RecognitionConfig recognitionConfig =
+        RecognitionConfig.newBuilder()
+            .setAutoDecodingConfig(AutoDetectDecodingConfig.getDefaultInstance())
+            .addLanguageCodes("en-US")
+            .setModel("long")
+            .build();
+
+    StreamingRecognitionConfig streamingConfig =
+        StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();
+
+    // The first request on the stream carries the recognizer name and configuration only.
+    StreamingRecognizeRequest configRequest =
+        StreamingRecognizeRequest.newBuilder()
+            .setRecognizer(
+                String.format("projects/%s/locations/global/recognizers/_", PROJECT_ID))
+            .setStreamingConfig(streamingConfig)
+            .build();
+
+    List<StreamingRecognizeRequest> requests = new ArrayList<>();
+    requests.add(configRequest);
+    requests.addAll(buildAudioRequests(audioContent));
+
+    List<StreamingRecognizeResponse> responses = new ArrayList<>();
+    try (SpeechClient client = SpeechClient.create()) {
+      BidiStream<StreamingRecognizeRequest, StreamingRecognizeResponse> bidiStream =
+          client.streamingRecognizeCallable().call();
+      for (StreamingRecognizeRequest request : requests) {
+        bidiStream.send(request);
+      }
+      bidiStream.closeSend();
+
+      Iterator<StreamingRecognizeResponse> responseIterator = bidiStream.iterator();
+      while (responseIterator.hasNext()) {
+        StreamingRecognizeResponse response = responseIterator.next();
+        System.out.println(response);
+        // Check both counts before indexing so a response without a result or an
+        // alternative is still collected instead of throwing IndexOutOfBoundsException.
+        if (response.getResultsCount() > 0
+            && response.getResults(0).getAlternativesCount() > 0) {
+          System.out.println(
+              "Transcript: " + response.getResults(0).getAlternatives(0).getTranscript());
+        }
+        responses.add(response);
+      }
+    }
+    return responses;
+  }
+
+  // Wraps the audio bytes in one StreamingRecognizeRequest per chunk, in file order.
+  private static List<StreamingRecognizeRequest> buildAudioRequests(byte[] audioContent) {
+    // In practice the chunks would come from a live source rather than a fully loaded file.
+    // Math.max keeps the step positive: for inputs shorter than CHUNK_COUNT bytes the integer
+    // division yields 0, and a zero step would never advance the loop.
+    // NOTE(review): verify the API's per-request audio size limit; large files may need a
+    // fixed chunk size instead of a fixed chunk count.
+    int chunkLength = Math.max(1, audioContent.length / CHUNK_COUNT);
+    List<StreamingRecognizeRequest> audioRequests = new ArrayList<>();
+    for (int start = 0; start < audioContent.length; start += chunkLength) {
+      int size = Math.min(chunkLength, audioContent.length - start);
+      audioRequests.add(
+          StreamingRecognizeRequest.newBuilder()
+              .setAudio(ByteString.copyFrom(audioContent, start, size))
+              .build());
+    }
+    return audioRequests;
+  }
+
+  public static void main(String[] args) throws IOException {
+    transcribeStreamingV2("./resources/brooklyn_bridge.wav");
+  }
+}
+// [END speech_to_text_transcribe_streaming_v2]
diff --git a/speech/src/test/java/com/example/speech/TranscribeStreamingV2IT.java b/speech/src/test/java/com/example/speech/TranscribeStreamingV2IT.java
new file mode 100644
index 00000000000..36792a4741e
--- /dev/null
+++ b/speech/src/test/java/com/example/speech/TranscribeStreamingV2IT.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2025 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import com.google.cloud.speech.v2.StreamingRecognizeResponse;
+import com.google.common.truth.Truth;
+import java.io.IOException;
+import java.util.List;
+import java.util.regex.Pattern;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Integration test for {@link TranscribeStreamingV2} using the Brooklyn Bridge recording. */
+@RunWith(JUnit4.class)
+public class TranscribeStreamingV2IT {
+
+  // Phrase the recording is expected to contain; matched case-insensitively.
+  private static final Pattern EXPECTED_PHRASE =
+      Pattern.compile("how old is the Brooklyn Bridge", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
+
+  @Test
+  public void testTranscribeStreamingV2_Success() throws IOException {
+    // Audio fixture that is checked in with the sample; nothing is generated here.
+    String testFilePath = "./resources/brooklyn_bridge.wav";
+
+    List<StreamingRecognizeResponse> responses =
+        TranscribeStreamingV2.transcribeStreamingV2(testFilePath);
+
+    // Concatenate the first alternative of each response that actually carries one.
+    StringBuilder transcript = new StringBuilder();
+    for (StreamingRecognizeResponse response : responses) {
+      if (response.getResultsCount() > 0 && response.getResults(0).getAlternativesCount() > 0) {
+        transcript.append(response.getResults(0).getAlternatives(0).getTranscript());
+      }
+    }
+    Truth.assertThat(transcript.toString()).containsMatch(EXPECTED_PHRASE);
+  }
+}
+