18 | 18 | Google Cloud API.
19 | 19 |
20 | 20 | Usage Examples:
21 |    | -    python beta_snippets.py transcription \
22 |    | -        gs://python-docs-samples-tests/video/googlework_tiny.mp4
23 |    | -
24 |    | -    python beta_snippets.py video-text-gcs \
25 |    | -        gs://python-docs-samples-tests/video/googlework_tiny.mp4
26 |    | -
27 | 21 |     python beta_snippets.py streaming-labels resources/cat.mp4
28 | 22 |
29 | 23 |     python beta_snippets.py streaming-shot-change resources/cat.mp4

49 | 43 | import io
50 | 44 |
51 | 45 |
52 |    | -def speech_transcription(input_uri, timeout=180):
53 |    | -    # [START video_speech_transcription_gcs_beta]
54 |    | -    """Transcribe speech from a video stored on GCS."""
55 |    | -    from google.cloud import videointelligence_v1p1beta1 as videointelligence
56 |    | -
57 |    | -    video_client = videointelligence.VideoIntelligenceServiceClient()
58 |    | -
59 |    | -    features = [videointelligence.Feature.SPEECH_TRANSCRIPTION]
60 |    | -
61 |    | -    config = videointelligence.SpeechTranscriptionConfig(
62 |    | -        language_code="en-US", enable_automatic_punctuation=True
63 |    | -    )
64 |    | -    video_context = videointelligence.VideoContext(speech_transcription_config=config)
65 |    | -
66 |    | -    operation = video_client.annotate_video(
67 |    | -        request={
68 |    | -            "features": features,
69 |    | -            "input_uri": input_uri,
70 |    | -            "video_context": video_context,
71 |    | -        }
72 |    | -    )
73 |    | -
74 |    | -    print("\nProcessing video for speech transcription.")
75 |    | -
76 |    | -    result = operation.result(timeout)
77 |    | -
78 |    | -    # There is only one annotation_result since only
79 |    | -    # one video is processed.
80 |    | -    annotation_results = result.annotation_results[0]
81 |    | -    for speech_transcription in annotation_results.speech_transcriptions:
82 |    | -        # The number of alternatives for each transcription is limited by
83 |    | -        # SpeechTranscriptionConfig.max_alternatives.
84 |    | -        # Each alternative is a different possible transcription
85 |    | -        # and has its own confidence score.
86 |    | -        for alternative in speech_transcription.alternatives:
87 |    | -            print("Alternative level information:")
88 |    | -
89 |    | -            print("Transcript: {}".format(alternative.transcript))
90 |    | -            print("Confidence: {}\n".format(alternative.confidence))
91 |    | -
92 |    | -            print("Word level information:")
93 |    | -            for word_info in alternative.words:
94 |    | -                word = word_info.word
95 |    | -                start_time = word_info.start_time
96 |    | -                end_time = word_info.end_time
97 |    | -                print(
98 |    | -                    "\t{}s - {}s: {}".format(
99 |    | -                        start_time.seconds + start_time.microseconds * 1e-6,
100 |    | -                        end_time.seconds + end_time.microseconds * 1e-6,
101 |    | -                        word,
102 |    | -                    )
103 |    | -                )
104 |    | -    # [END video_speech_transcription_gcs_beta]
105 |    | -
106 |    | -
107 |    | -def video_detect_text_gcs(input_uri):
108 |    | -    # [START video_detect_text_gcs_beta]
109 |    | -    """Detect text in a video stored on GCS."""
110 |    | -    from google.cloud import videointelligence_v1p2beta1 as videointelligence
111 |    | -
112 |    | -    video_client = videointelligence.VideoIntelligenceServiceClient()
113 |    | -    features = [videointelligence.Feature.TEXT_DETECTION]
114 |    | -
115 |    | -    operation = video_client.annotate_video(
116 |    | -        request={"features": features, "input_uri": input_uri}
117 |    | -    )
118 |    | -
119 |    | -    print("\nProcessing video for text detection.")
120 |    | -    result = operation.result(timeout=300)
121 |    | -
122 |    | -    # The first result is retrieved because a single video was processed.
123 |    | -    annotation_result = result.annotation_results[0]
124 |    | -
125 |    | -    # Get only the first result
126 |    | -    text_annotation = annotation_result.text_annotations[0]
127 |    | -    print("\nText: {}".format(text_annotation.text))
128 |    | -
129 |    | -    # Get the first text segment
130 |    | -    text_segment = text_annotation.segments[0]
131 |    | -    start_time = text_segment.segment.start_time_offset
132 |    | -    end_time = text_segment.segment.end_time_offset
133 |    | -    print(
134 |    | -        "start_time: {}, end_time: {}".format(
135 |    | -            start_time.seconds + start_time.microseconds * 1e-6,
136 |    | -            end_time.seconds + end_time.microseconds * 1e-6,
137 |    | -        )
138 |    | -    )
139 |    | -
140 |    | -    print("Confidence: {}".format(text_segment.confidence))
141 |    | -
142 |    | -    # Show the result for the first frame in this segment.
143 |    | -    frame = text_segment.frames[0]
144 |    | -    time_offset = frame.time_offset
145 |    | -    print(
146 |    | -        "Time offset for the first frame: {}".format(
147 |    | -            time_offset.seconds + time_offset.microseconds * 1e-6
148 |    | -        )
149 |    | -    )
150 |    | -    print("Rotated Bounding Box Vertices:")
151 |    | -    for vertex in frame.rotated_bounding_box.vertices:
152 |    | -        print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y))
153 |    | -    # [END video_detect_text_gcs_beta]
154 |    | -    return annotation_result.text_annotations
155 |    | -
156 |    | -
157 |    | -def video_detect_text(path):
158 |    | -    # [START video_detect_text_beta]
159 |    | -    """Detect text in a local video."""
160 |    | -    from google.cloud import videointelligence_v1p2beta1 as videointelligence
161 |    | -
162 |    | -    video_client = videointelligence.VideoIntelligenceServiceClient()
163 |    | -    features = [videointelligence.Feature.TEXT_DETECTION]
164 |    | -    video_context = videointelligence.VideoContext()
165 |    | -
166 |    | -    with io.open(path, "rb") as file:
167 |    | -        input_content = file.read()
168 |    | -
169 |    | -    operation = video_client.annotate_video(
170 |    | -        request={
171 |    | -            "features": features,
172 |    | -            "input_content": input_content,
173 |    | -            "video_context": video_context,
174 |    | -        }
175 |    | -    )
176 |    | -
177 |    | -    print("\nProcessing video for text detection.")
178 |    | -    result = operation.result(timeout=300)
179 |    | -
180 |    | -    # The first result is retrieved because a single video was processed.
181 |    | -    annotation_result = result.annotation_results[0]
182 |    | -
183 |    | -    # Get only the first result
184 |    | -    text_annotation = annotation_result.text_annotations[0]
185 |    | -    print("\nText: {}".format(text_annotation.text))
186 |    | -
187 |    | -    # Get the first text segment
188 |    | -    text_segment = text_annotation.segments[0]
189 |    | -    start_time = text_segment.segment.start_time_offset
190 |    | -    end_time = text_segment.segment.end_time_offset
191 |    | -    print(
192 |    | -        "start_time: {}, end_time: {}".format(
193 |    | -            start_time.seconds + start_time.microseconds * 1e-6,
194 |    | -            end_time.seconds + end_time.microseconds * 1e-6,
195 |    | -        )
196 |    | -    )
197 |    | -
198 |    | -    print("Confidence: {}".format(text_segment.confidence))
199 |    | -
200 |    | -    # Show the result for the first frame in this segment.
201 |    | -    frame = text_segment.frames[0]
202 |    | -    time_offset = frame.time_offset
203 |    | -    print(
204 |    | -        "Time offset for the first frame: {}".format(
205 |    | -            time_offset.seconds + time_offset.microseconds * 1e-6
206 |    | -        )
207 |    | -    )
208 |    | -    print("Rotated Bounding Box Vertices:")
209 |    | -    for vertex in frame.rotated_bounding_box.vertices:
210 |    | -        print("\tVertex.x: {}, Vertex.y: {}".format(vertex.x, vertex.y))
211 |    | -    # [END video_detect_text_beta]
212 |    | -    return annotation_result.text_annotations
213 |    | -
214 |    | -
215 | 46 | def detect_labels_streaming(path):
216 | 47 |     # [START video_streaming_label_detection_beta]
217 | 48 |     from google.cloud import videointelligence_v1p3beta1 as videointelligence

@@ -826,21 +657,6 @@ def stream_generator():

826 | 657 |     )
827 | 658 |     subparsers = parser.add_subparsers(dest="command")
828 | 659 |
829 |    | -    speech_transcription_parser = subparsers.add_parser(
830 |    | -        "transcription", help=speech_transcription.__doc__
831 |    | -    )
832 |    | -    speech_transcription_parser.add_argument("gcs_uri")
833 |    | -
834 |    | -    video_text_gcs_parser = subparsers.add_parser(
835 |    | -        "video-text-gcs", help=video_detect_text_gcs.__doc__
836 |    | -    )
837 |    | -    video_text_gcs_parser.add_argument("gcs_uri")
838 |    | -
839 |    | -    video_text_parser = subparsers.add_parser(
840 |    | -        "video-text", help=video_detect_text.__doc__
841 |    | -    )
842 |    | -    video_text_parser.add_argument("path")
843 |    | -
844 | 660 |     video_streaming_labels_parser = subparsers.add_parser(
845 | 661 |         "streaming-labels", help=detect_labels_streaming.__doc__
846 | 662 |     )

@@ -892,13 +708,7 @@ def stream_generator():

892 | 708 |
893 | 709 |     args = parser.parse_args()
894 | 710 |
895 |    | -    if args.command == "transcription":
896 |    | -        speech_transcription(args.gcs_uri)
897 |    | -    elif args.command == "video-text-gcs":
898 |    | -        video_detect_text_gcs(args.gcs_uri)
899 |    | -    elif args.command == "video-text":
900 |    | -        video_detect_text(args.path)
901 |    | -    elif args.command == "streaming-labels":
    | 711 | +    if args.command == "streaming-labels":
902 | 712 |         detect_labels_streaming(args.path)
903 | 713 |     elif args.command == "streaming-shot-change":
904 | 714 |         detect_shot_change_streaming(args.path)
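For reference, the samples that remain in beta_snippets.py (streaming-labels, streaming-shot-change, and the other streaming commands) all follow the same shape with the v1p3beta1 client kept in the unchanged lines above: the first StreamingAnnotateVideoRequest carries only a StreamingVideoConfig, every later request carries a chunk of raw video bytes, and annotations are read off the response iterator. Below is a minimal, illustrative sketch of that pattern, not code taken from the commit; the function name, default path, and chunk size are assumptions.

import io

from google.cloud import videointelligence_v1p3beta1 as videointelligence


def streaming_labels_sketch(path="resources/cat.mp4", chunk_size=5 * 1024 * 1024):
    # Illustrative sketch of streaming label detection; name, path, and
    # chunk size are assumptions, not values from beta_snippets.py.
    client = videointelligence.StreamingVideoIntelligenceServiceClient()

    # The first request carries only the streaming configuration.
    config = videointelligence.StreamingVideoConfig(
        feature=videointelligence.StreamingFeature.STREAMING_LABEL_DETECTION,
        label_detection_config=videointelligence.StreamingLabelDetectionConfig(),
    )
    config_request = videointelligence.StreamingAnnotateVideoRequest(video_config=config)

    def request_generator():
        yield config_request
        # Every subsequent request carries a chunk of raw video bytes.
        with io.open(path, "rb") as video_file:
            chunk = video_file.read(chunk_size)
            while chunk:
                yield videointelligence.StreamingAnnotateVideoRequest(input_content=chunk)
                chunk = video_file.read(chunk_size)

    # Responses stream back as the service processes each chunk.
    for response in client.streaming_annotate_video(requests=request_generator()):
        for annotation in response.annotation_results.label_annotations:
            frame = annotation.frames[0]
            print("{}: {:.2f}".format(annotation.entity.description, frame.confidence))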