Google Cloud API.

Usage Examples:

-    python beta_snippets.py \
-    transcription gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py transcription \
+    gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py video-text-gcs \
+    gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py track-objects /resources/cat.mp4
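+    python beta_snippets.py video-text /resources/cat.mp4
+    python beta_snippets.py track-objects-gcs \
+    gs://python-docs-samples-tests/video/googlework_short.mp4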
23
26
"""
24
27
25
28
import argparse
29
+ import io
26
30
27
- from google .cloud import videointelligence_v1p1beta1 as videointelligence
28
31
29
-
30
- # [START video_speech_transcription_gcs_beta]
31
32
def speech_transcription (input_uri ):
33
+ # [START video_speech_transcription_gcs_beta]
32
34
"""Transcribe speech from a video stored on GCS."""
35
+ from google .cloud import videointelligence_v1p1beta1 as videointelligence
36
+
33
37
video_client = videointelligence .VideoIntelligenceServiceClient ()
34
38
35
39
features = [videointelligence .enums .Feature .SPEECH_TRANSCRIPTION ]
@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
                    start_time.seconds + start_time.nanos * 1e-9,
                    end_time.seconds + end_time.nanos * 1e-9,
                    word))
-# [END video_speech_transcription_gcs_beta]
+    # [END video_speech_transcription_gcs_beta]
+
+
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs_beta]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
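+    # annotate_video returns a long-running operation; result() blocks until
+    # processing finishes or the 300-second timeout elapses.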
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
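+    # The offsets are protobuf Durations; seconds + nanos * 1e-9 gives the
+    # offset as a number of seconds.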
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs_beta]
+    return annotation_result.text_annotations
+
+
+def video_detect_text(path):
+    # [START video_detect_text_beta]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
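+    # Read the whole file into memory; local video content is sent to the API
+    # as raw bytes via input_content.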
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_beta]
+    return annotation_result.text_annotations
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs_beta]
+    """Track objects in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    # Using 'us-east1' as the location_id is recommended for the best latency,
+    # because this region uses different processor types than other regions.
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment.
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
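+    # The bounding box coordinates are normalized to [0, 1] relative to the
+    # frame width and height.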
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_gcs_beta]
+    return object_annotations
+
+
+def track_objects(path):
+    # [START video_object_tracking_beta]
+    """Track objects in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    # Using 'us-east1' as the location_id is recommended for the best latency,
+    # because this region uses different processor types than other regions.
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment.
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_beta]
+    return object_annotations


if __name__ == '__main__':
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
        'transcription', help=speech_transcription.__doc__)
    speech_transcription_parser.add_argument('gcs_uri')

+    video_text_gcs_parser = subparsers.add_parser(
+        'video-text-gcs', help=video_detect_text_gcs.__doc__)
+    video_text_gcs_parser.add_argument('gcs_uri')
+
+    video_text_parser = subparsers.add_parser(
+        'video-text', help=video_detect_text.__doc__)
+    video_text_parser.add_argument('path')
+
+    video_object_tracking_gcs_parser = subparsers.add_parser(
+        'track-objects-gcs', help=track_objects_gcs.__doc__)
+    video_object_tracking_gcs_parser.add_argument('gcs_uri')
+
+    video_object_tracking_parser = subparsers.add_parser(
+        'track-objects', help=track_objects.__doc__)
+    video_object_tracking_parser.add_argument('path')
+
    args = parser.parse_args()

    if args.command == 'transcription':
        speech_transcription(args.gcs_uri)
+    elif args.command == 'video-text-gcs':
+        video_detect_text_gcs(args.gcs_uri)
+    elif args.command == 'video-text':
+        video_detect_text(args.path)
+    elif args.command == 'track-objects-gcs':
+        track_objects_gcs(args.gcs_uri)
+    elif args.command == 'track-objects':
+        track_objects(args.path)