Usage Examples:

-    python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
+    python analyze.py labels gs://cloud-samples-data/video/chicago.mp4
    python analyze.py labels_file resources/cat.mp4
-    python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
-    python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
-
+    python analyze.py shots gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py explicit_content \
+        gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py text_gcs \
+        gs://cloud-samples-data/video/googlework_short.mp4
+    python analyze.py text_file resources/googlework_short.mp4
+    python analyze.py objects_gcs gs://cloud-samples-data/video/cat.mp4
+    python analyze.py objects_file resources/cat.mp4

"""

import argparse
@@ -278,27 +283,232 @@ def speech_transcription(path):
# [END video_speech_transcription_gcs]


+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs]
+
+
+def video_detect_text(path):
+    # [START video_detect_text]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text]
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs]
+    """Object tracking in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    for object_annotation in object_annotations:
+        print('Entity description: {}'.format(
+            object_annotation.entity.description))
+        if object_annotation.entity.entity_id:
+            print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+        print('Segment: {}s to {}s'.format(
+            object_annotation.segment.start_time_offset.seconds +
+            object_annotation.segment.start_time_offset.nanos / 1e9,
+            object_annotation.segment.end_time_offset.seconds +
+            object_annotation.segment.end_time_offset.nanos / 1e9))
+
+        print('Confidence: {}'.format(object_annotation.confidence))
+
+        # Here we print only the bounding box of the first frame in the segment
+        frame = object_annotation.frames[0]
+        box = frame.normalized_bounding_box
+        print('Time offset of the first frame: {}s'.format(
+            frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+        print('Bounding box position:')
+        print('\tleft  : {}'.format(box.left))
+        print('\ttop   : {}'.format(box.top))
+        print('\tright : {}'.format(box.right))
+        print('\tbottom: {}'.format(box.bottom))
+        print('\n')
+    # [END video_object_tracking_gcs]
+
+
+def track_objects(path):
+    # [START video_object_tracking]
+    """Object tracking in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking]
+
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
+
    analyze_labels_parser = subparsers.add_parser(
        'labels', help=analyze_labels.__doc__)
    analyze_labels_parser.add_argument('path')
+
    analyze_labels_file_parser = subparsers.add_parser(
        'labels_file', help=analyze_labels_file.__doc__)
    analyze_labels_file_parser.add_argument('path')
+
    analyze_explicit_content_parser = subparsers.add_parser(
        'explicit_content', help=analyze_explicit_content.__doc__)
    analyze_explicit_content_parser.add_argument('path')
+
    analyze_shots_parser = subparsers.add_parser(
        'shots', help=analyze_shots.__doc__)
    analyze_shots_parser.add_argument('path')
+
    transcribe_speech_parser = subparsers.add_parser(
        'transcribe', help=speech_transcription.__doc__)
    transcribe_speech_parser.add_argument('path')

+    detect_text_parser = subparsers.add_parser(
+        'text_gcs', help=video_detect_text_gcs.__doc__)
+    detect_text_parser.add_argument('path')
+
+    detect_text_file_parser = subparsers.add_parser(
+        'text_file', help=video_detect_text.__doc__)
+    detect_text_file_parser.add_argument('path')
+
+    track_objects_parser = subparsers.add_parser(
+        'objects_gcs', help=track_objects_gcs.__doc__)
+    track_objects_parser.add_argument('path')
+
+    track_objects_file_parser = subparsers.add_parser(
+        'objects_file', help=track_objects.__doc__)
+    track_objects_file_parser.add_argument('path')
+
    args = parser.parse_args()

    if args.command == 'labels':
@@ -311,3 +521,11 @@ def speech_transcription(path):
        analyze_explicit_content(args.path)
    if args.command == 'transcribe':
        speech_transcription(args.path)
+    if args.command == 'text_gcs':
+        video_detect_text_gcs(args.path)
+    if args.command == 'text_file':
+        video_detect_text(args.path)
+    if args.command == 'objects_gcs':
+        track_objects_gcs(args.path)
+    if args.command == 'objects_file':
+        track_objects(args.path)
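For reference, a minimal sketch of driving one of the new handlers directly from Python rather than through the argparse wrapper. It assumes this sample file is importable as `analyze`, that the google-cloud-videointelligence client library is installed, and that application default credentials are configured; the GCS URI is the same sample path used in the usage examples above:

    # Hypothetical direct call, bypassing the analyze.py CLI.
    from analyze import video_detect_text_gcs

    # Prints each detected text string with its segment times, confidence,
    # and the rotated bounding box of the first frame.
    video_detect_text_gcs('gs://cloud-samples-data/video/googlework_short.mp4')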