
Commit 07d5bd0

dizcology authored and leahecole committed
Video GA text detection and object tracking [(#2024)](#2024)

* add video text and object samples
* update sample data location
* add tests of the gcs samples
* update client library version in requirements.txt
* fix
* remove location_id
1 parent 733603b commit 07d5bd0

File tree

3 files changed: +243, -9 lines

videointelligence/samples/analyze/analyze.py

Lines changed: 222 additions & 4 deletions
```diff
@@ -19,11 +19,16 @@
 
 Usage Examples:
 
-    python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
+    python analyze.py labels gs://cloud-samples-data/video/chicago.mp4
     python analyze.py labels_file resources/cat.mp4
-    python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
-    python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
-
+    python analyze.py shots gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py explicit_content \
+        gs://cloud-samples-data/video/gbikes_dinosaur.mp4
+    python analyze.py text_gcs \
+        gs://cloud-samples-data/video/googlework_short.mp4
+    python analyze.py text_file resources/googlework_short.mp4
+    python analyze.py objects_gcs gs://cloud-samples-data/video/cat.mp4
+    python analyze.py objects_file resources/cat.mp4
 """
 
 import argparse
@@ -278,27 +283,232 @@ def speech_transcription(path):
     # [END video_speech_transcription_gcs]
 
 
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs]
+
+
+def video_detect_text(path):
+    # [START video_detect_text]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    for text_annotation in annotation_result.text_annotations:
+        print('\nText: {}'.format(text_annotation.text))
+
+        # Get the first text segment
+        text_segment = text_annotation.segments[0]
+        start_time = text_segment.segment.start_time_offset
+        end_time = text_segment.segment.end_time_offset
+        print('start_time: {}, end_time: {}'.format(
+            start_time.seconds + start_time.nanos * 1e-9,
+            end_time.seconds + end_time.nanos * 1e-9))
+
+        print('Confidence: {}'.format(text_segment.confidence))
+
+        # Show the result for the first frame in this segment.
+        frame = text_segment.frames[0]
+        time_offset = frame.time_offset
+        print('Time offset for the first frame: {}'.format(
+            time_offset.seconds + time_offset.nanos * 1e-9))
+        print('Rotated Bounding Box Vertices:')
+        for vertex in frame.rotated_bounding_box.vertices:
+            print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text]
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs]
+    """Object tracking in a video stored on GCS."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    for object_annotation in object_annotations:
+        print('Entity description: {}'.format(
+            object_annotation.entity.description))
+        if object_annotation.entity.entity_id:
+            print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+        print('Segment: {}s to {}s'.format(
+            object_annotation.segment.start_time_offset.seconds +
+            object_annotation.segment.start_time_offset.nanos / 1e9,
+            object_annotation.segment.end_time_offset.seconds +
+            object_annotation.segment.end_time_offset.nanos / 1e9))
+
+        print('Confidence: {}'.format(object_annotation.confidence))
+
+        # Here we print only the bounding box of the first frame in the segment
+        frame = object_annotation.frames[0]
+        box = frame.normalized_bounding_box
+        print('Time offset of the first frame: {}s'.format(
+            frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+        print('Bounding box position:')
+        print('\tleft  : {}'.format(box.left))
+        print('\ttop   : {}'.format(box.top))
+        print('\tright : {}'.format(box.right))
+        print('\tbottom: {}'.format(box.bottom))
+        print('\n')
+    # [END video_object_tracking_gcs]
+
+
+def track_objects(path):
+    # [START video_object_tracking]
+    """Object tracking in a local video."""
+    from google.cloud import videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features)
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking]
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
     subparsers = parser.add_subparsers(dest='command')
+
     analyze_labels_parser = subparsers.add_parser(
         'labels', help=analyze_labels.__doc__)
     analyze_labels_parser.add_argument('path')
+
     analyze_labels_file_parser = subparsers.add_parser(
         'labels_file', help=analyze_labels_file.__doc__)
     analyze_labels_file_parser.add_argument('path')
+
     analyze_explicit_content_parser = subparsers.add_parser(
         'explicit_content', help=analyze_explicit_content.__doc__)
     analyze_explicit_content_parser.add_argument('path')
+
     analyze_shots_parser = subparsers.add_parser(
         'shots', help=analyze_shots.__doc__)
     analyze_shots_parser.add_argument('path')
+
     transcribe_speech_parser = subparsers.add_parser(
         'transcribe', help=speech_transcription.__doc__)
     transcribe_speech_parser.add_argument('path')
 
+    detect_text_parser = subparsers.add_parser(
+        'text_gcs', help=video_detect_text_gcs.__doc__)
+    detect_text_parser.add_argument('path')
+
+    detect_text_file_parser = subparsers.add_parser(
+        'text_file', help=video_detect_text.__doc__)
+    detect_text_file_parser.add_argument('path')
+
+    track_objects_parser = subparsers.add_parser(
+        'objects_gcs', help=track_objects_gcs.__doc__)
+    track_objects_parser.add_argument('path')
+
+    track_objects_file_parser = subparsers.add_parser(
+        'objects_file', help=track_objects.__doc__)
+    track_objects_file_parser.add_argument('path')
+
     args = parser.parse_args()
 
     if args.command == 'labels':
@@ -311,3 +521,11 @@ def speech_transcription(path):
         analyze_explicit_content(args.path)
     if args.command == 'transcribe':
         speech_transcription(args.path)
+    if args.command == 'text_gcs':
+        video_detect_text_gcs(args.path)
+    if args.command == 'text_file':
+        video_detect_text(args.path)
+    if args.command == 'objects_gcs':
+        track_objects_gcs(args.path)
+    if args.command == 'objects_file':
+        track_objects(args.path)
```
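For quick manual verification, here is a minimal sketch of driving the two new GCS samples directly from Python rather than through the argparse CLI. It is not part of the commit; it assumes `analyze.py` is importable from the working directory and that Google Cloud credentials with Video Intelligence access are configured.

```python
# Hypothetical driver script, not part of this commit.
# Assumes analyze.py is on the import path and GCP credentials are set.
import analyze

# Text detection against the GCS asset referenced in the docstring examples.
analyze.video_detect_text_gcs(
    'gs://cloud-samples-data/video/googlework_short.mp4')

# Object tracking against the cat video used by the tests.
analyze.track_objects_gcs('gs://cloud-samples-data/video/cat.mp4')
```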

videointelligence/samples/analyze/analyze_test.py

Lines changed: 20 additions & 4 deletions
```diff
@@ -21,28 +21,44 @@
 
 @pytest.mark.slow
 def test_analyze_shots(capsys):
-    analyze.analyze_shots('gs://demomaker/gbikes_dinosaur.mp4')
+    analyze.analyze_shots('gs://cloud-samples-data/video/gbikes_dinosaur.mp4')
     out, _ = capsys.readouterr()
     assert 'Shot 1:' in out
 
 
 @pytest.mark.slow
 def test_analyze_labels(capsys):
-    analyze.analyze_labels('gs://demomaker/cat.mp4')
+    analyze.analyze_labels('gs://cloud-samples-data/video/cat.mp4')
     out, _ = capsys.readouterr()
     assert 'label description: cat' in out
 
 
 @pytest.mark.slow
 def test_analyze_explicit_content(capsys):
-    analyze.analyze_explicit_content('gs://demomaker/cat.mp4')
+    analyze.analyze_explicit_content('gs://cloud-samples-data/video/cat.mp4')
     out, _ = capsys.readouterr()
     assert 'pornography' in out
 
 
 @pytest.mark.slow
 def test_speech_transcription(capsys):
     analyze.speech_transcription(
-        'gs://python-docs-samples-tests/video/googlework_short.mp4')
+        'gs://cloud-samples-data/video/googlework_short.mp4')
     out, _ = capsys.readouterr()
     assert 'cultural' in out
+
+
+@pytest.mark.slow
+def test_detect_text_gcs(capsys):
+    analyze.video_detect_text_gcs(
+        'gs://cloud-samples-data/video/googlework_short.mp4')
+    out, _ = capsys.readouterr()
+    assert 'GOOGLE' in out
+
+
+@pytest.mark.slow
+def test_track_objects_gcs(capsys):
+    analyze.track_objects_gcs(
+        'gs://cloud-samples-data/video/cat.mp4')
+    out, _ = capsys.readouterr()
+    assert 'cat' in out
```
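The samples and tests above all print offsets computed as `seconds + nanos * 1e-9`, which converts a protobuf `Duration` field into fractional seconds. A self-contained illustration with a stand-in object (the real values come from the API response, and `FakeDuration` is hypothetical, not the actual protobuf class):

```python
# Stand-in for the protobuf Duration fields on segment/frame time offsets;
# illustration only, not google.protobuf.duration_pb2.Duration.
class FakeDuration:
    def __init__(self, seconds, nanos):
        self.seconds = seconds
        self.nanos = nanos

offset = FakeDuration(seconds=2, nanos=750000000)
# Same arithmetic as in video_detect_text_gcs and track_objects_gcs.
print(offset.seconds + offset.nanos * 1e-9)  # -> 2.75
```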
videointelligence/samples/analyze/requirements.txt

Lines changed: 1 addition & 1 deletion

```diff
@@ -1 +1 @@
-google-cloud-videointelligence==1.6.1
+google-cloud-videointelligence==1.7.0
```
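After installing from the updated requirements file, a quick sanity check can confirm the new pin is what actually resolved. A sketch, assuming setuptools' `pkg_resources` is available (the samples themselves don't require it):

```python
# Confirm the installed client matches the pin in requirements.txt.
import pkg_resources

version = pkg_resources.get_distribution(
    'google-cloud-videointelligence').version
assert version == '1.7.0', 'unexpected version: {}'.format(version)
```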
