
Commit dc9c831

nnegrey authored and danoscarmike committed
Add beta snippets for object tracking / text detection [(#1773)](GoogleCloudPlatform/python-docs-samples#1773)
* Add beta snippets for object tracking / text detection
* Update beta_snippets_test.py
* Update beta_snippets.py
* Revert to using explicit URIs
* linter
1 parent eb6f109 commit dc9c831

File tree

7 files changed: +330 -7 lines changed


packages/google-cloud-videointelligence/samples/analyze/README.rst

Lines changed: 40 additions & 0 deletions
@@ -100,6 +100,46 @@ To run this sample:
beta samples
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

.. image:: https://gstatic.com/cloudssh/images/open-btn.png
   :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=video/cloud-client/analyze/beta_snippets.py,video/cloud-client/analyze/README.rst

To run this sample:

.. code-block:: bash

    $ python beta_snippets.py

    usage: beta_snippets.py [-h]
                            {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
                            ...

    This application demonstrates speech transcription using the
    Google Cloud API.

    Usage Examples:
        python beta_snippets.py transcription gs://python-docs-samples-tests/video/googlework_short.mp4
        python beta_snippets.py video-text-gcs gs://python-docs-samples-tests/video/googlework_short.mp4
        python beta_snippets.py track-objects /resources/cat.mp4

    positional arguments:
      {transcription,video-text-gcs,video-text,track-objects-gcs,track-objects}
        transcription      Transcribe speech from a video stored on GCS.
        video-text-gcs     Detect text in a video stored on GCS.
        video-text         Detect text in a local video.
        track-objects-gcs  Object Tracking.
        track-objects      Object Tracking.

    optional arguments:
      -h, --help         show this help message and exit

The client library

packages/google-cloud-videointelligence/samples/analyze/README.rst.in

Lines changed: 3 additions & 0 deletions
@@ -16,6 +16,9 @@ samples:
 - name: analyze
   file: analyze.py
   show_help: True
+- name: beta samples
+  file: beta_snippets.py
+  show_help: True
 
 cloud_client_library: true

packages/google-cloud-videointelligence/samples/analyze/beta_snippets.py

Lines changed: 229 additions & 6 deletions
@@ -18,18 +18,22 @@
 Google Cloud API.
 
 Usage Examples:
-    python beta_snippets.py \
-        transcription gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py transcription \
+        gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py video-text-gcs \
+        gs://python-docs-samples-tests/video/googlework_short.mp4
+    python beta_snippets.py track-objects /resources/cat.mp4
 """
 
 import argparse
+import io
 
-from google.cloud import videointelligence_v1p1beta1 as videointelligence
 
-
-# [START video_speech_transcription_gcs_beta]
 def speech_transcription(input_uri):
+    # [START video_speech_transcription_gcs_beta]
     """Transcribe speech from a video stored on GCS."""
+    from google.cloud import videointelligence_v1p1beta1 as videointelligence
+
     video_client = videointelligence.VideoIntelligenceServiceClient()
 
     features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

@@ -66,7 +70,202 @@ def speech_transcription(input_uri):
                 start_time.seconds + start_time.nanos * 1e-9,
                 end_time.seconds + end_time.nanos * 1e-9,
                 word))
-# [END video_speech_transcription_gcs_beta]
+    # [END video_speech_transcription_gcs_beta]
+
+
+def video_detect_text_gcs(input_uri):
+    # [START video_detect_text_gcs_beta]
+    """Detect text in a video stored on GCS."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+
+    operation = video_client.annotate_video(
+        input_uri=input_uri,
+        features=features)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_gcs_beta]
+    return annotation_result.text_annotations
+
+
+def video_detect_text(path):
+    # [START video_detect_text_beta]
+    """Detect text in a local video."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.TEXT_DETECTION]
+    video_context = videointelligence.types.VideoContext()
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    operation = video_client.annotate_video(
+        input_content=input_content,  # the bytes of the video file
+        features=features,
+        video_context=video_context)
+
+    print('\nProcessing video for text detection.')
+    result = operation.result(timeout=300)
+
+    # The first result is retrieved because a single video was processed.
+    annotation_result = result.annotation_results[0]
+
+    # Get only the first result
+    text_annotation = annotation_result.text_annotations[0]
+    print('\nText: {}'.format(text_annotation.text))
+
+    # Get the first text segment
+    text_segment = text_annotation.segments[0]
+    start_time = text_segment.segment.start_time_offset
+    end_time = text_segment.segment.end_time_offset
+    print('start_time: {}, end_time: {}'.format(
+        start_time.seconds + start_time.nanos * 1e-9,
+        end_time.seconds + end_time.nanos * 1e-9))
+
+    print('Confidence: {}'.format(text_segment.confidence))
+
+    # Show the result for the first frame in this segment.
+    frame = text_segment.frames[0]
+    time_offset = frame.time_offset
+    print('Time offset for the first frame: {}'.format(
+        time_offset.seconds + time_offset.nanos * 1e-9))
+    print('Rotated Bounding Box Vertices:')
+    for vertex in frame.rotated_bounding_box.vertices:
+        print('\tVertex.x: {}, Vertex.y: {}'.format(vertex.x, vertex.y))
+    # [END video_detect_text_beta]
+    return annotation_result.text_annotations
+
+
+def track_objects_gcs(gcs_uri):
+    # [START video_object_tracking_gcs_beta]
+    """Object Tracking."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    # It is recommended to use location_id as 'us-east1' for the best latency
+    # due to different types of processors used in this region and others.
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+    operation = video_client.annotate_video(
+        input_uri=gcs_uri, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_gcs_beta]
+    return object_annotations
+
+
+def track_objects(path):
+    # [START video_object_tracking_beta]
+    """Object Tracking."""
+    from google.cloud import videointelligence_v1p2beta1 as videointelligence
+
+    video_client = videointelligence.VideoIntelligenceServiceClient()
+    features = [videointelligence.enums.Feature.OBJECT_TRACKING]
+
+    with io.open(path, 'rb') as file:
+        input_content = file.read()
+
+    # It is recommended to use location_id as 'us-east1' for the best latency
+    # due to different types of processors used in this region and others.
+    operation = video_client.annotate_video(
+        input_content=input_content, features=features, location_id='us-east1')
+    print('\nProcessing video for object annotations.')
+
+    result = operation.result(timeout=300)
+    print('\nFinished processing.\n')
+
+    # The first result is retrieved because a single video was processed.
+    object_annotations = result.annotation_results[0].object_annotations
+
+    # Get only the first annotation for demo purposes.
+    object_annotation = object_annotations[0]
+    print('Entity description: {}'.format(
+        object_annotation.entity.description))
+    if object_annotation.entity.entity_id:
+        print('Entity id: {}'.format(object_annotation.entity.entity_id))
+
+    print('Segment: {}s to {}s'.format(
+        object_annotation.segment.start_time_offset.seconds +
+        object_annotation.segment.start_time_offset.nanos / 1e9,
+        object_annotation.segment.end_time_offset.seconds +
+        object_annotation.segment.end_time_offset.nanos / 1e9))
+
+    print('Confidence: {}'.format(object_annotation.confidence))
+
+    # Here we print only the bounding box of the first frame in this segment
+    frame = object_annotation.frames[0]
+    box = frame.normalized_bounding_box
+    print('Time offset of the first frame: {}s'.format(
+        frame.time_offset.seconds + frame.time_offset.nanos / 1e9))
+    print('Bounding box position:')
+    print('\tleft  : {}'.format(box.left))
+    print('\ttop   : {}'.format(box.top))
+    print('\tright : {}'.format(box.right))
+    print('\tbottom: {}'.format(box.bottom))
+    print('\n')
+    # [END video_object_tracking_beta]
+    return object_annotations
 
 
 if __name__ == '__main__':
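
The snippets above convert protobuf Duration values to float seconds by hand (seconds + nanos * 1e-9); a Duration stores whole seconds and a nanosecond remainder in two separate integer fields. A minimal standalone sketch of the same arithmetic, using a made-up stand-in object rather than a real API response:

# Stand-in for a protobuf Duration; real responses expose the same two fields.
class FakeDuration:
    seconds = 7
    nanos = 250000000  # 0.25 s expressed in nanoseconds

offset = FakeDuration()

# Same arithmetic as the snippets: 7 + 0.25 -> prints 7.25
print(offset.seconds + offset.nanos * 1e-9)
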
@@ -79,7 +278,31 @@ def speech_transcription(input_uri):
         'transcription', help=speech_transcription.__doc__)
     speech_transcription_parser.add_argument('gcs_uri')
 
+    video_text_gcs_parser = subparsers.add_parser(
+        'video-text-gcs', help=video_detect_text_gcs.__doc__)
+    video_text_gcs_parser.add_argument('gcs_uri')
+
+    video_text_parser = subparsers.add_parser(
+        'video-text', help=video_detect_text.__doc__)
+    video_text_parser.add_argument('path')
+
+    video_object_tracking_gcs_parser = subparsers.add_parser(
+        'track-objects-gcs', help=track_objects_gcs.__doc__)
+    video_object_tracking_gcs_parser.add_argument('gcs_uri')
+
+    video_object_tracking_parser = subparsers.add_parser(
+        'track-objects', help=track_objects.__doc__)
+    video_object_tracking_parser.add_argument('path')
+
     args = parser.parse_args()
 
     if args.command == 'transcription':
         speech_transcription(args.gcs_uri)
+    elif args.command == 'video-text-gcs':
+        video_detect_text_gcs(args.gcs_uri)
+    elif args.command == 'video-text':
+        video_detect_text(args.path)
+    elif args.command == 'track-objects-gcs':
+        track_objects_gcs(args.gcs_uri)
+    elif args.command == 'track-objects':
+        track_objects(args.path)
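
Because each new snippet returns its annotations (the tests below rely on that), the functions can also be driven straight from Python rather than through the argparse CLI. A short sketch, assuming a hypothetical gs://my-bucket/my-video.mp4 URI that your credentials can read:

import beta_snippets

# Hypothetical input; substitute a GCS URI your project can access.
annotations = beta_snippets.video_detect_text_gcs('gs://my-bucket/my-video.mp4')

# Each text annotation carries the detected string plus its timed segments.
for annotation in annotations:
    print(annotation.text)
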

packages/google-cloud-videointelligence/samples/analyze/beta_snippets_test.py

Lines changed: 57 additions & 0 deletions
@@ -18,10 +18,67 @@
 
 import beta_snippets
 
+POSSIBLE_TEXTS = ['Google', 'SUR', 'SUR', 'ROTO', 'Vice President', '58oo9',
+                  'LONDRES', 'OMAR', 'PARIS', 'METRO', 'RUE', 'CARLO']
+
 
 @pytest.mark.slow
 def test_speech_transcription(capsys):
     beta_snippets.speech_transcription(
         'gs://python-docs-samples-tests/video/googlework_short.mp4')
     out, _ = capsys.readouterr()
     assert 'cultural' in out
+
+
+@pytest.mark.slow
+def test_detect_text():
+    in_file = './resources/googlework_short.mp4'
+    text_annotations = beta_snippets.video_detect_text(in_file)
+
+    text_exists = False
+    for text_annotation in text_annotations:
+        for possible_text in POSSIBLE_TEXTS:
+            if possible_text.upper() in text_annotation.text.upper():
+                text_exists = True
+    assert text_exists
+
+
+@pytest.mark.slow
+def test_detect_text_gcs():
+    in_file = 'gs://python-docs-samples-tests/video/googlework_short.mp4'
+    text_annotations = beta_snippets.video_detect_text_gcs(in_file)
+
+    text_exists = False
+    for text_annotation in text_annotations:
+        for possible_text in POSSIBLE_TEXTS:
+            if possible_text.upper() in text_annotation.text.upper():
+                text_exists = True
+    assert text_exists
+
+
+@pytest.mark.slow
+def test_track_objects():
+    in_file = './resources/cat.mp4'
+    object_annotations = beta_snippets.track_objects(in_file)
+
+    text_exists = False
+    for object_annotation in object_annotations:
+        if 'CAT' in object_annotation.entity.description.upper():
+            text_exists = True
+    assert text_exists
+    assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
+    assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
+
+
+@pytest.mark.slow
+def test_track_objects_gcs():
+    in_file = 'gs://demomaker/cat.mp4'
+    object_annotations = beta_snippets.track_objects_gcs(in_file)
+
+    text_exists = False
+    for object_annotation in object_annotations:
+        if 'CAT' in object_annotation.entity.description.upper():
+            text_exists = True
+    assert text_exists
+    assert object_annotations[0].frames[0].normalized_bounding_box.left >= 0.0
+    assert object_annotations[0].frames[0].normalized_bounding_box.left <= 1.0
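
All five tests are marked @pytest.mark.slow, so they can be selected or excluded as a group. One way to run only this file's slow tests from Python, assuming pytest and the sample's requirements are installed (the shell equivalent is pytest -m slow beta_snippets_test.py):

import pytest

# Select only tests marked 'slow' in this file; returns a shell-style exit code.
exit_code = pytest.main(['-m', 'slow', 'beta_snippets_test.py'])
print('pytest exited with', exit_code)
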
packages/google-cloud-videointelligence/samples/analyze/requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-google-cloud-videointelligence==1.3.0
+google-cloud-videointelligence==1.5.0
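
The pin moves from 1.3.0 to 1.5.0, presumably so the videointelligence_v1p2beta1 client imported by the new snippets is available. A quick local sanity check (a sketch, assuming setuptools' pkg_resources is importable):

import pkg_resources

# The v1p2beta1 surface used by the new snippets should import cleanly.
from google.cloud import videointelligence_v1p2beta1  # noqa: F401

# Report the installed client library version, e.g. '1.5.0'.
print(pkg_resources.get_distribution('google-cloud-videointelligence').version)
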
Binary file not shown.
