
Commit 7764727

dizcology authored and danoscarmike committed
Face detection beta features (#1414)
1 parent dd31420 commit 7764727

3 files changed: +253 −1 lines changed
Lines changed: 203 additions & 0 deletions
@@ -0,0 +1,203 @@
#!/usr/bin/env python

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""This application demonstrates face detection, face emotions
and speech transcription using the Google Cloud API.

Usage Examples:
    python beta_snippets.py boxes \
        gs://python-docs-samples-tests/video/googlework_short.mp4

    python beta_snippets.py \
        emotions gs://python-docs-samples-tests/video/googlework_short.mp4

    python beta_snippets.py \
        transcription gs://python-docs-samples-tests/video/googlework_short.mp4
"""

import argparse

from google.cloud import videointelligence_v1p1beta1 as videointelligence


# [START video_face_bounding_boxes]
def face_bounding_boxes(gcs_uri):
    """ Detects faces' bounding boxes. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.FACE_DETECTION]

    config = videointelligence.types.FaceConfig(
        include_bounding_boxes=True)
    context = videointelligence.types.VideoContext(
        face_detection_config=config)

    operation = video_client.annotate_video(
        gcs_uri, features=features, video_context=context)
    print('\nProcessing video for face annotations:')

    result = operation.result(timeout=900)
    print('\nFinished processing.')

    # There is only one result because a single video was processed.
    faces = result.annotation_results[0].face_detection_annotations
    for i, face in enumerate(faces):
        print('Face {}'.format(i))

        # Each face_detection_annotation has only one segment.
        segment = face.segments[0]
        start_time = (segment.segment.start_time_offset.seconds +
                      segment.segment.start_time_offset.nanos / 1e9)
        end_time = (segment.segment.end_time_offset.seconds +
                    segment.segment.end_time_offset.nanos / 1e9)
        positions = '{}s to {}s'.format(start_time, end_time)
        print('\tSegment: {}\n'.format(positions))

        # Each detected face may appear in many frames of the video.
        # Here we process only the first frame.
        frame = face.frames[0]

        time_offset = (frame.time_offset.seconds +
                       frame.time_offset.nanos / 1e9)
        box = frame.attributes[0].normalized_bounding_box

        print('First frame time offset: {}s\n'.format(time_offset))

        print('First frame normalized bounding box:')
        print('\tleft : {}'.format(box.left))
        print('\ttop : {}'.format(box.top))
        print('\tright : {}'.format(box.right))
        print('\tbottom: {}'.format(box.bottom))
        print('\n')
# [END video_face_bounding_boxes]


# [START video_face_emotions]
def face_emotions(gcs_uri):
    """ Analyze faces' emotions over frames. """
    video_client = videointelligence.VideoIntelligenceServiceClient()
    features = [videointelligence.enums.Feature.FACE_DETECTION]

    config = videointelligence.types.FaceConfig(
        include_emotions=True)
    context = videointelligence.types.VideoContext(
        face_detection_config=config)

    operation = video_client.annotate_video(
        gcs_uri, features=features, video_context=context)
    print('\nProcessing video for face annotations:')

    result = operation.result(timeout=600)
    print('\nFinished processing.')

    # There is only one result because a single video was processed.
    faces = result.annotation_results[0].face_detection_annotations
    for i, face in enumerate(faces):
        for j, frame in enumerate(face.frames):
            time_offset = (frame.time_offset.seconds +
                           frame.time_offset.nanos / 1e9)
            emotions = frame.attributes[0].emotions

            print('Face {}, frame {}, time_offset {}\n'.format(
                i, j, time_offset))

            # from videointelligence.enums
            emotion_labels = (
                'EMOTION_UNSPECIFIED', 'AMUSEMENT', 'ANGER',
                'CONCENTRATION', 'CONTENTMENT', 'DESIRE',
                'DISAPPOINTMENT', 'DISGUST', 'ELATION',
                'EMBARRASSMENT', 'INTEREST', 'PRIDE', 'SADNESS',
                'SURPRISE')

            for emotion in emotions:
                emotion_index = emotion.emotion
                emotion_label = emotion_labels[emotion_index]
                emotion_score = emotion.score

                print('emotion: {} (confidence score: {})'.format(
                    emotion_label, emotion_score))

            print('\n')

        print('\n')
# [END video_face_emotions]


# [START video_speech_transcription]
def speech_transcription(input_uri):
    """Transcribe speech from a video stored on GCS."""
    video_client = videointelligence.VideoIntelligenceServiceClient()

    features = [videointelligence.enums.Feature.SPEECH_TRANSCRIPTION]

    config = videointelligence.types.SpeechTranscriptionConfig(
        language_code='en-US')
    video_context = videointelligence.types.VideoContext(
        speech_transcription_config=config)

    operation = video_client.annotate_video(
        input_uri, features=features,
        video_context=video_context)

    print('\nProcessing video for speech transcription.')

    result = operation.result(timeout=180)

    # There is only one annotation_result since only
    # one video is processed.
    annotation_results = result.annotation_results[0]
    speech_transcription = annotation_results.speech_transcriptions[0]
    alternative = speech_transcription.alternatives[0]

    print('Transcript: {}'.format(alternative.transcript))
    print('Confidence: {}\n'.format(alternative.confidence))

    print('Word level information:')
    for word_info in alternative.words:
        word = word_info.word
        start_time = word_info.start_time
        end_time = word_info.end_time
        print('\t{}s - {}s: {}'.format(
            start_time.seconds + start_time.nanos * 1e-9,
            end_time.seconds + end_time.nanos * 1e-9,
            word))
# [END video_speech_transcription]


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    subparsers = parser.add_subparsers(dest='command')
    analyze_faces_parser = subparsers.add_parser(
        'boxes', help=face_bounding_boxes.__doc__)
    analyze_faces_parser.add_argument('gcs_uri')

    analyze_emotions_parser = subparsers.add_parser(
        'emotions', help=face_emotions.__doc__)
    analyze_emotions_parser.add_argument('gcs_uri')

    speech_transcription_parser = subparsers.add_parser(
        'transcription', help=speech_transcription.__doc__)
    speech_transcription_parser.add_argument('gcs_uri')

    args = parser.parse_args()

    if args.command == 'boxes':
        face_bounding_boxes(args.gcs_uri)
    elif args.command == 'emotions':
        face_emotions(args.gcs_uri)
    elif args.command == 'transcription':
        speech_transcription(args.gcs_uri)
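
For quick experimentation, the three new functions can also be imported and called directly instead of going through the argparse CLI above. A minimal sketch, reusing the public test video from the module docstring (substitute your own GCS URI as needed):

    # Call the new sample functions directly; each one prints its results.
    import beta_snippets

    GCS_URI = 'gs://python-docs-samples-tests/video/googlework_short.mp4'

    beta_snippets.face_bounding_boxes(GCS_URI)   # segment times and first-frame boxes
    beta_snippets.face_emotions(GCS_URI)         # per-frame emotion labels and scores
    beta_snippets.speech_transcription(GCS_URI)  # transcript and word-level timings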
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
#!/usr/bin/env python

# Copyright 2017 Google, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import pytest

import beta_snippets


BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
FACES_SHORT_FILE_PATH = 'video/googlework_short.mp4'


@pytest.mark.slow
def test_face_bounding_boxes(capsys):
    beta_snippets.face_bounding_boxes(
        'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH))
    out, _ = capsys.readouterr()
    assert 'top :' in out


@pytest.mark.slow
def test_face_emotions(capsys):
    beta_snippets.face_emotions(
        'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH))
    out, _ = capsys.readouterr()
    assert 'CONCENTRATION' in out


@pytest.mark.slow
def test_speech_transcription(capsys):
    beta_snippets.speech_transcription(
        'gs://{}/{}'.format(BUCKET, FACES_SHORT_FILE_PATH))
    out, _ = capsys.readouterr()
    assert 'cultural' in out
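
These tests read the bucket name from the CLOUD_STORAGE_BUCKET environment variable and are gated behind the slow marker. A minimal local invocation sketch; the test filename and the bucket value below are assumptions for illustration, not part of the diff:

    import os
    import pytest

    # Assumed values for illustration only; point these at your own bucket.
    os.environ.setdefault('CLOUD_STORAGE_BUCKET', 'python-docs-samples-tests')
    pytest.main(['-m', 'slow', 'beta_snippets_test.py'])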
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-google-cloud-videointelligence==1.0.1
+google-cloud-videointelligence==1.1.0
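
The pinned client is bumped so that the beta module imported by the sample is available. A quick import check after installing from requirements.txt (a sketch, not part of the commit):

    # Verify the beta client used by beta_snippets.py can be imported.
    from google.cloud import videointelligence_v1p1beta1 as videointelligence

    client = videointelligence.VideoIntelligenceServiceClient()
    print('Client ready:', type(client).__name__)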

0 commit comments