@@ -14,16 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This application demonstrates face detection, label detection, safe search,
-and shot change detection using the Google Cloud API.
+"""This application demonstrates face detection, label detection,
+explicit content, and shot change detection using the Google Cloud API.
 
 Usage Examples:
 
     python analyze.py faces gs://demomaker/google_gmail.mp4
     python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
     python analyze.py labels_file resources/cat.mp4
     python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
-    python analyze.py safe_search gs://demomaker/gbikes_dinosaur.mp4
+    python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
 
 """
 
@@ -33,18 +33,18 @@
 import sys
 import time
 
-from google.cloud.gapic.videointelligence.v1beta1 import enums
-from google.cloud.gapic.videointelligence.v1beta1 import (
-    video_intelligence_service_client)
+from google.cloud import videointelligence_v1beta2
+from google.cloud.videointelligence_v1beta2 import enums
+from google.cloud.videointelligence_v1beta2 import types
 
 
-def analyze_safe_search(path):
-    """ Detects safe search features the GCS path to a video. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
-    features = [enums.Feature.SAFE_SEARCH_DETECTION]
+def analyze_explicit_content(path):
+    """ Detects explicit content from the GCS path to a video. """
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
+    features = [enums.Feature.EXPLICIT_CONTENT_DETECTION]
+
     operation = video_client.annotate_video(path, features)
-    print('\nProcessing video for safe search annotations:')
+    print('\nProcessing video for explicit content annotations:')
 
     while not operation.done():
         sys.stdout.write('.')
@@ -54,27 +54,29 @@ def analyze_safe_search(path):
     print('\nFinished processing.')
 
     # first result is retrieved because a single video was processed
-    safe_annotations = (operation.result().annotation_results[0].
-                        safe_search_annotations)
+    explicit_annotation = (operation.result().annotation_results[0].
+                           explicit_annotation)
 
     likely_string = ("Unknown", "Very unlikely", "Unlikely", "Possible",
                      "Likely", "Very likely")
 
-    for note in safe_annotations:
-        print('Time: {}s'.format(note.time_offset / 1000000.0))
-        print('\tadult: {}'.format(likely_string[note.adult]))
-        print('\tspoof: {}'.format(likely_string[note.spoof]))
-        print('\tmedical: {}'.format(likely_string[note.medical]))
-        print('\tracy: {}'.format(likely_string[note.racy]))
-        print('\tviolent: {}\n'.format(likely_string[note.violent]))
+    for frame in explicit_annotation.frames:
+        frame_time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
+        print('Time: {}s'.format(frame_time))
+        print('\tpornography: {}'.format(
+            likely_string[frame.pornography_likelihood]))
 
 
 def analyze_faces(path):
     """ Detects faces given a GCS path. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.FACE_DETECTION]
-    operation = video_client.annotate_video(path, features)
+
+    config = types.FaceDetectionConfig(include_bounding_boxes=True)
+    context = types.VideoContext(face_detection_config=config)
+
+    operation = video_client.annotate_video(
+        path, features, video_context=context)
 
     print('\nProcessing video for face annotations:')
 
     while not operation.done():
@@ -89,27 +91,43 @@ def analyze_faces(path):
                         face_annotations)
 
     for face_id, face in enumerate(face_annotations):
+        print('Face {}'.format(face_id))
         print('Thumbnail size: {}'.format(len(face.thumbnail)))
 
         for segment_id, segment in enumerate(face.segments):
-            positions = 'Entire video'
-            if (segment.start_time_offset != -1 or
-                    segment.end_time_offset != -1):
-                positions = '{}s to {}s'.format(
-                    segment.start_time_offset / 1000000.0,
-                    segment.end_time_offset / 1000000.0)
-
-            print('\tTrack {}: {}'.format(segment_id, positions))
-
+            start_time = (segment.segment.start_time_offset.seconds +
+                          segment.segment.start_time_offset.nanos / 1e9)
+            end_time = (segment.segment.end_time_offset.seconds +
+                        segment.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            print('\tSegment {}: {}'.format(segment_id, positions))
+
+        # There are typically many frames for each face,
+        # here we print information on only the first frame.
+        frame = face.frames[0]
+        time_offset = (frame.time_offset.seconds +
+                       frame.time_offset.nanos / 1e9)
+        box = frame.normalized_bounding_boxes[0]
+        print('First frame time offset: {}s'.format(time_offset))
+        print('First frame normalized bounding box:')
+        print('\tleft: {}'.format(box.left))
+        print('\ttop: {}'.format(box.top))
+        print('\tright: {}'.format(box.right))
+        print('\tbottom: {}'.format(box.bottom))
         print('\n')
 
 
 def analyze_labels(path):
     """ Detects labels given a GCS path. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.LABEL_DETECTION]
-    operation = video_client.annotate_video(path, features)
+
+    config = types.LabelDetectionConfig(
+        label_detection_mode=enums.LabelDetectionMode.SHOT_AND_FRAME_MODE)
+    context = types.VideoContext(label_detection_config=config)
+
+    operation = video_client.annotate_video(
+        path, features, video_context=context)
 
     print('\nProcessing video for label annotations:')
 
     while not operation.done():
@@ -122,26 +140,65 @@ def analyze_labels(path):
     # first result is retrieved because a single video was processed
     results = operation.result().annotation_results[0]
 
-    for i, label in enumerate(results.label_annotations):
-        print('Label description: {}'.format(label.description))
-        print('Locations:')
+    # Process video/segment level label annotations
+    for i, segment_label in enumerate(results.segment_label_annotations):
+        print('Video label description: {}'.format(
+            segment_label.entity.description))
+        for category_entity in segment_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, segment in enumerate(segment_label.segments):
+            start_time = (segment.segment.start_time_offset.seconds +
+                          segment.segment.start_time_offset.nanos / 1e9)
+            end_time = (segment.segment.end_time_offset.seconds +
+                        segment.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = segment.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
-        for l, location in enumerate(label.locations):
-            positions = 'Entire video'
-            if (location.segment.start_time_offset != -1 or
-                    location.segment.end_time_offset != -1):
-                positions = '{}s to {}s'.format(
-                    location.segment.start_time_offset / 1000000.0,
-                    location.segment.end_time_offset / 1000000.0)
-            print('\t{}: {}'.format(l, positions))
+    # Process shot level label annotations
+    for i, shot_label in enumerate(results.shot_label_annotations):
+        print('Shot label description: {}'.format(
+            shot_label.entity.description))
+        for category_entity in shot_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, shot in enumerate(shot_label.segments):
+            start_time = (shot.segment.start_time_offset.seconds +
+                          shot.segment.start_time_offset.nanos / 1e9)
+            end_time = (shot.segment.end_time_offset.seconds +
+                        shot.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = shot.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
+    # Process frame level label annotations
+    for i, frame_label in enumerate(results.frame_label_annotations):
+        print('Frame label description: {}'.format(
+            frame_label.entity.description))
+        for category_entity in frame_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        # Each frame_label_annotation has many frames,
+        # here we print information only about the first frame.
+        frame = frame_label.frames[0]
+        time_offset = (frame.time_offset.seconds +
+                       frame.time_offset.nanos / 1e9)
+        print('\tFirst frame time offset: {}s'.format(time_offset))
+        print('\tFirst frame confidence: {}'.format(frame.confidence))
         print('\n')
 
 
 def analyze_labels_file(path):
     """ Detects labels given a file path. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.LABEL_DETECTION]
 
     with io.open(path, "rb") as movie:
@@ -161,26 +218,64 @@ def analyze_labels_file(path):
     # first result is retrieved because a single video was processed
     results = operation.result().annotation_results[0]
 
-    for i, label in enumerate(results.label_annotations):
-        print('Label description: {}'.format(label.description))
-        print('Locations:')
+    # Process video/segment level label annotations
+    for i, segment_label in enumerate(results.segment_label_annotations):
+        print('Video label description: {}'.format(
+            segment_label.entity.description))
+        for category_entity in segment_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, segment in enumerate(segment_label.segments):
+            start_time = (segment.segment.start_time_offset.seconds +
+                          segment.segment.start_time_offset.nanos / 1e9)
+            end_time = (segment.segment.end_time_offset.seconds +
+                        segment.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = segment.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
-        for l, location in enumerate(label.locations):
-            positions = 'Entire video'
-            if (location.segment.start_time_offset != -1 or
-                    location.segment.end_time_offset != -1):
-                positions = '{} to {}'.format(
-                    location.segment.start_time_offset / 1000000.0,
-                    location.segment.end_time_offset / 1000000.0)
-            print('\t{}: {}'.format(l, positions))
+    # Process shot level label annotations
+    for i, shot_label in enumerate(results.shot_label_annotations):
+        print('Shot label description: {}'.format(
+            shot_label.entity.description))
+        for category_entity in shot_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, shot in enumerate(shot_label.segments):
+            start_time = (shot.segment.start_time_offset.seconds +
+                          shot.segment.start_time_offset.nanos / 1e9)
+            end_time = (shot.segment.end_time_offset.seconds +
+                        shot.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = shot.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
+    # Process frame level label annotations
+    for i, frame_label in enumerate(results.frame_label_annotations):
+        print('Frame label description: {}'.format(
+            frame_label.entity.description))
+        for category_entity in frame_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        # Each frame_label_annotation has many frames,
+        # here we print information only about the first frame.
+        frame = frame_label.frames[0]
+        time_offset = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
+        print('\tFirst frame time offset: {}s'.format(time_offset))
+        print('\tFirst frame confidence: {}'.format(frame.confidence))
         print('\n')
 
 
 def analyze_shots(path):
     """ Detects camera shot changes. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.SHOT_CHANGE_DETECTION]
     operation = video_client.annotate_video(path, features)
     print('\nProcessing video for shot change annotations:')
@@ -193,13 +288,14 @@ def analyze_shots(path):
     print('\nFinished processing.')
 
    # first result is retrieved because a single video was processed
-    shots = operation.result().annotation_results[0]
+    shots = operation.result().annotation_results[0].shot_annotations
 
-    for note, shot in enumerate(shots.shot_annotations):
-        print('\tScene {}: {} to {}'.format(
-            note,
-            shot.start_time_offset / 1000000.0,
-            shot.end_time_offset / 1000000.0))
+    for i, shot in enumerate(shots):
+        start_time = (shot.start_time_offset.seconds +
+                      shot.start_time_offset.nanos / 1e9)
+        end_time = (shot.end_time_offset.seconds +
+                    shot.end_time_offset.nanos / 1e9)
+        print('\tShot {}: {} to {}'.format(i, start_time, end_time))
 
 
 if __name__ == '__main__':
@@ -216,9 +312,9 @@ def analyze_shots(path):
     analyze_labels_file_parser = subparsers.add_parser(
         'labels_file', help=analyze_labels_file.__doc__)
     analyze_labels_file_parser.add_argument('path')
-    analyze_safe_search_parser = subparsers.add_parser(
-        'safe_search', help=analyze_safe_search.__doc__)
-    analyze_safe_search_parser.add_argument('path')
+    analyze_explicit_content_parser = subparsers.add_parser(
+        'explicit_content', help=analyze_explicit_content.__doc__)
+    analyze_explicit_content_parser.add_argument('path')
     analyze_shots_parser = subparsers.add_parser(
         'shots', help=analyze_shots.__doc__)
     analyze_shots_parser.add_argument('path')
@@ -233,5 +329,5 @@ def analyze_shots(path):
         analyze_labels_file(args.path)
     if args.command == 'shots':
         analyze_shots(args.path)
-    if args.command == 'safe_search':
-        analyze_safe_search(args.path)
+    if args.command == 'explicit_content':
+        analyze_explicit_content(args.path)