@@ -14,16 +14,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""This application demonstrates face detection, label detection, safe search,
-and shot change detection using the Google Cloud API.
+"""This application demonstrates face detection, label detection,
+explicit content, and shot change detection using the Google Cloud API.
 
 Usage Examples:
 
     python analyze.py faces gs://demomaker/google_gmail.mp4
     python analyze.py labels gs://cloud-ml-sandbox/video/chicago.mp4
     python analyze.py labels_file resources/cat.mp4
     python analyze.py shots gs://demomaker/gbikes_dinosaur.mp4
-    python analyze.py safe_search gs://demomaker/gbikes_dinosaur.mp4
+    python analyze.py explicit_content gs://demomaker/gbikes_dinosaur.mp4
 
 """
 
@@ -33,18 +33,18 @@
 import sys
 import time
 
-from google.cloud.gapic.videointelligence.v1beta1 import enums
-from google.cloud.gapic.videointelligence.v1beta1 import (
-    video_intelligence_service_client)
+from google.cloud import videointelligence_v1beta2
+from google.cloud.videointelligence_v1beta2 import enums
+from google.cloud.videointelligence_v1beta2 import types
 
 
-def analyze_safe_search(path):
-    """ Detects safe search features the GCS path to a video. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
-    features = [enums.Feature.SAFE_SEARCH_DETECTION]
+def analyze_explicit_content(path):
+    """ Detects explicit content from the GCS path to a video. """
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
+    features = [enums.Feature.EXPLICIT_CONTENT_DETECTION]
+
     operation = video_client.annotate_video(path, features)
-    print('\nProcessing video for safe search annotations:')
+    print('\nProcessing video for explicit content annotations:')
 
     while not operation.done():
         sys.stdout.write('.')
@@ -54,27 +54,29 @@ def analyze_safe_search(path):
     print('\nFinished processing.')
 
     # first result is retrieved because a single video was processed
-    safe_annotations = (operation.result().annotation_results[0].
-                        safe_search_annotations)
+    explicit_annotation = (operation.result().annotation_results[0].
+                           explicit_annotation)
 
     likely_string = ("Unknown", "Very unlikely", "Unlikely", "Possible",
                      "Likely", "Very likely")
 
-    for note in safe_annotations:
-        print('Time: {}s'.format(note.time_offset / 1000000.0))
-        print('\tadult: {}'.format(likely_string[note.adult]))
-        print('\tspoof: {}'.format(likely_string[note.spoof]))
-        print('\tmedical: {}'.format(likely_string[note.medical]))
-        print('\tracy: {}'.format(likely_string[note.racy]))
-        print('\tviolent: {}\n'.format(likely_string[note.violent]))
+    for frame in explicit_annotation.frames:
+        frame_time = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
+        print('Time: {}s'.format(frame_time))
+        print('\tpornography: {}'.format(
+            likely_string[frame.pornography_likelihood]))
 
 
 def analyze_faces(path):
     """ Detects faces given a GCS path. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.FACE_DETECTION]
-    operation = video_client.annotate_video(path, features)
+
+    config = types.FaceDetectionConfig(include_bounding_boxes=True)
+    context = types.VideoContext(face_detection_config=config)
+
+    operation = video_client.annotate_video(
+        path, features, video_context=context)
 
     print('\nProcessing video for face annotations:')
 
     while not operation.done():
@@ -89,27 +91,43 @@ def analyze_faces(path):
                         face_annotations)
 
     for face_id, face in enumerate(face_annotations):
+        print('Face {}'.format(face_id))
         print('Thumbnail size: {}'.format(len(face.thumbnail)))
 
         for segment_id, segment in enumerate(face.segments):
-            positions = 'Entire video'
-            if (segment.start_time_offset != -1 or
-                    segment.end_time_offset != -1):
-                positions = '{}s to {}s'.format(
-                    segment.start_time_offset / 1000000.0,
-                    segment.end_time_offset / 1000000.0)
-
-            print('\tTrack {}: {}'.format(segment_id, positions))
-
+            start_time = (segment.segment.start_time_offset.seconds +
+                          segment.segment.start_time_offset.nanos / 1e9)
+            end_time = (segment.segment.end_time_offset.seconds +
+                        segment.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            print('\tSegment {}: {}'.format(segment_id, positions))
+
+        # There are typically many frames for each face,
+        # here we print information on only the first frame.
+        frame = face.frames[0]
+        time_offset = (frame.time_offset.seconds +
+                       frame.time_offset.nanos / 1e9)
+        box = frame.normalized_bounding_boxes[0]
+        print('First frame time offset: {}s'.format(time_offset))
+        print('First frame normalized bounding box:')
+        print('\tleft: {}'.format(box.left))
+        print('\ttop: {}'.format(box.top))
+        print('\tright: {}'.format(box.right))
+        print('\tbottom: {}'.format(box.bottom))
         print('\n')
 
 
 def analyze_labels(path):
     """ Detects labels given a GCS path. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.LABEL_DETECTION]
-    operation = video_client.annotate_video(path, features)
+
+    config = types.LabelDetectionConfig(
+        label_detection_mode=enums.LabelDetectionMode.SHOT_AND_FRAME_MODE)
+    context = types.VideoContext(label_detection_config=config)
+
+    operation = video_client.annotate_video(
+        path, features, video_context=context)
 
     print('\nProcessing video for label annotations:')
 
     while not operation.done():
@@ -122,26 +140,65 @@ def analyze_labels(path):
     # first result is retrieved because a single video was processed
     results = operation.result().annotation_results[0]
 
-    for i, label in enumerate(results.label_annotations):
-        print('Label description: {}'.format(label.description))
-        print('Locations:')
+    # Process video/segment level label annotations
+    for i, segment_label in enumerate(results.segment_label_annotations):
+        print('Video label description: {}'.format(
+            segment_label.entity.description))
+        for category_entity in segment_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, segment in enumerate(segment_label.segments):
+            start_time = (segment.segment.start_time_offset.seconds +
+                          segment.segment.start_time_offset.nanos / 1e9)
+            end_time = (segment.segment.end_time_offset.seconds +
+                        segment.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = segment.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
-        for l, location in enumerate(label.locations):
-            positions = 'Entire video'
-            if (location.segment.start_time_offset != -1 or
-                    location.segment.end_time_offset != -1):
-                positions = '{}s to {}s'.format(
-                    location.segment.start_time_offset / 1000000.0,
-                    location.segment.end_time_offset / 1000000.0)
-            print('\t{}: {}'.format(l, positions))
+    # Process shot level label annotations
+    for i, shot_label in enumerate(results.shot_label_annotations):
+        print('Shot label description: {}'.format(
+            shot_label.entity.description))
+        for category_entity in shot_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, shot in enumerate(shot_label.segments):
+            start_time = (shot.segment.start_time_offset.seconds +
+                          shot.segment.start_time_offset.nanos / 1e9)
+            end_time = (shot.segment.end_time_offset.seconds +
+                        shot.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = shot.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
+    # Process frame level label annotations
+    for i, frame_label in enumerate(results.frame_label_annotations):
+        print('Frame label description: {}'.format(
+            frame_label.entity.description))
+        for category_entity in frame_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        # Each frame_label_annotation has many frames,
+        # here we print information only about the first frame.
+        frame = frame_label.frames[0]
+        time_offset = (frame.time_offset.seconds +
+                       frame.time_offset.nanos / 1e9)
+        print('\tFirst frame time offset: {}s'.format(time_offset))
+        print('\tFirst frame confidence: {}'.format(frame.confidence))
         print('\n')
 
 
 def analyze_labels_file(path):
     """ Detects labels given a file path. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.LABEL_DETECTION]
 
     with io.open(path, "rb") as movie:
@@ -161,26 +218,64 @@ def analyze_labels_file(path):
     # first result is retrieved because a single video was processed
     results = operation.result().annotation_results[0]
 
-    for i, label in enumerate(results.label_annotations):
-        print('Label description: {}'.format(label.description))
-        print('Locations:')
+    # Process video/segment level label annotations
+    for i, segment_label in enumerate(results.segment_label_annotations):
+        print('Video label description: {}'.format(
+            segment_label.entity.description))
+        for category_entity in segment_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, segment in enumerate(segment_label.segments):
+            start_time = (segment.segment.start_time_offset.seconds +
+                          segment.segment.start_time_offset.nanos / 1e9)
+            end_time = (segment.segment.end_time_offset.seconds +
+                        segment.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = segment.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
-        for l, location in enumerate(label.locations):
-            positions = 'Entire video'
-            if (location.segment.start_time_offset != -1 or
-                    location.segment.end_time_offset != -1):
-                positions = '{} to {}'.format(
-                    location.segment.start_time_offset / 1000000.0,
-                    location.segment.end_time_offset / 1000000.0)
-            print('\t{}: {}'.format(l, positions))
+    # Process shot level label annotations
+    for i, shot_label in enumerate(results.shot_label_annotations):
+        print('Shot label description: {}'.format(
+            shot_label.entity.description))
+        for category_entity in shot_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        for i, shot in enumerate(shot_label.segments):
+            start_time = (shot.segment.start_time_offset.seconds +
+                          shot.segment.start_time_offset.nanos / 1e9)
+            end_time = (shot.segment.end_time_offset.seconds +
+                        shot.segment.end_time_offset.nanos / 1e9)
+            positions = '{}s to {}s'.format(start_time, end_time)
+            confidence = shot.confidence
+            print('\tSegment {}: {}'.format(i, positions))
+            print('\tConfidence: {}'.format(confidence))
+        print('\n')
 
+    # Process frame level label annotations
+    for i, frame_label in enumerate(results.frame_label_annotations):
+        print('Frame label description: {}'.format(
+            frame_label.entity.description))
+        for category_entity in frame_label.category_entities:
+            print('\tLabel category description: {}'.format(
+                category_entity.description))
+
+        # Each frame_label_annotation has many frames,
+        # here we print information only about the first frame.
+        frame = frame_label.frames[0]
+        time_offset = frame.time_offset.seconds + frame.time_offset.nanos / 1e9
+        print('\tFirst frame time offset: {}s'.format(time_offset))
+        print('\tFirst frame confidence: {}'.format(frame.confidence))
         print('\n')
 
 
 def analyze_shots(path):
     """ Detects camera shot changes. """
-    video_client = (video_intelligence_service_client.
-                    VideoIntelligenceServiceClient())
+    video_client = videointelligence_v1beta2.VideoIntelligenceServiceClient()
     features = [enums.Feature.SHOT_CHANGE_DETECTION]
     operation = video_client.annotate_video(path, features)
     print('\nProcessing video for shot change annotations:')
@@ -193,13 +288,14 @@ def analyze_shots(path):
     print('\nFinished processing.')
 
    # first result is retrieved because a single video was processed
-    shots = operation.result().annotation_results[0]
+    shots = operation.result().annotation_results[0].shot_annotations
 
-    for note, shot in enumerate(shots.shot_annotations):
-        print('\tScene {}: {} to {}'.format(
-            note,
-            shot.start_time_offset / 1000000.0,
-            shot.end_time_offset / 1000000.0))
+    for i, shot in enumerate(shots):
+        start_time = (shot.start_time_offset.seconds +
+                      shot.start_time_offset.nanos / 1e9)
+        end_time = (shot.end_time_offset.seconds +
+                    shot.end_time_offset.nanos / 1e9)
+        print('\tShot {}: {} to {}'.format(i, start_time, end_time))
 
 
 if __name__ == '__main__':
@@ -216,9 +312,9 @@ def analyze_shots(path):
     analyze_labels_file_parser = subparsers.add_parser(
         'labels_file', help=analyze_labels_file.__doc__)
     analyze_labels_file_parser.add_argument('path')
-    analyze_safe_search_parser = subparsers.add_parser(
-        'safe_search', help=analyze_safe_search.__doc__)
-    analyze_safe_search_parser.add_argument('path')
+    analyze_explicit_content_parser = subparsers.add_parser(
+        'explicit_content', help=analyze_explicit_content.__doc__)
+    analyze_explicit_content_parser.add_argument('path')
     analyze_shots_parser = subparsers.add_parser(
         'shots', help=analyze_shots.__doc__)
     analyze_shots_parser.add_argument('path')
@@ -233,5 +329,5 @@ def analyze_shots(path):
         analyze_labels_file(args.path)
     if args.command == 'shots':
         analyze_shots(args.path)
-    if args.command == 'safe_search':
-        analyze_safe_search(args.path)
+    if args.command == 'explicit_content':
+        analyze_explicit_content(args.path)