Skip to content

Commit 93ee3c7

Browse files
anguillanneuf authored and chingor13 committed
samples: Added beta samples for video object tracking/text detection (#1237)
* Added beta samples for video object tracking/text detection
* Update style, fix a few test issues we missed, increase timeouts
* Updates based on review
* Shorten timeouts, break out of test
1 parent 774053f commit 93ee3c7

File tree

6 files changed

+414
-1
lines changed

6 files changed

+414
-1
lines changed

video/resources/cat.mp4

6.1 MB
Binary file not shown.

video/resources/googlework_short.mp4

1.42 MB
Binary file not shown.

video/src/main/java/com/example/video/Detect.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ public static void speechTranscription(String gcsUri) throws Exception {
105105

106106
System.out.println("Waiting for operation to complete...");
107107
// Display the results
108-
for (VideoAnnotationResults results : response.get(180, TimeUnit.SECONDS)
108+
for (VideoAnnotationResults results : response.get(300, TimeUnit.SECONDS)
109109
.getAnnotationResultsList()) {
110110
for (SpeechTranscription speechTranscription : results.getSpeechTranscriptionsList()) {
111111
try {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/*
2+
* Copyright 2018 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.video;
18+
19+
import com.google.api.gax.longrunning.OperationFuture;
20+
import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoProgress;
21+
import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoRequest;
22+
import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoResponse;
23+
import com.google.cloud.videointelligence.v1p2beta1.Feature;
24+
import com.google.cloud.videointelligence.v1p2beta1.NormalizedVertex;
25+
import com.google.cloud.videointelligence.v1p2beta1.TextAnnotation;
26+
import com.google.cloud.videointelligence.v1p2beta1.TextFrame;
27+
import com.google.cloud.videointelligence.v1p2beta1.TextSegment;
28+
import com.google.cloud.videointelligence.v1p2beta1.VideoAnnotationResults;
29+
import com.google.cloud.videointelligence.v1p2beta1.VideoIntelligenceServiceClient;
30+
import com.google.cloud.videointelligence.v1p2beta1.VideoSegment;
31+
import com.google.protobuf.ByteString;
32+
33+
import com.google.protobuf.Duration;
34+
import java.nio.file.Files;
35+
import java.nio.file.Path;
36+
import java.nio.file.Paths;
37+
import java.util.List;
38+
import java.util.concurrent.TimeUnit;
39+
40+
public class TextDetection {
41+
42+
// [START video_detect_text_beta]
43+
/**
44+
* Detect text in a video.
45+
*
46+
* @param filePath the path to the video file to analyze.
47+
*/
48+
public static VideoAnnotationResults detectText(String filePath) throws Exception {
49+
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
50+
// Read file
51+
Path path = Paths.get(filePath);
52+
byte[] data = Files.readAllBytes(path);
53+
54+
// Create the request
55+
AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
56+
.setInputContent(ByteString.copyFrom(data))
57+
.addFeatures(Feature.TEXT_DETECTION)
58+
.build();
59+
60+
// asynchronously perform object tracking on videos
61+
OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
62+
client.annotateVideoAsync(request);
63+
64+
System.out.println("Waiting for operation to complete...");
65+
// The first result is retrieved because a single video was processed.
66+
AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
67+
VideoAnnotationResults results = response.getAnnotationResults(0);
68+
69+
// Get only the first annotation for demo purposes.
70+
TextAnnotation annotation = results.getTextAnnotations(0);
71+
System.out.println("Text: " + annotation.getText());
72+
73+
// Get the first text segment.
74+
TextSegment textSegment = annotation.getSegments(0);
75+
System.out.println("Confidence: " + textSegment.getConfidence());
76+
// For the text segment display it's time offset
77+
VideoSegment videoSegment = textSegment.getSegment();
78+
Duration startTimeOffset = videoSegment.getStartTimeOffset();
79+
Duration endTimeOffset = videoSegment.getEndTimeOffset();
80+
// Display the offset times in seconds, 1e9 is part of the formula to convert nanos to seconds
81+
System.out.println(String.format("Start time: %.2f",
82+
startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9));
83+
System.out.println(String.format("End time: %.2f",
84+
endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
85+
86+
// Show the first result for the first frame in the segment.
87+
TextFrame textFrame = textSegment.getFrames(0);
88+
Duration timeOffset = textFrame.getTimeOffset();
89+
System.out.println(String.format("Time offset for the first frame: %.2f",
90+
timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
91+
92+
// Display the rotated bounding box for where the text is on the frame.
93+
System.out.println("Rotated Bounding Box Vertices:");
94+
List<NormalizedVertex> vertices = textFrame.getRotatedBoundingBox().getVerticesList();
95+
for (NormalizedVertex normalizedVertex : vertices) {
96+
System.out.println(String.format(
97+
"\tVertex.x: %.2f, Vertex.y: %.2f",
98+
normalizedVertex.getX(),
99+
normalizedVertex.getY()));
100+
}
101+
return results;
102+
}
103+
}
104+
// [END video_detect_text_beta]
105+
106+
// [START video_detect_text_gcs_beta]
107+
/**
108+
* Detect Text in a video.
109+
*
110+
* @param gcsUri the path to the video file to analyze.
111+
*/
112+
public static VideoAnnotationResults detectTextGcs(String gcsUri) throws Exception {
113+
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
114+
// Create the request
115+
AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
116+
.setInputUri(gcsUri)
117+
.addFeatures(Feature.TEXT_DETECTION)
118+
.build();
119+
120+
// asynchronously perform object tracking on videos
121+
OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
122+
client.annotateVideoAsync(request);
123+
124+
System.out.println("Waiting for operation to complete...");
125+
// The first result is retrieved because a single video was processed.
126+
AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
127+
VideoAnnotationResults results = response.getAnnotationResults(0);
128+
129+
// Get only the first annotation for demo purposes.
130+
TextAnnotation annotation = results.getTextAnnotations(0);
131+
System.out.println("Text: " + annotation.getText());
132+
133+
// Get the first text segment.
134+
TextSegment textSegment = annotation.getSegments(0);
135+
System.out.println("Confidence: " + textSegment.getConfidence());
136+
// For the text segment display it's time offset
137+
VideoSegment videoSegment = textSegment.getSegment();
138+
Duration startTimeOffset = videoSegment.getStartTimeOffset();
139+
Duration endTimeOffset = videoSegment.getEndTimeOffset();
140+
// Display the offset times in seconds, 1e9 is part of the formula to convert nanos to seconds
141+
System.out.println(String.format("Start time: %.2f",
142+
startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9));
143+
System.out.println(String.format("End time: %.2f",
144+
endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
145+
146+
// Show the first result for the first frame in the segment.
147+
TextFrame textFrame = textSegment.getFrames(0);
148+
Duration timeOffset = textFrame.getTimeOffset();
149+
System.out.println(String.format("Time offset for the first frame: %.2f",
150+
timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
151+
152+
// Display the rotated bounding box for where the text is on the frame.
153+
System.out.println("Rotated Bounding Box Vertices:");
154+
List<NormalizedVertex> vertices = textFrame.getRotatedBoundingBox().getVerticesList();
155+
for (NormalizedVertex normalizedVertex : vertices) {
156+
System.out.println(String.format(
157+
"\tVertex.x: %.2f, Vertex.y: %.2f",
158+
normalizedVertex.getX(),
159+
normalizedVertex.getY()));
160+
}
161+
return results;
162+
}
163+
}
164+
// [END video_detect_text_gcs_beta]
165+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/*
2+
* Copyright 2018 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.video;
18+
19+
import com.google.api.gax.longrunning.OperationFuture;
20+
import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoProgress;
21+
import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoRequest;
22+
import com.google.cloud.videointelligence.v1p2beta1.AnnotateVideoResponse;
23+
import com.google.cloud.videointelligence.v1p2beta1.Entity;
24+
import com.google.cloud.videointelligence.v1p2beta1.Feature;
25+
import com.google.cloud.videointelligence.v1p2beta1.NormalizedBoundingBox;
26+
import com.google.cloud.videointelligence.v1p2beta1.ObjectTrackingAnnotation;
27+
import com.google.cloud.videointelligence.v1p2beta1.ObjectTrackingFrame;
28+
import com.google.cloud.videointelligence.v1p2beta1.VideoAnnotationResults;
29+
import com.google.cloud.videointelligence.v1p2beta1.VideoIntelligenceServiceClient;
30+
import com.google.cloud.videointelligence.v1p2beta1.VideoSegment;
31+
import com.google.protobuf.ByteString;
32+
33+
import com.google.protobuf.Duration;
34+
import java.nio.file.Files;
35+
import java.nio.file.Path;
36+
import java.nio.file.Paths;
37+
import java.util.concurrent.TimeUnit;
38+
39+
public class TrackObjects {
40+
41+
// [START video_object_tracking_beta]
42+
/**
43+
* Track objects in a video.
44+
*
45+
* @param filePath the path to the video file to analyze.
46+
*/
47+
public static VideoAnnotationResults trackObjects(String filePath) throws Exception {
48+
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
49+
// Read file
50+
Path path = Paths.get(filePath);
51+
byte[] data = Files.readAllBytes(path);
52+
53+
// Create the request
54+
AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
55+
.setInputContent(ByteString.copyFrom(data))
56+
.addFeatures(Feature.OBJECT_TRACKING)
57+
.setLocationId("us-east1")
58+
.build();
59+
60+
// asynchronously perform object tracking on videos
61+
OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
62+
client.annotateVideoAsync(request);
63+
64+
System.out.println("Waiting for operation to complete...");
65+
// The first result is retrieved because a single video was processed.
66+
AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
67+
VideoAnnotationResults results = response.getAnnotationResults(0);
68+
69+
// Get only the first annotation for demo purposes.
70+
ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
71+
System.out.println("Confidence: " + annotation.getConfidence());
72+
73+
if (annotation.hasEntity()) {
74+
Entity entity = annotation.getEntity();
75+
System.out.println("Entity description: " + entity.getDescription());
76+
System.out.println("Entity id:: " + entity.getEntityId());
77+
}
78+
79+
if (annotation.hasSegment()) {
80+
VideoSegment videoSegment = annotation.getSegment();
81+
Duration startTimeOffset = videoSegment.getStartTimeOffset();
82+
Duration endTimeOffset = videoSegment.getEndTimeOffset();
83+
// Display the segment time in seconds, 1e9 converts nanos to seconds
84+
System.out.println(String.format(
85+
"Segment: %.2fs to %.2fs",
86+
startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
87+
endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
88+
}
89+
90+
// Here we print only the bounding box of the first frame in this segment.
91+
ObjectTrackingFrame frame = annotation.getFrames(0);
92+
// Display the offset time in seconds, 1e9 converts nanos to seconds
93+
Duration timeOffset = frame.getTimeOffset();
94+
System.out.println(String.format(
95+
"Time offset of the first frame: %.2fs",
96+
timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
97+
98+
// Display the bounding box of the detected object
99+
NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
100+
System.out.println("Bounding box position:");
101+
System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
102+
System.out.println("\ttop: " + normalizedBoundingBox.getTop());
103+
System.out.println("\tright: " + normalizedBoundingBox.getRight());
104+
System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());
105+
return results;
106+
}
107+
}
108+
// [END video_object_tracking_beta]
109+
110+
// [START video_object_tracking_gcs_beta]
111+
/**
112+
* Track objects in a video.
113+
*
114+
* @param gcsUri the path to the video file to analyze.
115+
*/
116+
public static VideoAnnotationResults trackObjectsGcs(String gcsUri) throws Exception {
117+
try (VideoIntelligenceServiceClient client = VideoIntelligenceServiceClient.create()) {
118+
// Create the request
119+
AnnotateVideoRequest request = AnnotateVideoRequest.newBuilder()
120+
.setInputUri(gcsUri)
121+
.addFeatures(Feature.OBJECT_TRACKING)
122+
.setLocationId("us-east1")
123+
.build();
124+
125+
// asynchronously perform object tracking on videos
126+
OperationFuture<AnnotateVideoResponse, AnnotateVideoProgress> future =
127+
client.annotateVideoAsync(request);
128+
129+
System.out.println("Waiting for operation to complete...");
130+
// The first result is retrieved because a single video was processed.
131+
AnnotateVideoResponse response = future.get(300, TimeUnit.SECONDS);
132+
VideoAnnotationResults results = response.getAnnotationResults(0);
133+
134+
// Get only the first annotation for demo purposes.
135+
ObjectTrackingAnnotation annotation = results.getObjectAnnotations(0);
136+
System.out.println("Confidence: " + annotation.getConfidence());
137+
138+
if (annotation.hasEntity()) {
139+
Entity entity = annotation.getEntity();
140+
System.out.println("Entity description: " + entity.getDescription());
141+
System.out.println("Entity id:: " + entity.getEntityId());
142+
}
143+
144+
if (annotation.hasSegment()) {
145+
VideoSegment videoSegment = annotation.getSegment();
146+
Duration startTimeOffset = videoSegment.getStartTimeOffset();
147+
Duration endTimeOffset = videoSegment.getEndTimeOffset();
148+
// Display the segment time in seconds, 1e9 converts nanos to seconds
149+
System.out.println(String.format(
150+
"Segment: %.2fs to %.2fs",
151+
startTimeOffset.getSeconds() + startTimeOffset.getNanos() / 1e9,
152+
endTimeOffset.getSeconds() + endTimeOffset.getNanos() / 1e9));
153+
}
154+
155+
// Here we print only the bounding box of the first frame in this segment.
156+
ObjectTrackingFrame frame = annotation.getFrames(0);
157+
// Display the offset time in seconds, 1e9 converts nanos to seconds
158+
Duration timeOffset = frame.getTimeOffset();
159+
System.out.println(String.format(
160+
"Time offset of the first frame: %.2fs",
161+
timeOffset.getSeconds() + timeOffset.getNanos() / 1e9));
162+
163+
// Display the bounding box of the detected object
164+
NormalizedBoundingBox normalizedBoundingBox = frame.getNormalizedBoundingBox();
165+
System.out.println("Bounding box position:");
166+
System.out.println("\tleft: " + normalizedBoundingBox.getLeft());
167+
System.out.println("\ttop: " + normalizedBoundingBox.getTop());
168+
System.out.println("\tright: " + normalizedBoundingBox.getRight());
169+
System.out.println("\tbottom: " + normalizedBoundingBox.getBottom());
170+
return results;
171+
}
172+
}
173+
// [END video_object_tracking_gcs_beta]
174+
}
175+

0 commit comments

Comments
 (0)