Skip to content

Commit 86fe9b7

Browse files
vision: move published samples into master [(#2743)](GoogleCloudPlatform/python-docs-samples#2743)
Add generated samples for Vision API; Add required attribute mime_type; Resolve encoding error in py2; Remove autogenerated warnings; Remove coding: utf-8 line; Remove argument encoding checks; Remove CLI; Remove unnecessary statics, variables, and imports; Blacken with l=88; Remove unused region tag and comments; Verify that there are no published links pointing to removed region tags; Shorten docstring; Replace concrete file path with "path/to/your/document.pdf". Co-authored-by: Yu-Han Liu <[email protected]>
1 parent c23b240 commit 86fe9b7

6 files changed

+286
-0
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START vision_async_batch_annotate_images]
16+
17+
from google.cloud import vision_v1
18+
from google.cloud.vision_v1 import enums
19+
20+
21+
def sample_async_batch_annotate_images(
    input_image_uri="gs://cloud-samples-data/vision/label/wakeupcat.jpg",
    output_uri="gs://your-bucket/prefix/",
):
    """Annotate one image asynchronously and report where the output landed.

    Requests label detection and image properties for the image at
    ``input_image_uri``, asks the service to write the results under
    ``output_uri``, blocks until the returned operation finishes, and prints
    the GCS destination URI echoed back in the operation's response.

    Args:
        input_image_uri: URI of the image to annotate.
        output_uri: GCS URI used as the prefix for the output files.
    """
    annotator = vision_v1.ImageAnnotatorClient()

    annotate_request = {
        "image": {"source": {"image_uri": input_image_uri}},
        "features": [
            {"type": enums.Feature.Type.LABEL_DETECTION},
            {"type": enums.Feature.Type.IMAGE_PROPERTIES},
        ],
    }

    # batch_size is the max number of responses to output in each JSON file.
    destination_config = {
        "gcs_destination": {"uri": output_uri},
        "batch_size": 2,
    }

    pending_operation = annotator.async_batch_annotate_images(
        [annotate_request], destination_config
    )

    print("Waiting for operation to complete...")
    finished = pending_operation.result()

    # The output is written to GCS with the provided output_uri as prefix.
    written_prefix = finished.output_config.gcs_destination.uri
    print("Output written to GCS with prefix: {}".format(written_prefix))
49+
50+
51+
# [END vision_async_batch_annotate_images]
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# Copyright 2020 Google
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import uuid
17+
18+
from google.cloud import storage
19+
import pytest
20+
21+
import vision_async_batch_annotate_images
22+
23+
# Directory named "resources" located alongside this test module.
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
# Root GCS URI that the test joins its input image path onto.
GCS_ROOT = "gs://cloud-samples-data/vision/"

# Name of the bucket that receives the output; indexing os.environ directly
# means a missing CLOUD_STORAGE_BUCKET raises KeyError when this module loads.
BUCKET = os.environ["CLOUD_STORAGE_BUCKET"]
# A fresh UUID per import gives every run its own output prefix.
OUTPUT_PREFIX = "TEST_OUTPUT_{}".format(uuid.uuid4())
GCS_DESTINATION_URI = "gs://{}/{}/".format(BUCKET, OUTPUT_PREFIX)
29+
30+
31+
@pytest.fixture()
def storage_client():
    """Provide a newly constructed Cloud Storage client."""
    client = storage.Client()
    yield client
34+
35+
36+
@pytest.fixture()
def bucket(storage_client):
    """Yield the output bucket, deleting this run's blobs before and after.

    Blobs whose names start with OUTPUT_PREFIX are removed once before the
    bucket is handed out and once more after the yield.
    """
    output_bucket = storage_client.get_bucket(BUCKET)

    def purge_output_blobs():
        # Delete every blob stored under this run's output prefix.
        for leftover in output_bucket.list_blobs(prefix=OUTPUT_PREFIX):
            leftover.delete()

    # Best-effort pre-clean: any error raised here is intentionally ignored.
    try:
        purge_output_blobs()
    except Exception:
        pass

    yield output_bucket

    # Post-yield cleanup is not guarded, so errors here propagate.
    purge_output_blobs()
50+
51+
52+
def test_sample_asyn_batch_annotate_images(storage_client, bucket, capsys):
    """Run the async batch sample; check its stdout and that output blobs exist."""
    vision_async_batch_annotate_images.sample_async_batch_annotate_images(
        input_image_uri=os.path.join(GCS_ROOT, "label/wakeupcat.jpg"),
        output_uri=GCS_DESTINATION_URI,
    )

    stdout_text, _stderr_text = capsys.readouterr()
    assert "Output written to GCS" in stdout_text

    # At least one blob must have been written under this run's prefix.
    written_blobs = list(bucket.list_blobs(prefix=OUTPUT_PREFIX))
    assert written_blobs
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START vision_batch_annotate_files]
16+
17+
from google.cloud import vision_v1
18+
from google.cloud.vision_v1 import enums
19+
import io
20+
21+
22+
def sample_batch_annotate_files(file_path="path/to/your/document.pdf"):
    """Annotate a local document and print the text that was detected.

    Reads the file at ``file_path`` as bytes, submits it in a single batch
    request for document text detection, and prints the full text of each
    returned page followed by the confidence of every block, paragraph, word,
    and symbol.

    Args:
        file_path: Path of the local file to annotate. Its content is sent
            with the mime type "application/pdf".
    """
    annotator = vision_v1.ImageAnnotatorClient()

    with io.open(file_path, "rb") as document:
        raw_bytes = document.read()

    file_request = {
        # Supported mime_type: application/pdf, image/tiff, image/gif
        "input_config": {"mime_type": "application/pdf", "content": raw_bytes},
        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
        # The service can process up to 5 pages per document file. Here we
        # ask for the first, second, and last page of the document.
        "pages": [1, 2, -1],
    }

    batch_response = annotator.batch_annotate_files([file_request])

    # Only one file was submitted, so only the first file response is read.
    for page_response in batch_response.responses[0].responses:
        annotation = page_response.full_text_annotation
        print(u"Full text: {}".format(annotation.text))
        for page in annotation.pages:
            for block in page.blocks:
                print(u"\nBlock confidence: {}".format(block.confidence))
                for paragraph in block.paragraphs:
                    print(u"\tParagraph confidence: {}".format(paragraph.confidence))
                    for word in paragraph.words:
                        print(u"\t\tWord confidence: {}".format(word.confidence))
                        for symbol in word.symbols:
                            symbol_line = u"\t\t\tSymbol: {}, (confidence: {})".format(
                                symbol.text, symbol.confidence
                            )
                            print(symbol_line)
54+
55+
56+
# [END vision_batch_annotate_files]
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright 2020 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START vision_batch_annotate_files_gcs]
16+
17+
from google.cloud import vision_v1
18+
from google.cloud.vision_v1 import enums
19+
20+
21+
def sample_batch_annotate_files(
    storage_uri="gs://cloud-samples-data/vision/document_understanding/kafka.pdf",
):
    """Annotate a document stored in GCS and print the text that was detected.

    Submits the file at ``storage_uri`` in a single batch request for document
    text detection, then prints the full text of each returned page followed
    by the confidence of every block, paragraph, word, and symbol.

    Args:
        storage_uri: GCS URI of the file to annotate. It is sent with the
            mime type "application/pdf".
    """
    annotator = vision_v1.ImageAnnotatorClient()

    file_request = {
        "input_config": {
            "gcs_source": {"uri": storage_uri},
            "mime_type": "application/pdf",
        },
        "features": [{"type": enums.Feature.Type.DOCUMENT_TEXT_DETECTION}],
        # The service can process up to 5 pages per document file. Here we
        # ask for the first, second, and last page of the document.
        "pages": [1, 2, -1],
    }

    batch_response = annotator.batch_annotate_files([file_request])

    # Only one file was submitted, so only the first file response is read.
    for page_response in batch_response.responses[0].responses:
        annotation = page_response.full_text_annotation
        print(u"Full text: {}".format(annotation.text))
        for page in annotation.pages:
            for block in page.blocks:
                print(u"\nBlock confidence: {}".format(block.confidence))
                for paragraph in block.paragraphs:
                    print(u"\tParagraph confidence: {}".format(paragraph.confidence))
                    for word in paragraph.words:
                        print(u"\t\tWord confidence: {}".format(word.confidence))
                        for symbol in word.symbols:
                            symbol_line = u"\t\t\tSymbol: {}, (confidence: {})".format(
                                symbol.text, symbol.confidence
                            )
                            print(symbol_line)
55+
56+
57+
# [END vision_batch_annotate_files_gcs]
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2020 Google
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import vision_batch_annotate_files_gcs
18+
19+
# Root GCS URI; the test below joins the sample PDF's relative path onto it.
GCS_ROOT = "gs://cloud-samples-data/vision/"
20+
21+
22+
def test_sample_batch_annotate_files_gcs(capsys):
    """Run the GCS batch-annotate sample and check the headings it prints."""
    vision_batch_annotate_files_gcs.sample_batch_annotate_files(
        storage_uri=os.path.join(GCS_ROOT, "document_understanding/kafka.pdf")
    )

    stdout_text, _stderr_text = capsys.readouterr()

    for expected_heading in ("Full text", "Block confidence"):
        assert expected_heading in stdout_text
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2020 Google
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import vision_batch_annotate_files
18+
19+
# "resources" directory located alongside this test module.
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
20+
21+
22+
def test_sample_batch_annotate_files(capsys):
    """Run the local-file batch-annotate sample and check the headings it prints."""
    vision_batch_annotate_files.sample_batch_annotate_files(
        file_path=os.path.join(RESOURCES, "kafka.pdf")
    )

    stdout_text, _stderr_text = capsys.readouterr()

    for expected_heading in ("Full text", "Block confidence"):
        assert expected_heading in stdout_text

0 commit comments

Comments
 (0)