Skip to content

Commit 1bc0992

Browse files
authored
Added two samples for "OCR with PDF/TIFF as source files" [(#2034)](GoogleCloudPlatform/python-docs-samples#2034)
* Added two samples for "OCR with PDF/TIFF as source files" * Moved the code to beta_snippets.py * Fixed the sub-parser names. * Shortened the line that was too long. * Added newline at the end of the file * Using the builtin open function instead * Renamed a variable * Fixed the wrong arg parameter * Added extra comment lines * Regenerated README.rst * Added specific strings to be unit-tested
1 parent 8b3ee74 commit 1bc0992

File tree

4 files changed

+144
-4
lines changed

4 files changed

+144
-4
lines changed

samples/snippets/detect/README.rst

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ To run this sample:
165165
$ python beta_snippets.py
166166
167167
usage: beta_snippets.py [-h]
168-
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri}
168+
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri,doc-features,doc-features-uri}
169169
...
170170
171171
Google Cloud Vision API Python Beta Snippets
@@ -176,12 +176,14 @@ To run this sample:
176176
python beta_snippets.py object-localization-uri gs://...
177177
python beta_snippets.py handwritten-ocr INPUT_IMAGE
178178
python beta_snippets.py handwritten-ocr-uri gs://...
179+
python beta_snippets.py doc-features INPUT_PDF
180+
python beta_snippets.py doc-features-uri gs://...
179181
180182
For more information, the documentation at
181183
https://cloud.google.com/vision/docs.
182184
183185
positional arguments:
184-
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri}
186+
{object-localization,object-localization-uri,handwritten-ocr,handwritten-ocr-uri,doc-features,doc-features-uri}
185187
object-localization
186188
Localize objects in the local image. Args: path: The
187189
path to the local file.
@@ -195,6 +197,14 @@ To run this sample:
195197
Detects handwritten characters in the file located in
196198
Google Cloud Storage. Args: uri: The path to the file
197199
in Google Cloud Storage (gs://...)
200+
doc-features Detects document features in a PDF/TIFF/GIF file.
201+
While your PDF file may have several pages, this API
202+
can process up to 5 pages only. Args: path: The path
203+
to the local file.
204+
doc-features-uri Detects document features in a PDF/TIFF/GIF file.
205+
While your PDF file may have several pages, this API
206+
can process up to 5 pages only. Args: uri: The path to
207+
the file in Google Cloud Storage (gs://...)
198208
199209
optional arguments:
200210
-h, --help show this help message and exit

samples/snippets/detect/beta_snippets.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
python beta_snippets.py object-localization-uri gs://...
2424
python beta_snippets.py handwritten-ocr INPUT_IMAGE
2525
python beta_snippets.py handwritten-ocr-uri gs://...
26+
python beta_snippets.py doc-features INPUT_PDF
27+
python beta_snippets.py doc-features-uri gs://...
2628
2729
2830
For more information, the documentation at
@@ -174,6 +176,105 @@ def detect_handwritten_ocr_uri(uri):
174176
# [END vision_handwritten_ocr_gcs_beta]
175177

176178

179+
# [START vision_fulltext_detection_pdf_beta]
180+
def detect_document_features(path):
    """Detects document features in a PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        path: The path to the local file.
    """
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the 'doc-features' sub-command, so its wording is user-facing.
    from google.cloud import vision_v1p4beta1 as vision
    annotator = vision.ImageAnnotatorClient()

    # The document is sent inline, so read the whole file as raw bytes.
    with open(path, 'rb') as document:
        raw_bytes = document.read()

    # Other supported mime_types: 'image/tiff' or 'image/gif'
    source = vision.types.InputConfig(
        content=raw_bytes, mime_type='application/pdf')

    ocr_feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)

    # Annotate the first two pages and the last one (max 5 pages).
    # Page numbers start at 1 (not 0); -1 selects the last page.
    selected_pages = [1, 2, -1]

    file_request = vision.types.AnnotateFileRequest(
        input_config=source,
        features=[ocr_feature],
        pages=selected_pages)

    batch = annotator.batch_annotate_files(requests=[file_request])

    # Only one file was submitted, so only the first file-level response is
    # read; it holds one image response per annotated page.
    page_responses = batch.responses[0].responses

    for page_response in page_responses:
        for page in page_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))

                for paragraph in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(
                        paragraph.confidence))

                    for word in paragraph.words:
                        # A word's text is the concatenation of its symbols.
                        joined = ''.join(s.text for s in word.symbols)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            joined, word.confidence))

                        for sym in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                sym.text, sym.confidence))
227+
# [END vision_fulltext_detection_pdf_beta]
228+
229+
230+
# [START vision_fulltext_detection_pdf_gcs_beta]
231+
def detect_document_features_uri(gcs_uri):
    """Detects document features in a PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        uri: The path to the file in Google Cloud Storage (gs://...)
    """
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the 'doc-features-uri' sub-command, so its wording is user-facing.
    # The "uri" it documents is this function's `gcs_uri` parameter.
    from google.cloud import vision_v1p4beta1 as vision
    annotator = vision.ImageAnnotatorClient()

    # Other supported mime_types: 'image/tiff' or 'image/gif'
    remote_file = vision.types.GcsSource(uri=gcs_uri)
    source = vision.types.InputConfig(
        gcs_source=remote_file, mime_type='application/pdf')

    ocr_feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)

    # Annotate the first two pages and the last one (max 5 pages).
    # Page numbers start at 1 (not 0); -1 selects the last page.
    selected_pages = [1, 2, -1]

    file_request = vision.types.AnnotateFileRequest(
        input_config=source,
        features=[ocr_feature],
        pages=selected_pages)

    batch = annotator.batch_annotate_files(requests=[file_request])

    # Only one file was submitted, so only the first file-level response is
    # read; it holds one image response per annotated page.
    page_responses = batch.responses[0].responses

    for page_response in page_responses:
        for page in page_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))

                for paragraph in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(
                        paragraph.confidence))

                    for word in paragraph.words:
                        # A word's text is the concatenation of its symbols.
                        joined = ''.join(s.text for s in word.symbols)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            joined, word.confidence))

                        for sym in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                sym.text, sym.confidence))
275+
# [END vision_fulltext_detection_pdf_gcs_beta]
276+
277+
177278
if __name__ == '__main__':
178279
parser = argparse.ArgumentParser(
179280
description=__doc__,
@@ -196,15 +297,27 @@ def detect_handwritten_ocr_uri(uri):
196297
'handwritten-ocr-uri', help=detect_handwritten_ocr_uri.__doc__)
197298
handwritten_uri_parser.add_argument('uri')
198299

300+
doc_features_parser = subparsers.add_parser(
301+
'doc-features', help=detect_document_features.__doc__)
302+
doc_features_parser.add_argument('path')
303+
304+
doc_features_uri_parser = subparsers.add_parser(
305+
'doc-features-uri', help=detect_document_features_uri.__doc__)
306+
doc_features_uri_parser.add_argument('uri')
307+
199308
args = parser.parse_args()
200309

201310
# Dispatch the parsed sub-command to its sample function.  Sub-commands
# whose name contains 'uri' take a Cloud Storage URI (args.uri); all others
# take a local file path (args.path).
if 'uri' in args.command:
    if 'object-localization-uri' in args.command:
        localize_objects_uri(args.uri)
    elif 'handwritten-ocr-uri' in args.command:
        detect_handwritten_ocr_uri(args.uri)
    elif 'doc-features-uri' in args.command:
        # Previously this branch ran the handwritten-OCR sample by mistake.
        detect_document_features_uri(args.uri)
else:
    if 'object-localization' in args.command:
        localize_objects(args.path)
    elif 'handwritten-ocr' in args.command:
        detect_handwritten_ocr(args.path)
    elif 'doc-features' in args.command:
        # Previously this branch ran the handwritten-OCR sample by mistake.
        detect_document_features(args.path)

samples/snippets/detect/beta_snippets_test.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import beta_snippets
1717

1818
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
19+
GCS_ROOT = 'gs://cloud-samples-data/vision/'
1920

2021

2122
def test_localize_objects(capsys):
@@ -28,7 +29,7 @@ def test_localize_objects(capsys):
2829

2930

3031
def test_localize_objects_uri(capsys):
31-
uri = 'gs://cloud-samples-data/vision/puppies.jpg'
32+
uri = GCS_ROOT + 'puppies.jpg'
3233

3334
beta_snippets.localize_objects_uri(uri)
3435

@@ -46,9 +47,25 @@ def test_handwritten_ocr(capsys):
4647

4748

4849
def test_handwritten_ocr_uri(capsys):
49-
uri = 'gs://cloud-samples-data/vision/handwritten.jpg'
50+
uri = GCS_ROOT + 'handwritten.jpg'
5051

5152
beta_snippets.detect_handwritten_ocr_uri(uri)
5253

5354
out, _ = capsys.readouterr()
5455
assert 'Cloud Vision API' in out
56+
57+
58+
def test_detect_pdf_document(capsys):
    """Run document OCR on the local kafka.pdf and check the printed text."""
    pdf_path = os.path.join(RESOURCES, 'kafka.pdf')

    beta_snippets.detect_document_features(pdf_path)

    # The sample reports its results on stdout; stderr is not inspected.
    stdout, _stderr = capsys.readouterr()
    for expected in ('Symbol: a', 'Word text: evenings'):
        assert expected in stdout
64+
65+
66+
def test_detect_pdf_document_from_gcs(capsys):
    """Run document OCR on kafka.pdf in GCS and check output is printed."""
    remote_pdf = GCS_ROOT + 'document_understanding/kafka.pdf'

    beta_snippets.detect_document_features_uri(remote_pdf)

    # The sample reports its results on stdout; stderr is not inspected.
    stdout, _stderr = capsys.readouterr()
    for expected in ('Symbol', 'Word text'):
        assert expected in stdout
85.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)