23
23
python beta_snippets.py object-localization-uri gs://...
24
24
python beta_snippets.py handwritten-ocr INPUT_IMAGE
25
25
python beta_snippets.py handwritten-ocr-uri gs://...
26
+ python beta_snippets.py doc-features INPUT_PDF
27
+ python beta_snippets.py doc-features-uri gs://...
26
28
27
29
28
30
For more information, the documentation at
@@ -174,6 +176,105 @@ def detect_handwritten_ocr_uri(uri):
174
176
# [END vision_handwritten_ocr_gcs_beta]
175
177
176
178
179
# [START vision_fulltext_detection_pdf_beta]
def detect_document_features(path):
    """Detects document features (full text) in a local PDF/TIFF/GIF file.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        path: The path to the local file.
    """
    # Imported locally so the other snippets in this file can run without
    # the v1p4beta1 client installed.
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as pdf_file:
        content = pdf_file.read()

    # Other supported mime_types: 'image/tiff' or 'image/gif'
    mime_type = 'application/pdf'
    input_config = vision.types.InputConfig(
        content=content, mime_type=mime_type)

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
    # Annotate the first two pages and the last one (max 5 pages)
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.types.AnnotateFileRequest(
        input_config=input_config,
        features=[feature],
        pages=pages)

    response = client.batch_annotate_files(requests=[request])

    # One AnnotateFileResponse per request; each contains one
    # AnnotateImageResponse per annotated page.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for par in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(par.confidence))
                    for word in par.words:
                        symbol_texts = [symbol.text for symbol in word.symbols]
                        word_text = ''.join(symbol_texts)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            word_text, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))
# [END vision_fulltext_detection_pdf_beta]
228
+
229
+
230
# [START vision_fulltext_detection_pdf_gcs_beta]
def detect_document_features_uri(gcs_uri):
    """Detects document features (full text) in a PDF/TIFF/GIF file on GCS.

    While your PDF file may have several pages,
    this API can process up to 5 pages only.

    Args:
        gcs_uri: The path to the file in Google Cloud Storage (gs://...)
    """
    # Imported locally so the other snippets in this file can run without
    # the v1p4beta1 client installed.
    from google.cloud import vision_v1p4beta1 as vision
    client = vision.ImageAnnotatorClient()

    # Other supported mime_types: 'image/tiff' or 'image/gif'
    mime_type = 'application/pdf'
    input_config = vision.types.InputConfig(
        gcs_source=vision.types.GcsSource(uri=gcs_uri), mime_type=mime_type)

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
    # Annotate the first two pages and the last one (max 5 pages)
    # First page starts at 1, and not 0. Last page is -1.
    pages = [1, 2, -1]

    request = vision.types.AnnotateFileRequest(
        input_config=input_config,
        features=[feature],
        pages=pages)

    response = client.batch_annotate_files(requests=[request])

    # One AnnotateFileResponse per request; each contains one
    # AnnotateImageResponse per annotated page.
    for image_response in response.responses[0].responses:
        for page in image_response.full_text_annotation.pages:
            for block in page.blocks:
                print('\nBlock confidence: {}\n'.format(block.confidence))
                for par in block.paragraphs:
                    print('\tParagraph confidence: {}'.format(par.confidence))
                    for word in par.words:
                        symbol_texts = [symbol.text for symbol in word.symbols]
                        word_text = ''.join(symbol_texts)
                        print('\t\tWord text: {} (confidence: {})'.format(
                            word_text, word.confidence))
                        for symbol in word.symbols:
                            print('\t\t\tSymbol: {} (confidence: {})'.format(
                                symbol.text, symbol.confidence))
# [END vision_fulltext_detection_pdf_gcs_beta]
276
+
277
+
177
278
if __name__ == '__main__' :
178
279
parser = argparse .ArgumentParser (
179
280
description = __doc__ ,
@@ -196,15 +297,27 @@ def detect_handwritten_ocr_uri(uri):
196
297
'handwritten-ocr-uri' , help = detect_handwritten_ocr_uri .__doc__ )
197
298
handwritten_uri_parser .add_argument ('uri' )
198
299
300
+ doc_features_parser = subparsers .add_parser (
301
+ 'doc-features' , help = detect_document_features .__doc__ )
302
+ doc_features_parser .add_argument ('path' )
303
+
304
+ doc_features_uri_parser = subparsers .add_parser (
305
+ 'doc-features-uri' , help = detect_document_features_uri .__doc__ )
306
+ doc_features_uri_parser .add_argument ('uri' )
307
+
199
308
args = parser .parse_args ()
200
309
201
310
if 'uri' in args .command :
202
311
if 'object-localization-uri' in args .command :
203
312
localize_objects_uri (args .uri )
204
313
elif 'handwritten-ocr-uri' in args .command :
205
314
detect_handwritten_ocr_uri (args .uri )
315
+ elif 'doc-features' in args .command :
316
+ detect_handwritten_ocr_uri (args .uri )
206
317
else :
207
318
if 'object-localization' in args .command :
208
319
localize_objects (args .path )
209
320
elif 'handwritten-ocr' in args .command :
210
321
detect_handwritten_ocr (args .path )
322
+ elif 'doc-features' in args .command :
323
+ detect_handwritten_ocr (args .path )
0 commit comments