@@ -270,7 +270,7 @@ def detect_properties_uri(uri):
270
270
271
271
272
272
def detect_web (path ):
273
- """detects web annotations given an image."""
273
+ """Detects web annotations given an image."""
274
274
vision_client = vision .Client ()
275
275
276
276
with io .open (path , 'rb' ) as image_file :
@@ -312,7 +312,7 @@ def detect_web(path):
312
312
313
313
314
314
def detect_web_uri (uri ):
315
- """detects web annotations in the file located in google cloud storage ."""
315
+ """Detects web annotations in the file located in Google Cloud Storage ."""
316
316
vision_client = vision .Client ()
317
317
image = vision_client .image (source_uri = uri )
318
318
@@ -350,7 +350,7 @@ def detect_web_uri(uri):
350
350
351
351
352
352
def detect_crop_hints (path ):
353
- """detects crop hints in an image."""
353
+ """Detects crop hints in an image."""
354
354
vision_client = vision .Client ()
355
355
with io .open (path , 'rb' ) as image_file :
356
356
content = image_file .read ()
@@ -368,7 +368,7 @@ def detect_crop_hints(path):
368
368
369
369
370
370
def detect_crop_hints_uri (uri ):
371
- """detects crop hints in the file located in google cloud storage ."""
371
+ """Detects crop hints in the file located in Google Cloud Storage ."""
372
372
vision_client = vision .Client ()
373
373
image = vision_client .image (source_uri = uri )
374
374
@@ -382,54 +382,77 @@ def detect_crop_hints_uri(uri):
382
382
print ('bounds: {}' .format (',' .join (vertices )))
383
383
384
384
385
- def detect_fulltext (path ):
386
- """extracts full text from an image."""
385
def detect_document(path):
    """Detects document features in an image."""
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the matching sub-command, so it is kept to a single short sentence.
    #
    # path: location of a local image file; it is opened in binary mode and
    #       its raw bytes are handed to the Vision client.
    # Returns nothing; the results are printed to standard output.
    vision_client = vision.Client()

    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision_client.image(content=content)
    document = image.detect_full_text()

    for page in document.pages:
        # Text of every block on this page, in iteration order.
        block_texts = []

        for block in page.blocks:
            # Flatten paragraphs -> words -> symbols. Symbols are joined
            # with no separator, so the text is exactly the concatenation
            # of every symbol.text in the block.
            block_text = ''.join(
                symbol.text
                for paragraph in block.paragraphs
                for word in paragraph.words
                for symbol in word.symbols)

            print('\n --\n Content Block: {}'.format(block_text))
            print('Block Bounding Box:\n {}'.format(block.bounding_box))

            block_texts.append(block_text)

        # Page text is the block texts joined back to back.
        print('Page Content:\n {}'.format(''.join(block_texts)))
        print('Page Dimensions: w: {} h: {}'.format(page.width, page.height))
410
421
411
422
412
- def detect_fulltext_uri (uri ):
413
- """extracts full text in the file located in google cloud storage."""
423
def detect_document_uri(uri):
    """Detects document features in the file located in Google Cloud
    Storage."""
    # NOTE: the docstring above is also passed to argparse as the help text
    # of the matching sub-command, so its wording is left untouched.
    #
    # uri: passed straight to the Vision client as the image's source_uri.
    # Returns nothing; the results are printed to standard output.
    vision_client = vision.Client()
    image = vision_client.image(source_uri=uri)

    document = image.detect_full_text()

    for page in document.pages:
        # Text of every block on this page, in iteration order.
        block_texts = []

        for block in page.blocks:
            # Flatten paragraphs -> words -> symbols. Symbols are joined
            # with no separator, so the text is exactly the concatenation
            # of every symbol.text in the block.
            block_text = ''.join(
                symbol.text
                for paragraph in block.paragraphs
                for word in paragraph.words
                for symbol in word.symbols)

            print('\n --\n Content Block: {}'.format(block_text))
            print('Block Bounding Box:\n {}'.format(block.bounding_box))

            block_texts.append(block_text)

        # Page text is the block texts joined back to back.
        print('Page Content:\n {}'.format(''.join(block_texts)))
        print('Page Dimensions: w: {} h: {}'.format(page.width, page.height))
433
456
434
457
435
458
def run_local (args ):
@@ -451,8 +474,8 @@ def run_local(args):
451
474
detect_web (args .path )
452
475
elif args .command == 'crophints' :
453
476
detect_crop_hints (args .path )
454
- elif args .command == 'fulltext ' :
455
- detect_fulltext (args .path )
477
+ elif args .command == 'document ' :
478
+ detect_document (args .path )
456
479
457
480
458
481
def run_uri (args ):
@@ -474,8 +497,8 @@ def run_uri(args):
474
497
detect_web_uri (args .uri )
475
498
elif args .command == 'crophints-uri' :
476
499
detect_crop_hints_uri (args .uri )
477
- elif args .command == 'fulltext -uri' :
478
- detect_fulltext_uri (args .uri )
500
+ elif args .command == 'document -uri' :
501
+ detect_document_uri (args .uri )
479
502
480
503
481
504
if __name__ == '__main__' :
@@ -560,13 +583,13 @@ def run_uri(args):
560
583
'crophints-uri' , help = detect_crop_hints_uri .__doc__ )
561
584
crop_hints_uri_parser .add_argument ('uri' )
562
585
563
- fulltext_parser = subparsers .add_parser (
564
- 'fulltext ' , help = detect_fulltext .__doc__ )
565
- fulltext_parser .add_argument ('path' )
586
+ document_parser = subparsers .add_parser (
587
+ 'document ' , help = detect_document .__doc__ )
588
+ document_parser .add_argument ('path' )
566
589
567
- fulltext_uri_parser = subparsers .add_parser (
568
- 'fulltext -uri' , help = detect_fulltext_uri .__doc__ )
569
- fulltext_uri_parser .add_argument ('uri' )
590
+ document_uri_parser = subparsers .add_parser (
591
+ 'document -uri' , help = detect_document_uri .__doc__ )
592
+ document_uri_parser .add_argument ('uri' )
570
593
571
594
args = parser .parse_args ()
572
595
0 commit comments