@@ -78,28 +78,28 @@ def batch_process_documents(
78
78
print ("Output files:" )
79
79
80
80
for i , blob in enumerate (blob_list ):
81
- # Download the contents of this blob as a bytes object.
82
- if ".json" not in blob .name :
83
- print ( f"skipping non-supported file type { blob .name } " )
84
- return
85
- # Only parses JSON files
86
- blob_as_bytes = blob . download_as_bytes ( )
87
-
88
- document = documentai . types . Document . from_json ( blob_as_bytes )
89
- print ( f"Fetched file { i + 1 } " )
90
-
91
- # For a full list of Document object attributes, please reference this page: https://googleapis.dev/python/documentai/latest/_modules/google/cloud/documentai_v1beta3/types/document.html#Document
92
-
93
- # Read the text recognition output from the processor
94
- for page in document . pages :
95
- for form_field in page . form_fields :
96
- field_name = get_text ( form_field . field_name , document )
97
- field_value = get_text ( form_field . field_value , document )
98
- print ( "Extracted key value pair:" )
99
- print ( f" \t { field_name } , { field_value } " )
100
- for paragraph in document . pages :
101
- paragraph_text = get_text ( paragraph . layout , document )
102
- print (f"Paragraph text: \n { paragraph_text } " )
81
+ # If JSON file, download the contents of this blob as a bytes object.
82
+ if ".json" in blob .name :
83
+ blob_as_bytes = blob .download_as_bytes ( )
84
+
85
+ document = documentai . types . Document . from_json ( blob_as_bytes )
86
+ print ( f"Fetched file { i + 1 } " )
87
+
88
+ # For a full list of Document object attributes, please reference this page:
89
+ # https://cloud.google.com/document-ai/docs/reference/rpc/google.cloud.documentai.v1beta3#document
90
+
91
+ # Read the text recognition output from the processor
92
+ for page in document . pages :
93
+ for form_field in page . form_fields :
94
+ field_name = get_text ( form_field . field_name , document )
95
+ field_value = get_text ( form_field . field_value , document )
96
+ print ( "Extracted key value pair:" )
97
+ print ( f" \t { field_name } , { field_value } " )
98
+ for paragraph in document . pages :
99
+ paragraph_text = get_text ( paragraph . layout , document )
100
+ print ( f"Paragraph text: \n { paragraph_text } " )
101
+ else :
102
+ print (f"Skipping non-supported file type { blob . name } " )
103
103
104
104
105
105
# Extract shards from the text field
0 commit comments