Skip to content

Commit 42deddf

Browse files
committed
fix(samples): Fix Typos in Batch process & get processor Samples (#420)
1 parent 7593fb2 commit 42deddf

6 files changed

+17
-13
lines changed

batch_process_documents_processor_version_sample.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import re
1818

1919
from google.api_core.client_options import ClientOptions
20-
from google.api_core.exceptions import RetryError
20+
from google.api_core.exceptions import InternalServerError, RetryError
2121
from google.cloud import documentai, storage
2222

2323
# TODO(developer): Uncomment these variables before running the sample.
@@ -66,7 +66,8 @@ def batch_process_documents_processor_version(
6666
#
6767

6868
# Cloud Storage URI for the Output Directory
69-
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}/"
69+
# The URI must end with a trailing forward slash `/`; include it in `gcs_output_uri_prefix`
70+
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}"
7071

7172
gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig(
7273
gcs_uri=destination_uri, field_mask=field_mask
@@ -97,7 +98,7 @@ def batch_process_documents_processor_version(
9798
print(f"Waiting for operation {operation.operation.name} to complete...")
9899
operation.result(timeout=timeout)
99100
# Catch exception when operation doesn't finish before timeout or fails with an internal server error
100-
except (RetryError) as e:
101+
except (RetryError, InternalServerError) as e:
101102
print(e.message)
102103

103104
# NOTE: Can also use callbacks for asynchronous processing

batch_process_documents_processor_version_sample_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
processor_version_id = "pretrained-form-parser-v1.0-2020-09-23"
2828
gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
2929
input_mime_type = "application/pdf"
30-
gcs_output_uri_prefix = uuid4()
30+
gcs_output_uri_prefix = f"{uuid4()}/"
3131
field_mask = "text,pages.pageNumber"
3232
BUCKET_NAME = f"document-ai-python-{uuid4()}"
3333

batch_process_documents_sample.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import re
1818

1919
from google.api_core.client_options import ClientOptions
20-
from google.api_core.exceptions import RetryError
20+
from google.api_core.exceptions import InternalServerError, RetryError
2121
from google.cloud import documentai, storage
2222

2323
# TODO(developer): Uncomment these variables before running the sample.
@@ -64,7 +64,8 @@ def batch_process_documents(
6464
#
6565

6666
# Cloud Storage URI for the Output Directory
67-
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}/"
67+
# The URI must end with a trailing forward slash `/`; include it in `gcs_output_uri_prefix`
68+
destination_uri = f"{gcs_output_bucket}/{gcs_output_uri_prefix}"
6869

6970
gcs_output_config = documentai.DocumentOutputConfig.GcsOutputConfig(
7071
gcs_uri=destination_uri, field_mask=field_mask
@@ -93,7 +94,7 @@ def batch_process_documents(
9394
print(f"Waiting for operation {operation.operation.name} to complete...")
9495
operation.result(timeout=timeout)
9596
# Catch exception when operation doesn't finish before timeout or fails with an internal server error
96-
except (RetryError) as e:
97+
except (RetryError, InternalServerError) as e:
9798
print(e.message)
9899

99100
# NOTE: Can also use callbacks for asynchronous processing

batch_process_documents_sample_bad_input_test.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import os
1717
from uuid import uuid4
1818

19+
from google.api_core.exceptions import InternalServerError, RetryError
1920
from samples.snippets import batch_process_documents_sample
2021

2122
location = "us"
@@ -25,7 +26,7 @@
2526
input_mime_type = "application/pdf"
2627
# The following bucket contains a .csv file, which will cause the sample to fail.
2728
gcs_output_full_uri_with_wrong_type = "gs://documentai-beta-samples"
28-
gcs_output_uri_prefix = "test"
29+
gcs_output_uri_prefix = "test/"
2930
BUCKET_NAME = f"document-ai-python-{uuid4()}"
3031

3132

@@ -41,7 +42,8 @@ def test_batch_process_documents_with_bad_input(capsys):
4142
gcs_output_uri_prefix=gcs_output_uri_prefix,
4243
timeout=450,
4344
)
45+
except ValueError:
4446
out, _ = capsys.readouterr()
45-
assert "Failed" in out
46-
except Exception as e:
47-
assert "Failed" in e.message
47+
assert "Failed" in out or "error" in out
48+
except (InternalServerError, RetryError) as e:
49+
assert "error" in e.message

batch_process_documents_sample_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
processor_id = "90484cfdedb024f6"
2727
gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf"
2828
input_mime_type = "application/pdf"
29-
gcs_output_uri_prefix = uuid4()
29+
gcs_output_uri_prefix = f"{uuid4()}/"
3030
field_mask = "text,pages.pageNumber"
3131
BUCKET_NAME = f"document-ai-python-{uuid4()}"
3232

get_processor_sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def get_processor_sample(project_id: str, location: str, processor_id: str):
3131
client = documentai.DocumentProcessorServiceClient(client_options=opts)
3232

3333
# The full resource name of the processor, e.g.:
34-
# projects/project_id/locations/location/processor/processor_id
34+
# projects/{project_id}/locations/{location}/processors/{processor_id}
3535
name = client.processor_path(project_id, location, processor_id)
3636

3737
# Make GetProcessor request

0 commit comments

Comments
 (0)