Skip to content

Commit edcb284

Browse files
munkhuushmgl authored and Shabirmean committed
chore: added conditional check to prevent IndexOutOfBoundsException (#343)
* chore: added conditional check to prevent IndexOutOfBoundsException
* nit
* removed first lang part from batchTable sample
1 parent f4be66b commit edcb284

File tree

4 files changed

+70
-58
lines changed

4 files changed

+70
-58
lines changed

document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseFormBeta.java

+17-17
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,7 @@ public static void batchParseFormGcs(
6868
// Initialize client that will be used to send requests. This client only needs to be created
6969
// once, and can be reused for multiple requests. After completing all of your requests, call
7070
// the "close" method on the client to safely clean up any remaining background resources.
71-
try (DocumentUnderstandingServiceClient client =
72-
DocumentUnderstandingServiceClient.create()) {
71+
try (DocumentUnderstandingServiceClient client = DocumentUnderstandingServiceClient.create()) {
7372

7473
// Configure the request for processing the PDF
7574
String parent = String.format("projects/%s/locations/%s", projectId, location);
@@ -103,17 +102,16 @@ public static void batchParseFormGcs(
103102
// mime_type can be application/pdf, image/tiff,
104103
// and image/gif, or application/json
105104
InputConfig config =
106-
InputConfig.newBuilder().setGcsSource(inputUri)
107-
.setMimeType("application/pdf").build();
105+
InputConfig.newBuilder().setGcsSource(inputUri).setMimeType("application/pdf").build();
108106

109-
GcsDestination gcsDestination = GcsDestination.newBuilder()
110-
.setUri(String.format("gs://%s/%s", outputGcsBucketName, outputGcsPrefix)).build();
111-
112-
OutputConfig outputConfig = OutputConfig.newBuilder()
113-
.setGcsDestination(gcsDestination)
114-
.setPagesPerShard(1)
107+
GcsDestination gcsDestination =
108+
GcsDestination.newBuilder()
109+
.setUri(String.format("gs://%s/%s", outputGcsBucketName, outputGcsPrefix))
115110
.build();
116111

112+
OutputConfig outputConfig =
113+
OutputConfig.newBuilder().setGcsDestination(gcsDestination).setPagesPerShard(1).build();
114+
117115
ProcessDocumentRequest request =
118116
ProcessDocumentRequest.newBuilder()
119117
.setFormExtractionParams(params)
@@ -165,13 +163,15 @@ public static void batchParseFormGcs(
165163
String text = document.getText();
166164

167165
// Process the output.
168-
Document.Page page1 = document.getPages(0);
169-
for (Document.Page.FormField field : page1.getFormFieldsList()) {
170-
String fieldName = getText(field.getFieldName(), text);
171-
String fieldValue = getText(field.getFieldValue(), text);
172-
173-
System.out.println("Extracted form fields pair:");
174-
System.out.printf("\t(%s, %s))", fieldName, fieldValue);
166+
if (document.getPagesCount() > 0) {
167+
Document.Page page1 = document.getPages(0);
168+
for (Document.Page.FormField field : page1.getFormFieldsList()) {
169+
String fieldName = getText(field.getFieldName(), text);
170+
String fieldValue = getText(field.getFieldValue(), text);
171+
172+
System.out.println("Extracted form fields pair:");
173+
System.out.printf("\t(%s, %s))", fieldName, fieldValue);
174+
}
175175
}
176176

177177
// Clean up temp file.

document-ai/snippets/src/main/java/documentai/v1beta2/BatchParseTableBeta.java

+24-18
Original file line numberDiff line numberDiff line change
@@ -165,24 +165,30 @@ public static void batchParseTableGcs(
165165
String text = document.getText();
166166

167167
// Process the output.
168-
Document.Page page1 = document.getPages(0);
169-
Document.Page.Table table = page1.getTables(0);
170-
171-
System.out.println("Results from first table processed:");
172-
System.out.println("Header row:");
173-
174-
Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
175-
176-
for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
177-
if (!tableCell.getLayout().getTextAnchor().getTextSegmentsList().isEmpty()) {
178-
// Extract shards from the text field
179-
// First shard in document doesn't have startIndex property
180-
List<Document.TextAnchor.TextSegment> textSegments =
181-
tableCell.getLayout().getTextAnchor().getTextSegmentsList();
182-
int startIdx =
183-
textSegments.size() > 0 ? (int) textSegments.get(0).getStartIndex() : 0;
184-
int endIdx = (int) textSegments.get(0).getEndIndex();
185-
System.out.printf("\t%s", text.substring(startIdx, endIdx));
168+
if (document.getPagesCount() > 0) {
169+
Document.Page page1 = document.getPages(0);
170+
if (page1.getTablesCount() > 0) {
171+
Document.Page.Table table = page1.getTables(0);
172+
173+
System.out.println("Results from first table processed:");
174+
System.out.println("Header row:");
175+
176+
if (table.getHeaderRowsCount() > 0) {
177+
Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
178+
179+
for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
180+
if (!tableCell.getLayout().getTextAnchor().getTextSegmentsList().isEmpty()) {
181+
// Extract shards from the text field
182+
// First shard in document doesn't have startIndex property
183+
List<Document.TextAnchor.TextSegment> textSegments =
184+
tableCell.getLayout().getTextAnchor().getTextSegmentsList();
185+
int startIdx =
186+
textSegments.size() > 0 ? (int) textSegments.get(0).getStartIndex() : 0;
187+
int endIdx = (int) textSegments.get(0).getEndIndex();
188+
System.out.printf("\t%s", text.substring(startIdx, endIdx));
189+
}
190+
}
191+
}
186192
}
187193
}
188194

document-ai/snippets/src/main/java/documentai/v1beta2/ParseFormBeta.java

+8-6
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,15 @@ public static void parseForm(String projectId, String location, String inputGcsU
9090
String text = response.getText();
9191

9292
// Process the output
93-
Document.Page page1 = response.getPages(0);
94-
for (Document.Page.FormField field : page1.getFormFieldsList()) {
95-
String fieldName = getText(field.getFieldName(), text);
96-
String fieldValue = getText(field.getFieldValue(), text);
93+
if (response.getPagesCount() > 0) {
94+
Document.Page page1 = response.getPages(0);
95+
for (Document.Page.FormField field : page1.getFormFieldsList()) {
96+
String fieldName = getText(field.getFieldName(), text);
97+
String fieldValue = getText(field.getFieldValue(), text);
9798

98-
System.out.println("Extracted form fields pair:");
99-
System.out.printf("\t(%s, %s))", fieldName, fieldValue);
99+
System.out.println("Extracted form fields pair:");
100+
System.out.printf("\t(%s, %s))", fieldName, fieldValue);
101+
}
100102
}
101103
}
102104
}

document-ai/snippets/src/main/java/documentai/v1beta2/ParseTableBeta.java

+21-17
Original file line numberDiff line numberDiff line change
@@ -94,23 +94,27 @@ public static void parseTable(String projectId, String location, String inputGcs
9494
String text = response.getText();
9595

9696
// Get the first table in the document
97-
Document.Page page1 = response.getPages(0);
98-
Document.Page.Table table = page1.getTables(0);
99-
100-
System.out.println("Results from first table processed:");
101-
List<Document.Page.DetectedLanguage> detectedLangs = page1.getDetectedLanguagesList();
102-
String langCode =
103-
detectedLangs.size() > 0 ? detectedLangs.get(0).getLanguageCode() : "NOT_FOUND";
104-
System.out.printf("First detected language: : %s", langCode);
105-
106-
Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
107-
System.out.println("Header row:");
108-
109-
for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
110-
if (tableCell.getLayout().getTextAnchor().getTextSegmentsList() != null) {
111-
// Extract shards from the text field
112-
// First shard in document doesn't have startIndex property
113-
System.out.printf("\t%s", getText(tableCell.getLayout(), text));
97+
if (response.getPagesCount() > 0) {
98+
Document.Page page1 = response.getPages(0);
99+
if (page1.getTablesCount() > 0) {
100+
Document.Page.Table table = page1.getTables(0);
101+
102+
System.out.println("Results from first table processed:");
103+
List<Document.Page.DetectedLanguage> detectedLangs = page1.getDetectedLanguagesList();
104+
String langCode =
105+
detectedLangs.size() > 0 ? detectedLangs.get(0).getLanguageCode() : "NOT_FOUND";
106+
System.out.printf("First detected language: : %s", langCode);
107+
108+
Document.Page.Table.TableRow headerRow = table.getHeaderRows(0);
109+
System.out.println("Header row:");
110+
111+
for (Document.Page.Table.TableCell tableCell : headerRow.getCellsList()) {
112+
if (tableCell.getLayout().getTextAnchor().getTextSegmentsList() != null) {
113+
// Extract shards from the text field
114+
// First shard in document doesn't have startIndex property
115+
System.out.printf("\t%s", getText(tableCell.getLayout(), text));
116+
}
117+
}
114118
}
115119
}
116120
}

0 commit comments

Comments
 (0)