Skip to content

Commit 1e2f11b

Browse files
Matt Carroll and gcf-owl-bot[bot] authored
docs(samples): add OCR, form, quality, splitter and specialized processing samples (#283)
* docs(samples): add OCR, form, quality, splitter and specialized processing samples * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 9bac12a commit 1e2f11b

14 files changed

+863
-0
lines changed

document-ai/process-document-form.js

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/**
2+
* Copyright 2021, Google, Inc.
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
'use strict';
17+
18+
async function main(projectId, location, processorId, filePath) {
19+
// [START documentai_process_form_document]
20+
/**
21+
* TODO(developer): Uncomment these variables before running the sample.
22+
*/
23+
// const projectId = 'YOUR_PROJECT_ID';
24+
// const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu'
25+
// const processorId = 'YOUR_PROCESSOR_ID'; // Create processor in Cloud Console
26+
// const filePath = '/path/to/local/pdf';
27+
28+
const {DocumentProcessorServiceClient} =
29+
require('@google-cloud/documentai').v1beta3;
30+
31+
// Instantiates a client
32+
const client = new DocumentProcessorServiceClient();
33+
34+
async function processDocument() {
35+
// The full resource name of the processor, e.g.:
36+
// projects/project-id/locations/location/processor/processor-id
37+
// You must create new processors in the Cloud Console first
38+
const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;
39+
40+
// Read the file into memory.
41+
const fs = require('fs').promises;
42+
const imageFile = await fs.readFile(filePath);
43+
44+
// Convert the image data to a Buffer and base64 encode it.
45+
const encodedImage = Buffer.from(imageFile).toString('base64');
46+
47+
const request = {
48+
name,
49+
rawDocument: {
50+
content: encodedImage,
51+
mimeType: 'application/pdf',
52+
},
53+
};
54+
55+
// Recognizes text entities in the PDF document
56+
const [result] = await client.processDocument(request);
57+
58+
console.log('Document processing complete.');
59+
60+
// Read the table and form fields output from the processor
61+
// The form processor also contains OCR data. For more information
62+
// on how to parse OCR data please see the OCR sample.
63+
// For a full list of Document object attributes,
64+
// please reference this page: https://googleapis.dev/nodejs/documentai/latest/index.html
65+
const {document} = result;
66+
const {text} = document;
67+
console.log(`Full document text: ${JSON.stringify(text)}`);
68+
console.log(`There are ${document.pages.length} page(s) in this document.`);
69+
70+
for (const page of document.pages) {
71+
console.log(`\n\n**** Page ${page.pageNumber} ****`);
72+
73+
console.log(`Found ${page.tables.length} table(s):`);
74+
for (const table of page.tables) {
75+
const numCollumns = table.headerRows[0].cells.length;
76+
const numRows = table.bodyRows.length;
77+
console.log(`Table with ${numCollumns} columns and ${numRows} rows:`);
78+
printTableInfo(table, text);
79+
}
80+
console.log(`Found ${page.formFields.length} form field(s):`);
81+
for (const field of page.formFields) {
82+
const fieldName = getText(field.fieldName.textAnchor, text);
83+
const fieldValue = getText(field.fieldValue.textAnchor, text);
84+
console.log(
85+
`\t* ${JSON.stringify(fieldName)}: ${JSON.stringify(fieldValue)}`
86+
);
87+
}
88+
}
89+
}
90+
91+
// Prints the header row and the first body row of a table as
// " | "-separated, JSON-quoted cell texts. A missing header or body row is
// printed as an empty row instead of throwing.
const printTableInfo = (table, text) => {
  // Render one row; an absent row is treated as having no cells.
  const formatRow = row => {
    const cells = row && row.cells ? row.cells : [];
    return cells
      .map(cell => JSON.stringify(getText(cell.layout.textAnchor, text).trim()))
      .join(' | ');
  };

  const headerRows = table.headerRows || [];
  const bodyRows = table.bodyRows || [];

  // Print header row
  console.log(`Columns: ${formatRow(headerRows[0])}`);
  // Print first body row
  console.log(`First row data: ${formatRow(bodyRows[0])}`);
};
111+
112+
// Returns the text that a text anchor refers to, by concatenating every
// segment of the anchor cut out of the full document text. Returns '' when
// the anchor is missing or has no segments.
const getText = (textAnchor, text) => {
  if (
    !textAnchor ||
    !textAnchor.textSegments ||
    textAnchor.textSegments.length === 0
  ) {
    return '';
  }

  let extracted = '';
  for (const segment of textAnchor.textSegments) {
    // First shard in document doesn't have startIndex property
    const startIndex = segment.startIndex || 0;
    const endIndex = segment.endIndex;
    extracted += text.substring(startIndex, endIndex);
  }
  return extracted;
};
124+
125+
// [END documentai_process_form_document]
126+
await processDocument();
127+
}
128+
129+
// Run the sample with the command-line arguments; on failure log the error
// and mark the process as failed without forcing an immediate exit.
main(...process.argv.slice(2)).catch(function reportFailure(error) {
  console.error(error);
  process.exitCode = 1;
});

document-ai/process-document-ocr.js

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
/**
2+
* Copyright 2021, Google, Inc.
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
'use strict';
17+
18+
async function main(projectId, location, processorId, filePath) {
19+
// [START documentai_process_ocr_document]
20+
/**
21+
* TODO(developer): Uncomment these variables before running the sample.
22+
*/
23+
// const projectId = 'YOUR_PROJECT_ID';
24+
// const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu'
25+
// const processorId = 'YOUR_PROCESSOR_ID'; // Create processor in Cloud Console
26+
// const filePath = '/path/to/local/pdf';
27+
28+
const {DocumentProcessorServiceClient} =
29+
require('@google-cloud/documentai').v1beta3;
30+
31+
// Instantiates a client
32+
const client = new DocumentProcessorServiceClient();
33+
34+
/**
 * Sends the local PDF at `filePath` to the OCR processor and prints, for
 * each page, its dimensions, detected languages, paragraphs, blocks, lines
 * and tokens.
 */
async function processDocument() {
  // Processor resource name, shaped like:
  // projects/project-id/locations/location/processors/processor-id
  // The processor has to be created in the Cloud Console beforehand.
  const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;

  // Load the file from disk and base64 encode its bytes.
  const {readFile} = require('fs').promises;
  const fileBytes = await readFile(filePath);
  const content = Buffer.from(fileBytes).toString('base64');

  // Submit the raw PDF to the processor.
  const rawDocument = {content, mimeType: 'application/pdf'};
  const [result] = await client.processDocument({name, rawDocument});

  console.log('Document processing complete.');

  // Read the text recognition output from the processor.
  // For a full list of Document object attributes, see
  // https://googleapis.dev/nodejs/documentai/latest/index.html
  const {document} = result;
  const {text} = document;

  console.log(`Full document text: ${JSON.stringify(text)}`);
  console.log(`There are ${document.pages.length} page(s) in this document.`);
  for (const page of document.pages) {
    console.log(`Page ${page.pageNumber}`);
    printPageDimensions(page.dimension);
    printDetectedLanguages(page.detectedLanguages);
    printParagraphs(page.paragraphs, text);
    printBlocks(page.blocks, text);
    printLines(page.lines, text);
    printTokens(page.tokens, text);
  }
}
79+
80+
// Prints the width and the height of a page dimension object.
const printPageDimensions = dimension => {
  const {width, height} = dimension;
  console.log(` Width: ${width}`);
  console.log(` Height: ${height}`);
};
84+
85+
// Prints each detected language code with its confidence as a percentage
// rounded to two decimals.
const printDetectedLanguages = detectedLanguages => {
  console.log(' Detected languages:');
  for (const {languageCode, confidence} of detectedLanguages) {
    const percent = (confidence * 100).toFixed(2);
    console.log(` ${languageCode} (${percent}% confidence)`);
  }
};
93+
94+
// Prints how many paragraphs were detected plus the text of the first and
// last one. When there are no paragraphs only the count is printed.
const printParagraphs = (paragraphs, text) => {
  const items = paragraphs || [];
  console.log(` ${items.length} paragraphs detected:`);
  if (items.length === 0) {
    // Nothing to sample; indexing items[0] would throw.
    return;
  }

  const firstParagraphText = getText(items[0].layout.textAnchor, text);
  console.log(
    ` First paragraph text: ${JSON.stringify(firstParagraphText)}`
  );
  const lastParagraphText = getText(
    items[items.length - 1].layout.textAnchor,
    text
  );
  console.log(
    ` Last paragraph text: ${JSON.stringify(lastParagraphText)}`
  );
};
108+
109+
// Prints how many blocks were detected plus the text of the first and last
// one. When there are no blocks only the count is printed.
const printBlocks = (blocks, text) => {
  const items = blocks || [];
  console.log(` ${items.length} blocks detected:`);
  if (items.length === 0) {
    // Nothing to sample; indexing items[0] would throw.
    return;
  }

  const firstBlockText = getText(items[0].layout.textAnchor, text);
  console.log(` First block text: ${JSON.stringify(firstBlockText)}`);
  const lastBlockText = getText(
    items[items.length - 1].layout.textAnchor,
    text
  );
  console.log(` Last block text: ${JSON.stringify(lastBlockText)}`);
};
119+
120+
// Prints how many lines were detected plus the text of the first and last
// one. When there are no lines only the count is printed.
const printLines = (lines, text) => {
  const items = lines || [];
  console.log(` ${items.length} lines detected:`);
  if (items.length === 0) {
    // Nothing to sample; indexing items[0] would throw.
    return;
  }

  const firstLineText = getText(items[0].layout.textAnchor, text);
  console.log(` First line text: ${JSON.stringify(firstLineText)}`);
  const lastLineText = getText(
    items[items.length - 1].layout.textAnchor,
    text
  );
  console.log(` Last line text: ${JSON.stringify(lastLineText)}`);
};
130+
131+
// Prints how many tokens were detected plus the text and break type of the
// first and last one. When there are no tokens only the count is printed.
const printTokens = (tokens, text) => {
  const items = tokens || [];
  console.log(` ${items.length} tokens detected:`);
  if (items.length === 0) {
    // Nothing to sample; indexing items[0] would throw.
    return;
  }

  // Defensive: report a placeholder instead of throwing if a token carries
  // no detectedBreak object.
  const breakTypeOf = token =>
    token.detectedBreak ? token.detectedBreak.type : '(none)';

  const firstToken = items[0];
  const lastToken = items[items.length - 1];

  const firstTokenText = getText(firstToken.layout.textAnchor, text);
  console.log(` First token text: ${JSON.stringify(firstTokenText)}`);
  console.log(` First token break type: ${breakTypeOf(firstToken)}`);

  const lastTokenText = getText(lastToken.layout.textAnchor, text);
  console.log(` Last token text: ${JSON.stringify(lastTokenText)}`);
  console.log(` Last token break type: ${breakTypeOf(lastToken)}`);
};
145+
146+
// Returns the text that a text anchor refers to, by concatenating every
// segment of the anchor cut out of the full document text. Returns '' when
// the anchor is missing or has no segments.
const getText = (textAnchor, text) => {
  if (
    !textAnchor ||
    !textAnchor.textSegments ||
    textAnchor.textSegments.length === 0
  ) {
    return '';
  }

  let extracted = '';
  for (const segment of textAnchor.textSegments) {
    // First shard in document doesn't have startIndex property
    const startIndex = segment.startIndex || 0;
    const endIndex = segment.endIndex;
    extracted += text.substring(startIndex, endIndex);
  }
  return extracted;
};
158+
159+
// [END documentai_process_ocr_document]
160+
await processDocument();
161+
}
162+
163+
// Run the sample with the command-line arguments; on failure log the error
// and mark the process as failed without forcing an immediate exit.
main(...process.argv.slice(2)).catch(function reportFailure(error) {
  console.error(error);
  process.exitCode = 1;
});
+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/**
2+
* Copyright 2021, Google, Inc.
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
'use strict';
17+
18+
async function main(projectId, location, processorId, filePath) {
19+
// [START documentai_process_quality_document]
20+
/**
21+
* TODO(developer): Uncomment these variables before running the sample.
22+
*/
23+
// const projectId = 'YOUR_PROJECT_ID';
24+
// const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu'
25+
// const processorId = 'YOUR_PROCESSOR_ID'; // Create processor in Cloud Console
26+
// const filePath = '/path/to/local/pdf';
27+
28+
const {DocumentProcessorServiceClient} =
29+
require('@google-cloud/documentai').v1beta3;
30+
31+
// Instantiates a client
32+
const client = new DocumentProcessorServiceClient();
33+
34+
async function processDocument() {
35+
// The full resource name of the processor, e.g.:
36+
// projects/project-id/locations/location/processor/processor-id
37+
// You must create new processors in the Cloud Console first
38+
const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;
39+
40+
// Read the file into memory.
41+
const fs = require('fs').promises;
42+
const imageFile = await fs.readFile(filePath);
43+
44+
// Convert the image data to a Buffer and base64 encode it.
45+
const encodedImage = Buffer.from(imageFile).toString('base64');
46+
47+
const request = {
48+
name,
49+
rawDocument: {
50+
content: encodedImage,
51+
mimeType: 'application/pdf',
52+
},
53+
};
54+
55+
// Recognizes text entities in the PDF document
56+
const [result] = await client.processDocument(request);
57+
58+
console.log('Document processing complete.');
59+
60+
// Read the quality-specific information from the output from the
61+
// Intelligent Document Quality Processor:
62+
// https://cloud.google.com/document-ai/docs/processors-list#processor_doc-quality-processor
63+
// OCR and other data is also present in the quality processor's response.
64+
// Please see the OCR and other samples for how to parse other data in the
65+
// response.
66+
const {document} = result;
67+
for (const entity of document.entities) {
68+
const entityConf = entity.confidence * 100;
69+
const pageNum = parseInt(entity.pageAnchor.pageRefs.page) + 1 || 1;
70+
console.log(
71+
`Page ${pageNum} has a quality score of ${entityConf.toFixed(2)}%:`
72+
);
73+
for (const prop of entity.properties) {
74+
const propConf = prop.confidence * 100;
75+
console.log(`\t* ${prop.type} score of ${propConf.toFixed(2)}%`);
76+
}
77+
}
78+
}
79+
80+
// [END documentai_process_quality_document]
81+
await processDocument();
82+
}
83+
84+
// Run the sample with the command-line arguments; on failure log the error
// and mark the process as failed without forcing an immediate exit.
main(...process.argv.slice(2)).catch(function reportFailure(error) {
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)