Skip to content

Commit 17ad110

Browse files
[formrecognizer] add strongly-typed receipt wrapper sample (#12128)
* add strongly typed receipt samples * update sample tests * add link to doc showing receipt fields available * update receipt fields link to aka.ms
1 parent 78ffbba commit 17ad110

11 files changed

+276
-5
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py

+6
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ def begin_recognize_receipts(self, receipt, **kwargs):
8787
The input document must be of one of the supported content types - 'application/pdf',
8888
'image/jpeg', 'image/png' or 'image/tiff'.
8989
90+
See fields found on a receipt here:
91+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
92+
9093
:param receipt: JPEG, PNG, PDF and TIFF type file stream or bytes.
9194
Currently only supports US sales receipts.
9295
:type receipt: bytes or IO[bytes]
@@ -141,6 +144,9 @@ def begin_recognize_receipts_from_url(self, receipt_url, **kwargs):
141144
"""Extract field text and semantic values from a given US sales receipt.
142145
The input document must be the location (Url) of the receipt to be analyzed.
143146
147+
See fields found on a receipt here:
148+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
149+
144150
:param str receipt_url: The url of the receipt to analyze. The input must be a valid, encoded url
145151
of one of the supported formats: JPEG, PNG, PDF and TIFF. Currently only supports
146152
US sales receipts.

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py

+6
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ async def begin_recognize_receipts(
9595
The input document must be of one of the supported content types - 'application/pdf',
9696
'image/jpeg', 'image/png' or 'image/tiff'.
9797
98+
See fields found on a receipt here:
99+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
100+
98101
:param receipt: JPEG, PNG, PDF and TIFF type file stream or bytes.
99102
Currently only supports US sales receipts.
100103
:type receipt: bytes or IO[bytes]
@@ -155,6 +158,9 @@ async def begin_recognize_receipts_from_url(
155158
"""Extract field text and semantic values from a given US sales receipt.
156159
The input document must be the location (Url) of the receipt to be analyzed.
157160
161+
See fields found on a receipt here:
162+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
163+
158164
:param str receipt_url: The url of the receipt to analyze. The input must be a valid, encoded url
159165
of one of the supported formats: JPEG, PNG, PDF and TIFF. Currently only supports
160166
US sales receipts.

sdk/formrecognizer/azure-ai-formrecognizer/samples/README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ what you can do with the Azure Form Recognizer client library.
6060

6161
|**Advanced Sample File Name**|**Description**|
6262
|----------------|-------------|
63+
|[sample_strongly_typing_recognized_form.py][sample_strongly_typing_recognized_form] and [sample_strongly_typing_recognized_form_async.py][sample_strongly_typing_recognized_form_async]|Use the fields in your recognized forms to create an object with strongly-typed fields|
6364
|[sample_get_bounding_boxes.py][sample_get_bounding_boxes] and [sample_get_bounding_boxes_async.py][sample_get_bounding_boxes_async]|Get info to visualize the outlines of form content and fields, which can be used for manual validation|
6465
|[sample_differentiate_output_models_trained_with_and_without_labels.py][sample_differentiate_output_models_trained_with_and_without_labels] and [sample_differentiate_output_models_trained_with_and_without_labels_async.py][sample_differentiate_output_models_trained_with_and_without_labels_async]|See the differences in output when using a custom model trained with labeled data and one trained with unlabeled data|
6566

@@ -94,4 +95,6 @@ what you can do with the Azure Form Recognizer client library.
9495
[sample_train_model_without_labels]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_train_model_without_labels.py
9596
[sample_train_model_without_labels_async]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_train_model_without_labels_async.py
9697
[sample_copy_model]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_copy_model.py
97-
[sample_copy_model_async]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_copy_model_async.py
98+
[sample_copy_model_async]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_copy_model_async.py
99+
[sample_strongly_typing_recognized_form]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_strongly_typing_recognized_form.py
100+
[sample_strongly_typing_recognized_form_async]: https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_strongly_typing_recognized_form_async.py

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_async.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
FILE: sample_recognize_receipts_async.py
1111
1212
DESCRIPTION:
13-
This sample demonstrates how to recognize US sales receipts from a file.
13+
This sample demonstrates how to recognize and extract common fields from US receipts,
14+
using a pre-trained receipt model. For a suggested approach to extracting information
15+
from receipts, see sample_strongly_typing_recognized_form_async.py.
16+
17+
See fields found on a receipt here:
18+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
1419
1520
USAGE:
1621
python sample_recognize_receipts_async.py

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_from_url_async.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
FILE: sample_recognize_receipts_from_url_async.py
1111
1212
DESCRIPTION:
13-
This sample demonstrates how to recognize US sales receipts from a URL.
13+
This sample demonstrates how to recognize and extract common fields from a US receipt URL,
14+
using a pre-trained receipt model. For a suggested approach to extracting information
15+
from receipts, see sample_strongly_typing_recognized_form_async.py.
16+
17+
See fields found on a receipt here:
18+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
1419
1520
USAGE:
1621
python sample_recognize_receipts_from_url_async.py
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
# coding: utf-8
2+
3+
# -------------------------------------------------------------------------
4+
# Copyright (c) Microsoft Corporation. All rights reserved.
5+
# Licensed under the MIT License. See License.txt in the project root for
6+
# license information.
7+
# --------------------------------------------------------------------------
8+
9+
"""
10+
FILE: sample_strongly_typing_recognized_form_async.py
11+
12+
DESCRIPTION:
13+
This sample demonstrates how to use the fields in your recognized forms to create an object with
14+
strongly-typed fields. The pre-trained receipt method will be used to illustrate this sample, but
15+
note that a similar approach can be used for any custom form as long as you properly update the
16+
fields' names and types.
17+
18+
See fields found on a receipt here:
19+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
20+
21+
USAGE:
22+
python sample_strongly_typing_recognized_form_async.py
23+
24+
Set the environment variables with your own values before running the sample:
25+
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
26+
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
27+
"""
28+
29+
import os
30+
import asyncio
31+
from azure.ai.formrecognizer import FormField
32+
33+
34+
class Receipt(object):
    """Creates a strongly-typed Receipt class from the fields returned in a RecognizedForm.

    Every attribute is looked up by name in ``form.fields``. If a specific field
    is not found on the receipt, the attribute is set to an empty ``FormField()``
    placeholder (not ``None``), so ``.value`` and ``.confidence`` can always be
    read on it. Those are presumably ``None`` on the placeholder -- confirm
    against the FormField reference.

    See fields found on a receipt here:
    https://aka.ms/azsdk/python/formrecognizer/receiptfields

    :param form: The recognized receipt. Must expose a ``fields`` mapping whose
        ``get(name, default)`` returns the field stored under ``name``.
    """

    def __init__(self, form):
        fields = form.fields
        self.receipt_type = fields.get("ReceiptType", FormField())
        self.merchant_name = fields.get("MerchantName", FormField())
        self.merchant_address = fields.get("MerchantAddress", FormField())
        self.merchant_phone_number = fields.get("MerchantPhoneNumber", FormField())
        self.receipt_items = self.convert_to_receipt_item(fields.get("Items", FormField()))
        self.subtotal = fields.get("Subtotal", FormField())
        self.tax = fields.get("Tax", FormField())
        self.tip = fields.get("Tip", FormField())
        self.total = fields.get("Total", FormField())
        self.transaction_date = fields.get("TransactionDate", FormField())
        self.transaction_time = fields.get("TransactionTime", FormField())

    def convert_to_receipt_item(self, items):
        """Converts Items in a receipt to a list of strongly-typed ReceiptItem

        :param items: The "Items" field of the receipt, whose ``value`` holds
            one entry per line item. May be ``None`` or a placeholder field
            whose ``value`` is ``None``.
        :return: One ReceiptItem per entry in ``items.value``; an empty list
            when there are no items.
        :rtype: list
        """
        # __init__ never passes None here: a missing "Items" field arrives as
        # the FormField() placeholder, so the placeholder's empty value must be
        # checked as well or the iteration below fails on a non-iterable.
        if items is None or items.value is None:
            return []
        return [ReceiptItem(item) for item in items.value]
61+
62+
63+
class ReceiptItem(object):
    """Strongly-typed wrapper around one receipt item found in a RecognizedForm.

    Each attribute is read by name from ``item.value``; a name that is absent
    is replaced by an empty ``FormField()`` placeholder.

    :param item: A single entry of the receipt's "Items" field. Its ``value``
        must support ``get(name, default)``.
    """

    def __init__(self, item):
        item_fields = item.value
        self.name = item_fields.get("Name", FormField())
        self.quantity = item_fields.get("Quantity", FormField())
        self.price = item_fields.get("Price", FormField())
        self.total_price = item_fields.get("TotalPrice", FormField())
72+
73+
74+
class StronglyTypedRecognizedFormSampleAsync(object):
    """Async sample: recognize a receipt and print it through the Receipt wrapper."""

    async def strongly_typed_receipt_async(self):
        """Recognize the bundled sample receipt and print its strongly-typed fields.

        Reads the service endpoint and API key from the
        AZURE_FORM_RECOGNIZER_ENDPOINT and AZURE_FORM_RECOGNIZER_KEY
        environment variables.
        """
        # The sample image is resolved relative to this file's location.
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "..", "./sample_forms/receipt/contoso-allinone.jpg")
        )

        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer.aio import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        credential = AzureKeyCredential(key)
        async with FormRecognizerClient(endpoint=endpoint, credential=credential) as form_recognizer_client:
            with open(path_to_sample_forms, "rb") as receipt_stream:
                poller = await form_recognizer_client.begin_recognize_receipts(receipt=receipt_stream)
            recognized_forms = await poller.result()

        for recognized_form in recognized_forms:
            typed = Receipt(recognized_form)
            print("Receipt Type: {} has confidence: {}".format(
                typed.receipt_type.value, typed.receipt_type.confidence))
            print("Merchant Name: {} has confidence: {}".format(
                typed.merchant_name.value, typed.merchant_name.confidence))
            print("Transaction Date: {} has confidence: {}".format(
                typed.transaction_date.value, typed.transaction_date.confidence))
            print("Receipt items:")
            for line_item in typed.receipt_items:
                print("...Item Name: {} has confidence: {}".format(
                    line_item.name.value, line_item.name.confidence))
                print("...Item Quantity: {} has confidence: {}".format(
                    line_item.quantity.value, line_item.quantity.confidence))
                print("...Individual Item Price: {} has confidence: {}".format(
                    line_item.price.value, line_item.price.confidence))
                print("...Total Item Price: {} has confidence: {}".format(
                    line_item.total_price.value, line_item.total_price.confidence))
            print("Subtotal: {} has confidence: {}".format(
                typed.subtotal.value, typed.subtotal.confidence))
            print("Tax: {} has confidence: {}".format(
                typed.tax.value, typed.tax.confidence))
            print("Tip: {} has confidence: {}".format(
                typed.tip.value, typed.tip.confidence))
            print("Total: {} has confidence: {}".format(
                typed.total.value, typed.total.confidence))
108+
109+
110+
async def main():
    """Create the sample object and await its receipt coroutine."""
    await StronglyTypedRecognizedFormSampleAsync().strongly_typed_receipt_async()
113+
114+
115+
if __name__ == '__main__':
    # Drive the sample coroutine to completion when run as a script.
    asyncio.get_event_loop().run_until_complete(main())

sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
FILE: sample_recognize_receipts.py
1111
1212
DESCRIPTION:
13-
This sample demonstrates how to recognize US sales receipts from a file.
13+
This sample demonstrates how to recognize and extract common fields from US receipts,
14+
using a pre-trained receipt model. For a suggested approach to extracting information
15+
from receipts, see sample_strongly_typing_recognized_form.py.
16+
17+
See fields found on a receipt here:
18+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
1419
1520
USAGE:
1621
python sample_recognize_receipts.py

sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts_from_url.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@
1010
FILE: sample_recognize_receipts_from_url.py
1111
1212
DESCRIPTION:
13-
This sample demonstrates how to recognize US sales receipts from a URL.
13+
This sample demonstrates how to recognize and extract common fields from a US receipt URL,
14+
using a pre-trained receipt model. For a suggested approach to extracting information
15+
from receipts, see sample_strongly_typing_recognized_form.py.
16+
17+
See fields found on a receipt here:
18+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
1419
1520
USAGE:
1621
python sample_recognize_receipts_from_url.py
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# coding: utf-8
2+
3+
# -------------------------------------------------------------------------
4+
# Copyright (c) Microsoft Corporation. All rights reserved.
5+
# Licensed under the MIT License. See License.txt in the project root for
6+
# license information.
7+
# --------------------------------------------------------------------------
8+
9+
"""
10+
FILE: sample_strongly_typing_recognized_form.py
11+
12+
DESCRIPTION:
13+
This sample demonstrates how to use the fields in your recognized forms to create an object with
14+
strongly-typed fields. The pre-trained receipt method will be used to illustrate this sample, but
15+
note that a similar approach can be used for any custom form as long as you properly update the
16+
fields' names and types.
17+
18+
See fields found on a receipt here:
19+
https://aka.ms/azsdk/python/formrecognizer/receiptfields
20+
21+
USAGE:
22+
python sample_strongly_typing_recognized_form.py
23+
24+
Set the environment variables with your own values before running the sample:
25+
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
26+
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
27+
"""
28+
29+
import os
30+
from azure.ai.formrecognizer import FormField
31+
32+
33+
class Receipt(object):
    """Creates a strongly-typed Receipt class from the fields returned in a RecognizedForm.

    Every attribute is looked up by name in ``form.fields``. If a specific field
    is not found on the receipt, the attribute is set to an empty ``FormField()``
    placeholder (not ``None``), so ``.value`` and ``.confidence`` can always be
    read on it. Those are presumably ``None`` on the placeholder -- confirm
    against the FormField reference.

    See fields found on a receipt here:
    https://aka.ms/azsdk/python/formrecognizer/receiptfields

    :param form: The recognized receipt. Must expose a ``fields`` mapping whose
        ``get(name, default)`` returns the field stored under ``name``.
    """

    def __init__(self, form):
        fields = form.fields
        self.receipt_type = fields.get("ReceiptType", FormField())
        self.merchant_name = fields.get("MerchantName", FormField())
        self.merchant_address = fields.get("MerchantAddress", FormField())
        self.merchant_phone_number = fields.get("MerchantPhoneNumber", FormField())
        self.receipt_items = self.convert_to_receipt_item(fields.get("Items", FormField()))
        self.subtotal = fields.get("Subtotal", FormField())
        self.tax = fields.get("Tax", FormField())
        self.tip = fields.get("Tip", FormField())
        self.total = fields.get("Total", FormField())
        self.transaction_date = fields.get("TransactionDate", FormField())
        self.transaction_time = fields.get("TransactionTime", FormField())

    def convert_to_receipt_item(self, items):
        """Converts Items in a receipt to a list of strongly-typed ReceiptItem

        :param items: The "Items" field of the receipt, whose ``value`` holds
            one entry per line item. May be ``None`` or a placeholder field
            whose ``value`` is ``None``.
        :return: One ReceiptItem per entry in ``items.value``; an empty list
            when there are no items.
        :rtype: list
        """
        # __init__ never passes None here: a missing "Items" field arrives as
        # the FormField() placeholder, so the placeholder's empty value must be
        # checked as well or the iteration below fails on a non-iterable.
        if items is None or items.value is None:
            return []
        return [ReceiptItem(item) for item in items.value]
60+
61+
62+
class ReceiptItem(object):
    """Strongly-typed wrapper around one receipt item found in a RecognizedForm.

    Each attribute is read by name from ``item.value``; a name that is absent
    is replaced by an empty ``FormField()`` placeholder.

    :param item: A single entry of the receipt's "Items" field. Its ``value``
        must support ``get(name, default)``.
    """

    def __init__(self, item):
        item_fields = item.value
        self.name = item_fields.get("Name", FormField())
        self.quantity = item_fields.get("Quantity", FormField())
        self.price = item_fields.get("Price", FormField())
        self.total_price = item_fields.get("TotalPrice", FormField())
71+
72+
73+
class StronglyTypedRecognizedFormSample(object):
    """Sample: recognize a receipt and print it through the Receipt wrapper."""

    def strongly_typed_receipt(self):
        """Recognize the bundled sample receipt and print its strongly-typed fields.

        Reads the service endpoint and API key from the
        AZURE_FORM_RECOGNIZER_ENDPOINT and AZURE_FORM_RECOGNIZER_KEY
        environment variables.
        """
        # The sample image is resolved relative to this file's location.
        this_file = os.path.abspath(__file__)
        path_to_sample_forms = os.path.abspath(
            os.path.join(this_file, "..", "./sample_forms/receipt/contoso-allinone.jpg")
        )

        from azure.core.credentials import AzureKeyCredential
        from azure.ai.formrecognizer import FormRecognizerClient

        endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
        key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]

        credential = AzureKeyCredential(key)
        form_recognizer_client = FormRecognizerClient(endpoint=endpoint, credential=credential)
        with open(path_to_sample_forms, "rb") as receipt_stream:
            poller = form_recognizer_client.begin_recognize_receipts(receipt=receipt_stream)
        recognized_forms = poller.result()

        for recognized_form in recognized_forms:
            typed = Receipt(recognized_form)
            print("Receipt Type: {} has confidence: {}".format(
                typed.receipt_type.value, typed.receipt_type.confidence))
            print("Merchant Name: {} has confidence: {}".format(
                typed.merchant_name.value, typed.merchant_name.confidence))
            print("Transaction Date: {} has confidence: {}".format(
                typed.transaction_date.value, typed.transaction_date.confidence))
            print("Receipt items:")
            for line_item in typed.receipt_items:
                print("...Item Name: {} has confidence: {}".format(
                    line_item.name.value, line_item.name.confidence))
                print("...Item Quantity: {} has confidence: {}".format(
                    line_item.quantity.value, line_item.quantity.confidence))
                print("...Individual Item Price: {} has confidence: {}".format(
                    line_item.price.value, line_item.price.confidence))
                print("...Total Item Price: {} has confidence: {}".format(
                    line_item.total_price.value, line_item.total_price.confidence))
            print("Subtotal: {} has confidence: {}".format(
                typed.subtotal.value, typed.subtotal.confidence))
            print("Tax: {} has confidence: {}".format(
                typed.tax.value, typed.tax.confidence))
            print("Tip: {} has confidence: {}".format(
                typed.tip.value, typed.tip.confidence))
            print("Total: {} has confidence: {}".format(
                typed.total.value, typed.total.confidence))
106+
107+
108+
if __name__ == '__main__':
    # Run the sample end to end when executed as a script.
    StronglyTypedRecognizedFormSample().strongly_typed_receipt()

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_samples.py

+5
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,8 @@ def test_sample_train_model_with_labels(self, resource_group, location, form_rec
110110
def test_sample_train_model_without_labels(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
111111
os.environ['CONTAINER_SAS_URL'] = self.get_settings_value("FORM_RECOGNIZER_STORAGE_CONTAINER_SAS_URL")
112112
_test_file('sample_train_model_without_labels.py', form_recognizer_account, form_recognizer_account_key)
113+
114+
@pytest.mark.live_test_only
115+
@GlobalFormRecognizerAccountPreparer()
116+
def test_sample_strongly_typing_recognized_form(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
117+
_test_file('sample_strongly_typing_recognized_form.py', form_recognizer_account, form_recognizer_account_key)

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_samples_async.py

+4
Original file line numberDiff line numberDiff line change
@@ -108,3 +108,7 @@ def test_sample_train_model_without_labels_async(self, resource_group, location,
108108
os.environ['CONTAINER_SAS_URL'] = self.get_settings_value("FORM_RECOGNIZER_STORAGE_CONTAINER_SAS_URL")
109109
_test_file('sample_train_model_without_labels_async.py', form_recognizer_account, form_recognizer_account_key)
110110

111+
@pytest.mark.live_test_only
112+
@GlobalFormRecognizerAccountPreparer()
113+
def test_sample_strongly_typing_recognized_form_async(self, resource_group, location, form_recognizer_account, form_recognizer_account_key):
114+
_test_file('sample_strongly_typing_recognized_form_async.py', form_recognizer_account, form_recognizer_account_key)

0 commit comments

Comments
 (0)