Skip to content

Commit 66dd15e

Browse files
[formrecognizer] update samples to train a model where a model_id is needed (#18789)
* update samples to use existing model ID or train a model option
* remove static environment variables
* switch to using getenv for model id
* fixes
* update docs
* fix links
* review feedback
1 parent 87d7dc5 commit 66dd15e

19 files changed

+385
-70
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/samples/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ All of these samples need the endpoint to your Form Recognizer resource ([instru
4444
1. Install the Azure Form Recognizer client library for Python with [pip][pip]:
4545

4646
```bash
47-
pip install azure-ai-formrecognizer --pre
47+
pip install azure-ai-formrecognizer
4848
```
4949

5050
2. Clone or download this sample repository

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_authentication_async.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828
3) AZURE_CLIENT_ID - the client ID of your active directory application.
2929
4) AZURE_TENANT_ID - the tenant ID of your active directory application.
3030
5) AZURE_CLIENT_SECRET - the secret of your active directory application.
31-
6) AZURE_FORM_RECOGNIZER_AAD_ENDPOINT - the endpoint to your Form Recognizer resource for using AAD.
3231
"""
3332

3433
import os
@@ -60,7 +59,7 @@ async def authentication_with_azure_active_directory_form_recognizer_client_asyn
6059
from azure.ai.formrecognizer.aio import FormRecognizerClient
6160
from azure.identity.aio import DefaultAzureCredential
6261

63-
endpoint = os.environ["AZURE_FORM_RECOGNIZER_AAD_ENDPOINT"]
62+
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
6463
credential = DefaultAzureCredential()
6564

6665
form_recognizer_client = FormRecognizerClient(endpoint, credential)
@@ -89,7 +88,7 @@ async def authentication_with_azure_active_directory_form_training_client_async(
8988
from azure.ai.formrecognizer.aio import FormTrainingClient
9089
from azure.identity.aio import DefaultAzureCredential
9190

92-
endpoint = os.environ["AZURE_FORM_RECOGNIZER_AAD_ENDPOINT"]
91+
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
9392
credential = DefaultAzureCredential()
9493

9594
form_training_client = FormTrainingClient(endpoint, credential)

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_copy_model_async.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
to a target Form Recognizer resource. The resource id and the resource region can be found
1515
in the azure portal.
1616
17+
The model used in this sample can be created by running sample_train_model_with_labels_async.py with the
18+
training files in https://aka.ms/azsdk/formrecognizer/sampletrainingfiles
19+
1720
USAGE:
1821
python sample_copy_model_async.py
1922
@@ -23,6 +26,9 @@
2326
3) AZURE_FORM_RECOGNIZER_TARGET_ENDPOINT - the endpoint to your target Form Recognizer resource.
2427
4) AZURE_FORM_RECOGNIZER_TARGET_KEY - your target Form Recognizer API key
2528
5) AZURE_SOURCE_MODEL_ID - the model ID from the source resource to be copied over to the target resource.
29+
- OR -
30+
CONTAINER_SAS_URL - The shared access signature (SAS) URL of your Azure Blob Storage container with your forms.
31+
A model will be trained and used to run the sample.
2632
6) AZURE_FORM_RECOGNIZER_TARGET_REGION - the region the target resource was created in
2733
7) AZURE_FORM_RECOGNIZER_TARGET_RESOURCE_ID - the entire resource ID to the target resource
2834
"""
@@ -33,15 +39,15 @@
3339

3440
class CopyModelSampleAsync(object):
3541

36-
async def copy_model_async(self):
42+
async def copy_model_async(self, custom_model_id):
3743
from azure.core.credentials import AzureKeyCredential
3844
from azure.ai.formrecognizer.aio import FormTrainingClient
3945

4046
source_endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
4147
source_key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
4248
target_endpoint = os.environ["AZURE_FORM_RECOGNIZER_TARGET_ENDPOINT"]
4349
target_key = os.environ["AZURE_FORM_RECOGNIZER_TARGET_KEY"]
44-
source_model_id = os.environ["AZURE_SOURCE_MODEL_ID"]
50+
source_model_id = os.getenv("AZURE_SOURCE_MODEL_ID", custom_model_id)
4551
target_region = os.environ["AZURE_FORM_RECOGNIZER_TARGET_REGION"]
4652
target_resource_id = os.environ["AZURE_FORM_RECOGNIZER_TARGET_RESOURCE_ID"]
4753

@@ -74,7 +80,27 @@ async def copy_model_async(self):
7480

7581
async def main():
7682
sample = CopyModelSampleAsync()
77-
await sample.copy_model_async()
83+
model_id = None
84+
if os.getenv("CONTAINER_SAS_URL"):
85+
86+
from azure.core.credentials import AzureKeyCredential
87+
from azure.ai.formrecognizer.aio import FormTrainingClient
88+
89+
endpoint = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
90+
key = os.getenv("AZURE_FORM_RECOGNIZER_KEY")
91+
92+
if not endpoint or not key:
93+
raise ValueError("Please provide endpoint and API key to run the samples.")
94+
95+
form_training_client = FormTrainingClient(
96+
endpoint=endpoint, credential=AzureKeyCredential(key)
97+
)
98+
async with form_training_client:
99+
model = await (await form_training_client.begin_training(
100+
os.getenv("CONTAINER_SAS_URL"), use_training_labels=True)).result()
101+
model_id = model.model_id
102+
103+
await sample.copy_model_async(model_id)
78104

79105

80106
if __name__ == '__main__':

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_differentiate_output_labeled_tables_async.py

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,8 @@
1212
DESCRIPTION:
1313
This sample demonstrates the differences in output that arise when begin_recognize_custom_forms
1414
is called with custom models trained with fixed vs. dynamic table tags.
15-
The models used in this sample can be created in the sample_train_model_with_labels.py using the
16-
training files in
17-
https://github.com/Azure/azure-sdk-for-python/tree/master/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_forms/labeled_tables
15+
The models used in this sample can be created by running sample_train_model_with_labels_async.py with the
16+
training files in https://aka.ms/azsdk/formrecognizer/sampletabletrainingfiles
1817
1918
Note that Form Recognizer automatically finds and extracts all tables in your documents whether the tables
2019
are tagged/labeled or not. Tables extracted automatically by Form Recognizer will be included in the
@@ -30,7 +29,13 @@
3029
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
3130
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
3231
3) MODEL_ID_FIXED_ROW_TABLES - the ID of your custom model trained with labels on fixed row tables
32+
-OR-
33+
CONTAINER_SAS_URL_FIXED - The shared access signature (SAS) URL of your Azure Blob Storage container with
34+
your labeled data containing a fixed row table. A model will be trained and used to run the sample.
3335
4) MODEL_ID_DYNAMIC_ROW_TABLES - the ID of your custom model trained with labels on dynamic row tables
36+
-OR-
37+
CONTAINER_SAS_URL_DYNAMIC - The shared access signature (SAS) URL of your Azure Blob Storage container with
38+
your labeled data containing a dynamic row table. A model will be trained and used to run the sample.
3439
"""
3540

3641
import os
@@ -39,13 +44,13 @@
3944

4045
class TestDifferentiateOutputLabeledTablesAsync(object):
4146

42-
async def test_recognize_tables_fixed_rows_async(self):
47+
async def test_recognize_tables_fixed_rows_async(self, custom_model_id):
4348
from azure.core.credentials import AzureKeyCredential
4449
from azure.ai.formrecognizer.aio import FormRecognizerClient
4550

4651
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
4752
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
48-
model_id_fixed_rows_table = os.environ["MODEL_ID_FIXED_ROW_TABLES"]
53+
model_id_fixed_rows_table = os.getenv("MODEL_ID_FIXED_ROW_TABLES", custom_model_id)
4954

5055
form_recognizer_client = FormRecognizerClient(
5156
endpoint=endpoint, credential=AzureKeyCredential(key)
@@ -82,13 +87,13 @@ async def test_recognize_tables_fixed_rows_async(self):
8287
field.confidence
8388
))
8489

85-
async def test_recognize_tables_dynamic_rows_async(self):
90+
async def test_recognize_tables_dynamic_rows_async(self, custom_model_id):
8691
from azure.core.credentials import AzureKeyCredential
8792
from azure.ai.formrecognizer.aio import FormRecognizerClient
8893

8994
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
9095
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
91-
model_id_dynamic_rows_table = os.environ["MODEL_ID_DYNAMIC_ROW_TABLES"]
96+
model_id_dynamic_rows_table = os.getenv("MODEL_ID_DYNAMIC_ROW_TABLES", custom_model_id)
9297

9398
form_recognizer_client = FormRecognizerClient(
9499
endpoint=endpoint, credential=AzureKeyCredential(key)
@@ -128,8 +133,35 @@ async def test_recognize_tables_dynamic_rows_async(self):
128133

129134
async def main():
130135
sample = TestDifferentiateOutputLabeledTablesAsync()
131-
await sample.test_recognize_tables_fixed_rows_async()
132-
await sample.test_recognize_tables_dynamic_rows_async()
136+
fixed_model_id = None
137+
dynamic_model_id = None
138+
if os.getenv("CONTAINER_SAS_URL_FIXED") or os.getenv("CONTAINER_SAS_URL_DYNAMIC"):
139+
140+
from azure.core.credentials import AzureKeyCredential
141+
from azure.ai.formrecognizer.aio import FormTrainingClient
142+
143+
endpoint = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
144+
key = os.getenv("AZURE_FORM_RECOGNIZER_KEY")
145+
fixed = os.getenv("CONTAINER_SAS_URL_FIXED")
146+
dynamic = os.getenv("CONTAINER_SAS_URL_DYNAMIC")
147+
148+
if not endpoint or not key:
149+
raise ValueError("Please provide endpoint and API key to run the samples.")
150+
151+
form_training_client = FormTrainingClient(
152+
endpoint=endpoint, credential=AzureKeyCredential(key)
153+
)
154+
155+
async with form_training_client:
156+
if fixed:
157+
model = await (await form_training_client.begin_training(fixed, use_training_labels=True)).result()
158+
fixed_model_id = model.model_id
159+
if dynamic:
160+
model = await (await form_training_client.begin_training(dynamic, use_training_labels=True)).result()
161+
dynamic_model_id = model.model_id
162+
163+
await sample.test_recognize_tables_fixed_rows_async(fixed_model_id)
164+
await sample.test_recognize_tables_dynamic_rows_async(dynamic_model_id)
133165

134166

135167
if __name__ == '__main__':

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_differentiate_output_models_trained_with_and_without_labels_async.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
This sample demonstrates the differences in output that arise when begin_recognize_custom_forms
1414
is called with custom models trained with labels and without labels. The models used in this
1515
sample can be created in sample_train_model_with_labels_async.py and sample_train_model_without_labels_async.py
16+
using the training files found here: https://aka.ms/azsdk/formrecognizer/sampletrainingfiles
1617
1718
For a more general example of recognizing custom forms, see sample_recognize_custom_forms_async.py
1819
@@ -26,7 +27,13 @@
2627
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2728
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
2829
3) ID_OF_MODEL_TRAINED_WITH_LABELS - the ID of your custom model trained with labels
30+
-OR-
31+
CONTAINER_SAS_URL_WITH_LABELS - The shared access signature (SAS) URL of your Azure Blob Storage container with
32+
your labeled data. A model will be trained and used to run the sample.
2933
4) ID_OF_MODEL_TRAINED_WITHOUT_LABELS - the ID of your custom model trained without labels
34+
-OR-
35+
CONTAINER_SAS_URL_WITHOUT_LABELS - The shared access signature (SAS) URL of your Azure Blob Storage container with
36+
your forms. A model will be trained and used to run the sample.
3037
"""
3138

3239
import os
@@ -41,14 +48,14 @@ def format_bounding_box(bounding_box):
4148

4249
class DifferentiateOutputModelsTrainedWithAndWithoutLabelsSampleAsync(object):
4350

44-
async def recognize_custom_forms(self):
51+
async def recognize_custom_forms(self, labeled_model_id, unlabeled_model_id):
4552
from azure.core.credentials import AzureKeyCredential
4653
from azure.ai.formrecognizer.aio import FormRecognizerClient
4754

4855
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
4956
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
50-
model_trained_with_labels_id = os.environ["ID_OF_MODEL_TRAINED_WITH_LABELS"]
51-
model_trained_without_labels_id = os.environ["ID_OF_MODEL_TRAINED_WITHOUT_LABELS"]
57+
model_trained_with_labels_id = os.getenv("ID_OF_MODEL_TRAINED_WITH_LABELS", labeled_model_id)
58+
model_trained_without_labels_id = os.getenv("ID_OF_MODEL_TRAINED_WITHOUT_LABELS", unlabeled_model_id)
5259

5360
path_to_sample_forms = os.path.abspath(os.path.join(os.path.abspath(__file__), "..", "..", "./sample_forms/forms/Form_1.jpg"))
5461
async with FormRecognizerClient(
@@ -120,7 +127,34 @@ async def recognize_custom_forms(self):
120127

121128
async def main():
122129
sample = DifferentiateOutputModelsTrainedWithAndWithoutLabelsSampleAsync()
123-
await sample.recognize_custom_forms()
130+
labeled_model_id = None
131+
unlabeled_model_id = None
132+
if os.getenv("CONTAINER_SAS_URL_WITH_LABELS") or os.getenv("CONTAINER_SAS_URL_WITHOUT_LABELS"):
133+
134+
from azure.core.credentials import AzureKeyCredential
135+
from azure.ai.formrecognizer.aio import FormTrainingClient
136+
137+
endpoint = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
138+
key = os.getenv("AZURE_FORM_RECOGNIZER_KEY")
139+
labeled = os.getenv("CONTAINER_SAS_URL_WITH_LABELS")
140+
unlabeled = os.getenv("CONTAINER_SAS_URL_WITHOUT_LABELS")
141+
142+
if not endpoint or not key:
143+
raise ValueError("Please provide endpoint and API key to run the samples.")
144+
145+
form_training_client = FormTrainingClient(
146+
endpoint=endpoint, credential=AzureKeyCredential(key)
147+
)
148+
149+
async with form_training_client:
150+
if labeled:
151+
model = await (await form_training_client.begin_training(labeled, use_training_labels=True)).result()
152+
labeled_model_id = model.model_id
153+
if unlabeled:
154+
model = await (await form_training_client.begin_training(unlabeled, use_training_labels=False)).result()
155+
unlabeled_model_id = model.model_id
156+
157+
await sample.recognize_custom_forms(labeled_model_id, unlabeled_model_id)
124158

125159

126160
if __name__ == '__main__':

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_get_bounding_boxes_async.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,20 @@
1212
DESCRIPTION:
1313
This sample demonstrates how to get detailed information to visualize the outlines of
1414
form content and fields, which can be used for manual validation and drawing UI as part of an application.
15+
16+
The model used in this sample can be created by running sample_train_model_without_labels_async.py with the
17+
training files in https://aka.ms/azsdk/formrecognizer/sampletrainingfiles
18+
1519
USAGE:
1620
python sample_get_bounding_boxes_async.py
1721
1822
Set the environment variables with your own values before running the sample:
1923
1) AZURE_FORM_RECOGNIZER_ENDPOINT - the endpoint to your Cognitive Services resource.
2024
2) AZURE_FORM_RECOGNIZER_KEY - your Form Recognizer API key
2125
3) CUSTOM_TRAINED_MODEL_ID - the ID of your custom trained model
26+
-OR-
27+
CONTAINER_SAS_URL - The shared access signature (SAS) URL of your Azure Blob Storage container with your forms.
28+
A model will be trained and used to run the sample.
2229
"""
2330

2431
import os
@@ -35,13 +42,13 @@ def format_bounding_box(bounding_box):
3542

3643
class GetBoundingBoxesSampleAsync(object):
3744

38-
async def get_bounding_boxes(self):
45+
async def get_bounding_boxes(self, custom_model_id):
3946
from azure.core.credentials import AzureKeyCredential
4047
from azure.ai.formrecognizer.aio import FormRecognizerClient
4148

4249
endpoint = os.environ["AZURE_FORM_RECOGNIZER_ENDPOINT"]
4350
key = os.environ["AZURE_FORM_RECOGNIZER_KEY"]
44-
model_id = os.environ["CUSTOM_TRAINED_MODEL_ID"]
51+
model_id = os.getenv("CUSTOM_TRAINED_MODEL_ID", custom_model_id)
4552

4653
form_recognizer_client = FormRecognizerClient(
4754
endpoint=endpoint, credential=AzureKeyCredential(key)
@@ -115,7 +122,26 @@ async def get_bounding_boxes(self):
115122

116123
async def main():
117124
sample = GetBoundingBoxesSampleAsync()
118-
await sample.get_bounding_boxes()
125+
model_id = None
126+
if os.getenv("CONTAINER_SAS_URL"):
127+
128+
from azure.core.credentials import AzureKeyCredential
129+
from azure.ai.formrecognizer.aio import FormTrainingClient
130+
131+
endpoint = os.getenv("AZURE_FORM_RECOGNIZER_ENDPOINT")
132+
key = os.getenv("AZURE_FORM_RECOGNIZER_KEY")
133+
134+
if not endpoint or not key:
135+
raise ValueError("Please provide endpoint and API key to run the samples.")
136+
137+
form_training_client = FormTrainingClient(
138+
endpoint=endpoint, credential=AzureKeyCredential(key)
139+
)
140+
async with form_training_client:
141+
model = await (await form_training_client.begin_training(
142+
os.getenv("CONTAINER_SAS_URL"), use_training_labels=False)).result()
143+
model_id = model.model_id
144+
await sample.get_bounding_boxes(model_id)
119145

120146

121147
if __name__ == '__main__':

0 commit comments

Comments
 (0)