Skip to content

Commit e7a2c52

Browse files
[formrecognizer] renames from consistency check (#12752)
* `document_name` -> `name` on `TrainingDocumentInfo`
* `include_sub_folders` -> `include_subfolders`
* update `FieldValueType` to Python types
* changelog
1 parent 6298034 commit e7a2c52

15 files changed

+63
-49
lines changed

sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@
55
**Breaking Changes**
66

77
- Values are now capitalized for enums `FormContentType`, `LengthUnit`, `TrainingStatus`, and `CustomFormModelStatus`
8+
- `document_name` renamed to `name` on `TrainingDocumentInfo`
9+
- Keyword argument `include_sub_folders` renamed to `include_subfolders` on `begin_training` methods
810

911
**New features**
1012

11-
- `FormField` now has attribute `value_type` which contains the semantic data type of the field value
13+
- `FormField` now has attribute `value_type` which contains the semantic data type of the field value. The options for
14+
`value_type` are described in the enum `FieldValueType`
1215

1316
**Fixes and improvements**
1417

sdk/formrecognizer/azure-ai-formrecognizer/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ for submodel in model.submodels:
289289

290290
# Training result information
291291
for doc in model.training_documents:
292-
print("Document name: {}".format(doc.document_name))
292+
print("Document name: {}".format(doc.name))
293293
print("Document status: {}".format(doc.status))
294294
print("Document page count: {}".format(doc.page_count))
295295
print("Document errors: {}".format(doc.errors))

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_training_client.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def begin_training(self, training_files_url, use_training_labels, **kwargs):
111111
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path for
112112
training. For example, when using an Azure storage blob URI, use the prefix to restrict sub
113113
folders for training.
114-
:keyword bool include_sub_folders: A flag to indicate if sub folders within the set of prefix folders
114+
:keyword bool include_subfolders: A flag to indicate if subfolders within the set of prefix folders
115115
will also need to be included when searching for content to be preprocessed. Not supported if
116116
training with labels.
117117
:keyword int polling_interval: Waiting time between two polls for LRO operations
@@ -157,7 +157,7 @@ def callback(raw_response):
157157
use_label_file=use_training_labels,
158158
source_filter=TrainSourceFilter(
159159
prefix=kwargs.pop("prefix", ""),
160-
include_sub_folders=kwargs.pop("include_sub_folders", False),
160+
include_sub_folders=kwargs.pop("include_subfolders", False),
161161
)
162162
),
163163
cls=lambda pipeline_response, _, response_headers: pipeline_response,

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py

+23-13
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,16 @@
1212
import six
1313

1414

15+
def adjust_value_type(value_type):
16+
if value_type == "array":
17+
value_type = "list"
18+
if value_type == "number":
19+
value_type = "float"
20+
if value_type == "object":
21+
value_type = "dictionary"
22+
return value_type
23+
24+
1525
def adjust_confidence(score):
1626
"""Adjust confidence when not returned.
1727
"""
@@ -83,10 +93,10 @@ class FieldValueType(str, Enum):
8393
DATE = "date"
8494
TIME = "time"
8595
PHONE_NUMBER = "phoneNumber"
86-
NUMBER = "number"
96+
FLOAT = "float"
8797
INTEGER = "integer"
88-
ARRAY = "array"
89-
OBJECT = "object"
98+
LIST = "list"
99+
DICTIONARY = "dictionary"
90100

91101

92102
class LengthUnit(str, Enum):
@@ -202,9 +212,9 @@ def __repr__(self):
202212
class FormField(object):
203213
"""Represents a field recognized in an input form.
204214
205-
:ivar value_type: The type of `value` found on FormField. Possible types include: 'string',
206-
'date', 'time', 'phoneNumber', 'number', 'integer', 'object', or 'array'.
207-
:vartype value_type: str or ~azure.ai.formrecognizer.FieldValueType
215+
:ivar str value_type: The type of `value` found on FormField. Described in
216+
:class:`~azure.ai.formrecognizer.FieldValueType`, possible types include: 'string',
217+
'date', 'time', 'phoneNumber', 'float', 'integer', 'dictionary', or 'list'.
208218
:ivar ~azure.ai.formrecognizer.FieldData label_data:
209219
Contains the text, bounding box, and field elements for the field label.
210220
:ivar ~azure.ai.formrecognizer.FieldData value_data:
@@ -213,7 +223,7 @@ class FormField(object):
213223
:ivar value:
214224
The value for the recognized field. Its semantic data type is described by `value_type`.
215225
:vartype value: str, int, float, :class:`~datetime.date`, :class:`~datetime.time`,
216-
:class:`~azure.ai.formrecognizer.FormField`, or list[:class:`~azure.ai.formrecognizer.FormField`]
226+
dict[str, :class:`~azure.ai.formrecognizer.FormField`], or list[:class:`~azure.ai.formrecognizer.FormField`]
217227
:ivar float confidence:
218228
Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0].
219229
"""
@@ -229,7 +239,7 @@ def __init__(self, **kwargs):
229239
@classmethod
230240
def _from_generated(cls, field, value, read_result):
231241
return cls(
232-
value_type=value.type if value else None,
242+
value_type=adjust_value_type(value.type) if value else None,
233243
label_data=FieldData._from_generated(field, read_result),
234244
value_data=FieldData._from_generated(value, read_result),
235245
value=get_field_value(field, value, read_result),
@@ -667,7 +677,7 @@ class TrainingDocumentInfo(object):
667677
"""Report for an individual document used for training
668678
a custom model.
669679
670-
:ivar str document_name:
680+
:ivar str name:
671681
The name of the document.
672682
:ivar str status:
673683
The :class:`~azure.ai.formrecognizer.TrainingStatus`
@@ -680,23 +690,23 @@ class TrainingDocumentInfo(object):
680690
"""
681691

682692
def __init__(self, **kwargs):
683-
self.document_name = kwargs.get("document_name", None)
693+
self.name = kwargs.get("name", None)
684694
self.status = kwargs.get("status", None)
685695
self.page_count = kwargs.get("page_count", None)
686696
self.errors = kwargs.get("errors", [])
687697

688698
@classmethod
689699
def _from_generated(cls, train_result):
690700
return [cls(
691-
document_name=doc.document_name,
701+
name=doc.document_name,
692702
status=doc.status,
693703
page_count=doc.pages,
694704
errors=FormRecognizerError._from_generated(doc.errors)
695705
) for doc in train_result.training_documents] if train_result.training_documents else None
696706

697707
def __repr__(self):
698-
return "TrainingDocumentInfo(document_name={}, status={}, page_count={}, errors={})".format(
699-
self.document_name, self.status, self.page_count, repr(self.errors)
708+
return "TrainingDocumentInfo(name={}, status={}, page_count={}, errors={})".format(
709+
self.name, self.status, self.page_count, repr(self.errors)
700710
)[:1024]
701711

702712

sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_training_client_async.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ async def begin_training(
120120
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path for
121121
training. For example, when using an Azure storage blob URI, use the prefix to restrict sub
122122
folders for training.
123-
:keyword bool include_sub_folders: A flag to indicate if sub folders within the set of prefix folders
123+
:keyword bool include_subfolders: A flag to indicate if subfolders within the set of prefix folders
124124
will also need to be included when searching for content to be preprocessed. Not supported if
125125
training with labels.
126126
:keyword int polling_interval: Waiting time between two polls for LRO operations
@@ -170,7 +170,7 @@ def callback(raw_response):
170170
use_label_file=use_training_labels,
171171
source_filter=TrainSourceFilter(
172172
prefix=kwargs.pop("prefix", ""),
173-
include_sub_folders=kwargs.pop("include_sub_folders", False)
173+
include_sub_folders=kwargs.pop("include_subfolders", False)
174174
)
175175
),
176176
cls=lambda pipeline_response, _, response_headers: pipeline_response,

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_train_model_with_labels_async.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ async def train_model_with_labels(self):
7171

7272
# Training result information
7373
for doc in model.training_documents:
74-
print("Document name: {}".format(doc.document_name))
74+
print("Document name: {}".format(doc.name))
7575
print("Document status: {}".format(doc.status))
7676
print("Document page count: {}".format(doc.page_count))
7777
print("Document errors: {}".format(doc.errors))

sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_train_model_without_labels_async.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ async def train_model_without_labels(self):
6767
# [END training_async]
6868
# Training result information
6969
for doc in model.training_documents:
70-
print("Document name: {}".format(doc.document_name))
70+
print("Document name: {}".format(doc.name))
7171
print("Document status: {}".format(doc.status))
7272
print("Document page count: {}".format(doc.page_count))
7373
print("Document errors: {}".format(doc.errors))

sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_train_model_with_labels.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def train_model_with_labels(self):
6666

6767
# Training result information
6868
for doc in model.training_documents:
69-
print("Document name: {}".format(doc.document_name))
69+
print("Document name: {}".format(doc.name))
7070
print("Document status: {}".format(doc.status))
7171
print("Document page count: {}".format(doc.page_count))
7272
print("Document errors: {}".format(doc.errors))

sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_train_model_without_labels.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def train_model_without_labels(self):
6363
# [END training]
6464
# Training result information
6565
for doc in model.training_documents:
66-
print("Document name: {}".format(doc.document_name))
66+
print("Document name: {}".format(doc.name))
6767
print("Document status: {}".format(doc.status))
6868
print("Document page count: {}".format(doc.page_count))
6969
print("Document errors: {}".format(doc.errors))

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_mgmt.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def test_mgmt_model_labeled(self, client, container_sas_url):
9191
self.assertEqual(labeled_model_from_train.training_completed_on, labeled_model_from_get.training_completed_on)
9292
self.assertEqual(labeled_model_from_train.errors, labeled_model_from_get.errors)
9393
for a, b in zip(labeled_model_from_train.training_documents, labeled_model_from_get.training_documents):
94-
self.assertEqual(a.document_name, b.document_name)
94+
self.assertEqual(a.name, b.name)
9595
self.assertEqual(a.errors, b.errors)
9696
self.assertEqual(a.page_count, b.page_count)
9797
self.assertEqual(a.status, b.status)
@@ -127,7 +127,7 @@ def test_mgmt_model_unlabeled(self, client, container_sas_url):
127127
self.assertEqual(unlabeled_model_from_train.training_completed_on, unlabeled_model_from_get.training_completed_on)
128128
self.assertEqual(unlabeled_model_from_train.errors, unlabeled_model_from_get.errors)
129129
for a, b in zip(unlabeled_model_from_train.training_documents, unlabeled_model_from_get.training_documents):
130-
self.assertEqual(a.document_name, b.document_name)
130+
self.assertEqual(a.name, b.name)
131131
self.assertEqual(a.errors, b.errors)
132132
self.assertEqual(a.page_count, b.page_count)
133133
self.assertEqual(a.status, b.status)

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_mgmt_async.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ async def test_mgmt_model_labeled(self, client, container_sas_url):
111111
self.assertEqual(labeled_model_from_train.training_completed_on, labeled_model_from_get.training_completed_on)
112112
self.assertEqual(labeled_model_from_train.errors, labeled_model_from_get.errors)
113113
for a, b in zip(labeled_model_from_train.training_documents, labeled_model_from_get.training_documents):
114-
self.assertEqual(a.document_name, b.document_name)
114+
self.assertEqual(a.name, b.name)
115115
self.assertEqual(a.errors, b.errors)
116116
self.assertEqual(a.page_count, b.page_count)
117117
self.assertEqual(a.status, b.status)
@@ -146,7 +146,7 @@ async def test_mgmt_model_unlabeled(self, client, container_sas_url):
146146
self.assertEqual(unlabeled_model_from_train.training_completed_on, unlabeled_model_from_get.training_completed_on)
147147
self.assertEqual(unlabeled_model_from_train.errors, unlabeled_model_from_get.errors)
148148
for a, b in zip(unlabeled_model_from_train.training_documents, unlabeled_model_from_get.training_documents):
149-
self.assertEqual(a.document_name, b.document_name)
149+
self.assertEqual(a.name, b.name)
150150
self.assertEqual(a.errors, b.errors)
151151
self.assertEqual(a.page_count, b.page_count)
152152
self.assertEqual(a.status, b.status)

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_repr.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ def form_recognizer_error():
118118

119119
@pytest.fixture
120120
def training_document_info(form_recognizer_error):
121-
model = _models.TrainingDocumentInfo(document_name="document_name", status=_models.TrainingStatus.PARTIALLY_SUCCEEDED, page_count=5, errors=[form_recognizer_error[0]])
122-
model_repr = "TrainingDocumentInfo(document_name=document_name, status=partiallySucceeded, page_count=5, errors=[{}])".format(form_recognizer_error[1])[:1024]
121+
model = _models.TrainingDocumentInfo(name="name", status=_models.TrainingStatus.PARTIALLY_SUCCEEDED, page_count=5, errors=[form_recognizer_error[0]])
122+
model_repr = "TrainingDocumentInfo(name=name, status=partiallySucceeded, page_count=5, errors=[{}])".format(form_recognizer_error[1])[:1024]
123123
assert repr(model) == model_repr
124124
return model, model_repr
125125

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_training.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_training(self, client, container_sas_url):
6767
self.assertEqual(model.errors, [])
6868
self.assertEqual(model.status, "ready")
6969
for doc in model.training_documents:
70-
self.assertIsNotNone(doc.document_name)
70+
self.assertIsNotNone(doc.name)
7171
self.assertIsNotNone(doc.page_count)
7272
self.assertIsNotNone(doc.status)
7373
self.assertEqual(doc.errors, [])
@@ -90,7 +90,7 @@ def test_training_multipage(self, client, container_sas_url):
9090
self.assertEqual(model.errors, [])
9191
self.assertEqual(model.status, "ready")
9292
for doc in model.training_documents:
93-
self.assertIsNotNone(doc.document_name)
93+
self.assertIsNotNone(doc.name)
9494
self.assertIsNotNone(doc.page_count)
9595
self.assertIsNotNone(doc.status)
9696
self.assertEqual(doc.errors, [])
@@ -151,7 +151,7 @@ def test_training_with_labels(self, client, container_sas_url):
151151
self.assertEqual(model.errors, [])
152152
self.assertEqual(model.status, "ready")
153153
for doc in model.training_documents:
154-
self.assertIsNotNone(doc.document_name)
154+
self.assertIsNotNone(doc.name)
155155
self.assertIsNotNone(doc.page_count)
156156
self.assertIsNotNone(doc.status)
157157
self.assertEqual(doc.errors, [])
@@ -175,7 +175,7 @@ def test_training_multipage_with_labels(self, client, container_sas_url):
175175
self.assertEqual(model.errors, [])
176176
self.assertEqual(model.status, "ready")
177177
for doc in model.training_documents:
178-
self.assertIsNotNone(doc.document_name)
178+
self.assertIsNotNone(doc.name)
179179
self.assertIsNotNone(doc.page_count)
180180
self.assertIsNotNone(doc.status)
181181
self.assertEqual(doc.errors, [])
@@ -228,15 +228,15 @@ def callback(response):
228228
@GlobalClientPreparer(training=True)
229229
def test_training_with_files_filter(self, client, container_sas_url):
230230

231-
poller = client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_sub_folders=True)
231+
poller = client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_subfolders=True)
232232
model = poller.result()
233233
self.assertEqual(len(model.training_documents), 6)
234-
self.assertEqual(model.training_documents[-1].document_name, "subfolder/Form_6.jpg") # we traversed subfolders
234+
self.assertEqual(model.training_documents[-1].name, "subfolder/Form_6.jpg") # we traversed subfolders
235235

236-
poller = client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_sub_folders=True)
236+
poller = client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_subfolders=True)
237237
model = poller.result()
238238
self.assertEqual(len(model.training_documents), 1)
239-
self.assertEqual(model.training_documents[0].document_name, "subfolder/Form_6.jpg") # we filtered for only subfolders
239+
self.assertEqual(model.training_documents[0].name, "subfolder/Form_6.jpg") # we filtered for only subfolders
240240

241241
with pytest.raises(HttpResponseError) as e:
242242
poller = client.begin_training(training_files_url=container_sas_url, use_training_labels=False, prefix="xxx")

sdk/formrecognizer/azure-ai-formrecognizer/tests/test_training_async.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ async def test_training(self, client, container_sas_url):
7474
self.assertEqual(model.errors, [])
7575
self.assertEqual(model.status, "ready")
7676
for doc in model.training_documents:
77-
self.assertIsNotNone(doc.document_name)
77+
self.assertIsNotNone(doc.name)
7878
self.assertIsNotNone(doc.page_count)
7979
self.assertIsNotNone(doc.status)
8080
self.assertEqual(doc.errors, [])
@@ -97,7 +97,7 @@ async def test_training_multipage(self, client, container_sas_url):
9797
self.assertEqual(model.errors, [])
9898
self.assertEqual(model.status, "ready")
9999
for doc in model.training_documents:
100-
self.assertIsNotNone(doc.document_name)
100+
self.assertIsNotNone(doc.name)
101101
self.assertIsNotNone(doc.page_count)
102102
self.assertIsNotNone(doc.status)
103103
self.assertEqual(doc.errors, [])
@@ -163,7 +163,7 @@ async def test_training_with_labels(self, client, container_sas_url):
163163
self.assertEqual(model.errors, [])
164164
self.assertEqual(model.status, "ready")
165165
for doc in model.training_documents:
166-
self.assertIsNotNone(doc.document_name)
166+
self.assertIsNotNone(doc.name)
167167
self.assertIsNotNone(doc.page_count)
168168
self.assertIsNotNone(doc.status)
169169
self.assertEqual(doc.errors, [])
@@ -186,7 +186,7 @@ async def test_training_multipage_with_labels(self, client, container_sas_url):
186186
self.assertEqual(model.errors, [])
187187
self.assertEqual(model.status, "ready")
188188
for doc in model.training_documents:
189-
self.assertIsNotNone(doc.document_name)
189+
self.assertIsNotNone(doc.name)
190190
self.assertIsNotNone(doc.page_count)
191191
self.assertIsNotNone(doc.status)
192192
self.assertEqual(doc.errors, [])
@@ -241,15 +241,15 @@ def callback(response):
241241
@GlobalClientPreparer(training=True)
242242
async def test_training_with_files_filter(self, client, container_sas_url):
243243
async with client:
244-
poller = await client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_sub_folders=True)
244+
poller = await client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_subfolders=True)
245245
model = await poller.result()
246246
self.assertEqual(len(model.training_documents), 6)
247-
self.assertEqual(model.training_documents[-1].document_name, "subfolder/Form_6.jpg") # we traversed subfolders
247+
self.assertEqual(model.training_documents[-1].name, "subfolder/Form_6.jpg") # we traversed subfolders
248248

249-
poller = await client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_sub_folders=True)
249+
poller = await client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_subfolders=True)
250250
model = await poller.result()
251251
self.assertEqual(len(model.training_documents), 1)
252-
self.assertEqual(model.training_documents[0].document_name, "subfolder/Form_6.jpg") # we filtered for only subfolders
252+
self.assertEqual(model.training_documents[0].name, "subfolder/Form_6.jpg") # we filtered for only subfolders
253253

254254
with pytest.raises(HttpResponseError) as e:
255255
poller = await client.begin_training(training_files_url=container_sas_url, use_training_labels=False, prefix="xxx")

0 commit comments

Comments
 (0)