Skip to content

[formrecognizer] renames from consistency check #12752

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion sdk/formrecognizer/azure-ai-formrecognizer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
**Breaking Changes**

- Values are now capitalized for enums `FormContentType`, `LengthUnit`, `TrainingStatus`, and `CustomFormModelStatus`
- `document_name` renamed to `name` on `TrainingDocumentInfo`
- Keyword argument `include_sub_folders` renamed to `include_subfolders` on `begin_training` methods

**New features**

- `FormField` now has attribute `value_type` which contains the semantic data type of the field value
- `FormField` now has attribute `value_type` which contains the semantic data type of the field value. The options for
`value_type` are described in the enum `FieldValueType`

**Fixes and improvements**

Expand Down
2 changes: 1 addition & 1 deletion sdk/formrecognizer/azure-ai-formrecognizer/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ for submodel in model.submodels:

# Training result information
for doc in model.training_documents:
print("Document name: {}".format(doc.document_name))
print("Document name: {}".format(doc.name))
print("Document status: {}".format(doc.status))
print("Document page count: {}".format(doc.page_count))
print("Document errors: {}".format(doc.errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def begin_training(self, training_files_url, use_training_labels, **kwargs):
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path for
training. For example, when using an Azure storage blob URI, use the prefix to restrict
subfolders for training.
:keyword bool include_sub_folders: A flag to indicate if sub folders within the set of prefix folders
:keyword bool include_subfolders: A flag to indicate if subfolders within the set of prefix folders
will also need to be included when searching for content to be preprocessed. Not supported if
training with labels.
:keyword int polling_interval: Waiting time between two polls for LRO operations
Expand Down Expand Up @@ -157,7 +157,7 @@ def callback(raw_response):
use_label_file=use_training_labels,
source_filter=TrainSourceFilter(
prefix=kwargs.pop("prefix", ""),
include_sub_folders=kwargs.pop("include_sub_folders", False),
include_sub_folders=kwargs.pop("include_subfolders", False),
)
),
cls=lambda pipeline_response, _, response_headers: pipeline_response,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
import six


def adjust_value_type(value_type):
    """Translate a service field type name into its Python-flavored name.

    The names "array", "number" and "object" are rewritten to "list",
    "float" and "dictionary" respectively. Any other value (including
    ``None``) is returned unchanged.

    :param value_type: The type name to translate.
    :return: The translated type name, or ``value_type`` itself when no
        translation applies.
    """
    if value_type == "array":
        return "list"
    if value_type == "number":
        return "float"
    if value_type == "object":
        return "dictionary"
    # Not one of the renamed types: pass the value through untouched.
    return value_type


def adjust_confidence(score):
"""Adjust confidence when not returned.
"""
Expand Down Expand Up @@ -83,10 +93,10 @@ class FieldValueType(str, Enum):
DATE = "date"
TIME = "time"
PHONE_NUMBER = "phoneNumber"
NUMBER = "number"
FLOAT = "float"
INTEGER = "integer"
ARRAY = "array"
OBJECT = "object"
LIST = "list"
DICTIONARY = "dictionary"


class LengthUnit(str, Enum):
Expand Down Expand Up @@ -202,9 +212,9 @@ def __repr__(self):
class FormField(object):
"""Represents a field recognized in an input form.

:ivar value_type: The type of `value` found on FormField. Possible types include: 'string',
'date', 'time', 'phoneNumber', 'number', 'integer', 'object', or 'array'.
:vartype value_type: str or ~azure.ai.formrecognizer.FieldValueType
:ivar str value_type: The type of `value` found on FormField. Described in
:class:`~azure.ai.formrecognizer.FieldValueType`, possible types include: 'string',
'date', 'time', 'phoneNumber', 'float', 'integer', 'dictionary', or 'list'.
:ivar ~azure.ai.formrecognizer.FieldData label_data:
Contains the text, bounding box, and field elements for the field label.
:ivar ~azure.ai.formrecognizer.FieldData value_data:
Expand All @@ -213,7 +223,7 @@ class FormField(object):
:ivar value:
The value for the recognized field. Its semantic data type is described by `value_type`.
:vartype value: str, int, float, :class:`~datetime.date`, :class:`~datetime.time`,
:class:`~azure.ai.formrecognizer.FormField`, or list[:class:`~azure.ai.formrecognizer.FormField`]
dict[str, :class:`~azure.ai.formrecognizer.FormField`], or list[:class:`~azure.ai.formrecognizer.FormField`]
:ivar float confidence:
Measures the degree of certainty of the recognition result. Value is between [0.0, 1.0].
"""
Expand All @@ -229,7 +239,7 @@ def __init__(self, **kwargs):
@classmethod
def _from_generated(cls, field, value, read_result):
return cls(
value_type=value.type if value else None,
value_type=adjust_value_type(value.type) if value else None,
label_data=FieldData._from_generated(field, read_result),
value_data=FieldData._from_generated(value, read_result),
value=get_field_value(field, value, read_result),
Expand Down Expand Up @@ -667,7 +677,7 @@ class TrainingDocumentInfo(object):
"""Report for an individual document used for training
a custom model.

:ivar str document_name:
:ivar str name:
The name of the document.
:ivar str status:
The :class:`~azure.ai.formrecognizer.TrainingStatus`
Expand All @@ -680,23 +690,23 @@ class TrainingDocumentInfo(object):
"""

def __init__(self, **kwargs):
self.document_name = kwargs.get("document_name", None)
self.name = kwargs.get("name", None)
self.status = kwargs.get("status", None)
self.page_count = kwargs.get("page_count", None)
self.errors = kwargs.get("errors", [])

@classmethod
def _from_generated(cls, train_result):
return [cls(
document_name=doc.document_name,
name=doc.document_name,
status=doc.status,
page_count=doc.pages,
errors=FormRecognizerError._from_generated(doc.errors)
) for doc in train_result.training_documents] if train_result.training_documents else None

def __repr__(self):
return "TrainingDocumentInfo(document_name={}, status={}, page_count={}, errors={})".format(
self.document_name, self.status, self.page_count, repr(self.errors)
return "TrainingDocumentInfo(name={}, status={}, page_count={}, errors={})".format(
self.name, self.status, self.page_count, repr(self.errors)
)[:1024]


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async def begin_training(
:keyword str prefix: A case-sensitive prefix string to filter documents in the source path for
training. For example, when using an Azure storage blob URI, use the prefix to restrict
subfolders for training.
:keyword bool include_sub_folders: A flag to indicate if sub folders within the set of prefix folders
:keyword bool include_subfolders: A flag to indicate if subfolders within the set of prefix folders
will also need to be included when searching for content to be preprocessed. Not supported if
training with labels.
:keyword int polling_interval: Waiting time between two polls for LRO operations
Expand Down Expand Up @@ -170,7 +170,7 @@ def callback(raw_response):
use_label_file=use_training_labels,
source_filter=TrainSourceFilter(
prefix=kwargs.pop("prefix", ""),
include_sub_folders=kwargs.pop("include_sub_folders", False)
include_sub_folders=kwargs.pop("include_subfolders", False)
)
),
cls=lambda pipeline_response, _, response_headers: pipeline_response,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ async def train_model_with_labels(self):

# Training result information
for doc in model.training_documents:
print("Document name: {}".format(doc.document_name))
print("Document name: {}".format(doc.name))
print("Document status: {}".format(doc.status))
print("Document page count: {}".format(doc.page_count))
print("Document errors: {}".format(doc.errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ async def train_model_without_labels(self):
# [END training_async]
# Training result information
for doc in model.training_documents:
print("Document name: {}".format(doc.document_name))
print("Document name: {}".format(doc.name))
print("Document status: {}".format(doc.status))
print("Document page count: {}".format(doc.page_count))
print("Document errors: {}".format(doc.errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def train_model_with_labels(self):

# Training result information
for doc in model.training_documents:
print("Document name: {}".format(doc.document_name))
print("Document name: {}".format(doc.name))
print("Document status: {}".format(doc.status))
print("Document page count: {}".format(doc.page_count))
print("Document errors: {}".format(doc.errors))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def train_model_without_labels(self):
# [END training]
# Training result information
for doc in model.training_documents:
print("Document name: {}".format(doc.document_name))
print("Document name: {}".format(doc.name))
print("Document status: {}".format(doc.status))
print("Document page count: {}".format(doc.page_count))
print("Document errors: {}".format(doc.errors))
Expand Down
4 changes: 2 additions & 2 deletions sdk/formrecognizer/azure-ai-formrecognizer/tests/test_mgmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_mgmt_model_labeled(self, client, container_sas_url):
self.assertEqual(labeled_model_from_train.training_completed_on, labeled_model_from_get.training_completed_on)
self.assertEqual(labeled_model_from_train.errors, labeled_model_from_get.errors)
for a, b in zip(labeled_model_from_train.training_documents, labeled_model_from_get.training_documents):
self.assertEqual(a.document_name, b.document_name)
self.assertEqual(a.name, b.name)
self.assertEqual(a.errors, b.errors)
self.assertEqual(a.page_count, b.page_count)
self.assertEqual(a.status, b.status)
Expand Down Expand Up @@ -127,7 +127,7 @@ def test_mgmt_model_unlabeled(self, client, container_sas_url):
self.assertEqual(unlabeled_model_from_train.training_completed_on, unlabeled_model_from_get.training_completed_on)
self.assertEqual(unlabeled_model_from_train.errors, unlabeled_model_from_get.errors)
for a, b in zip(unlabeled_model_from_train.training_documents, unlabeled_model_from_get.training_documents):
self.assertEqual(a.document_name, b.document_name)
self.assertEqual(a.name, b.name)
self.assertEqual(a.errors, b.errors)
self.assertEqual(a.page_count, b.page_count)
self.assertEqual(a.status, b.status)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ async def test_mgmt_model_labeled(self, client, container_sas_url):
self.assertEqual(labeled_model_from_train.training_completed_on, labeled_model_from_get.training_completed_on)
self.assertEqual(labeled_model_from_train.errors, labeled_model_from_get.errors)
for a, b in zip(labeled_model_from_train.training_documents, labeled_model_from_get.training_documents):
self.assertEqual(a.document_name, b.document_name)
self.assertEqual(a.name, b.name)
self.assertEqual(a.errors, b.errors)
self.assertEqual(a.page_count, b.page_count)
self.assertEqual(a.status, b.status)
Expand Down Expand Up @@ -146,7 +146,7 @@ async def test_mgmt_model_unlabeled(self, client, container_sas_url):
self.assertEqual(unlabeled_model_from_train.training_completed_on, unlabeled_model_from_get.training_completed_on)
self.assertEqual(unlabeled_model_from_train.errors, unlabeled_model_from_get.errors)
for a, b in zip(unlabeled_model_from_train.training_documents, unlabeled_model_from_get.training_documents):
self.assertEqual(a.document_name, b.document_name)
self.assertEqual(a.name, b.name)
self.assertEqual(a.errors, b.errors)
self.assertEqual(a.page_count, b.page_count)
self.assertEqual(a.status, b.status)
Expand Down
4 changes: 2 additions & 2 deletions sdk/formrecognizer/azure-ai-formrecognizer/tests/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ def form_recognizer_error():

@pytest.fixture
def training_document_info(form_recognizer_error):
model = _models.TrainingDocumentInfo(document_name="document_name", status=_models.TrainingStatus.PARTIALLY_SUCCEEDED, page_count=5, errors=[form_recognizer_error[0]])
model_repr = "TrainingDocumentInfo(document_name=document_name, status=partiallySucceeded, page_count=5, errors=[{}])".format(form_recognizer_error[1])[:1024]
model = _models.TrainingDocumentInfo(name="name", status=_models.TrainingStatus.PARTIALLY_SUCCEEDED, page_count=5, errors=[form_recognizer_error[0]])
model_repr = "TrainingDocumentInfo(name=name, status=partiallySucceeded, page_count=5, errors=[{}])".format(form_recognizer_error[1])[:1024]
assert repr(model) == model_repr
return model, model_repr

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_training(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand All @@ -90,7 +90,7 @@ def test_training_multipage(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand Down Expand Up @@ -151,7 +151,7 @@ def test_training_with_labels(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand All @@ -175,7 +175,7 @@ def test_training_multipage_with_labels(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand Down Expand Up @@ -228,15 +228,15 @@ def callback(response):
@GlobalClientPreparer(training=True)
def test_training_with_files_filter(self, client, container_sas_url):

poller = client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_sub_folders=True)
poller = client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_subfolders=True)
model = poller.result()
self.assertEqual(len(model.training_documents), 6)
self.assertEqual(model.training_documents[-1].document_name, "subfolder/Form_6.jpg") # we traversed subfolders
self.assertEqual(model.training_documents[-1].name, "subfolder/Form_6.jpg") # we traversed subfolders

poller = client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_sub_folders=True)
poller = client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_subfolders=True)
model = poller.result()
self.assertEqual(len(model.training_documents), 1)
self.assertEqual(model.training_documents[0].document_name, "subfolder/Form_6.jpg") # we filtered for only subfolders
self.assertEqual(model.training_documents[0].name, "subfolder/Form_6.jpg") # we filtered for only subfolders

with pytest.raises(HttpResponseError) as e:
poller = client.begin_training(training_files_url=container_sas_url, use_training_labels=False, prefix="xxx")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async def test_training(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand All @@ -97,7 +97,7 @@ async def test_training_multipage(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand Down Expand Up @@ -163,7 +163,7 @@ async def test_training_with_labels(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand All @@ -186,7 +186,7 @@ async def test_training_multipage_with_labels(self, client, container_sas_url):
self.assertEqual(model.errors, [])
self.assertEqual(model.status, "ready")
for doc in model.training_documents:
self.assertIsNotNone(doc.document_name)
self.assertIsNotNone(doc.name)
self.assertIsNotNone(doc.page_count)
self.assertIsNotNone(doc.status)
self.assertEqual(doc.errors, [])
Expand Down Expand Up @@ -241,15 +241,15 @@ def callback(response):
@GlobalClientPreparer(training=True)
async def test_training_with_files_filter(self, client, container_sas_url):
async with client:
poller = await client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_sub_folders=True)
poller = await client.begin_training(training_files_url=container_sas_url, use_training_labels=False, include_subfolders=True)
model = await poller.result()
self.assertEqual(len(model.training_documents), 6)
self.assertEqual(model.training_documents[-1].document_name, "subfolder/Form_6.jpg") # we traversed subfolders
self.assertEqual(model.training_documents[-1].name, "subfolder/Form_6.jpg") # we traversed subfolders

poller = await client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_sub_folders=True)
poller = await client.begin_training(container_sas_url, use_training_labels=False, prefix="subfolder", include_subfolders=True)
model = await poller.result()
self.assertEqual(len(model.training_documents), 1)
self.assertEqual(model.training_documents[0].document_name, "subfolder/Form_6.jpg") # we filtered for only subfolders
self.assertEqual(model.training_documents[0].name, "subfolder/Form_6.jpg") # we filtered for only subfolders

with pytest.raises(HttpResponseError) as e:
poller = await client.begin_training(training_files_url=container_sas_url, use_training_labels=False, prefix="xxx")
Expand Down
Loading