Skip to content

Commit c90a60d

Browse files
authored
[text analytics] Analyze updates for v5.1.0b6 (#17003)
fixes #16372
1 parent 4300118 commit c90a60d

26 files changed

+1542
-1790
lines changed

sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
- Renamed classes `AspectSentiment` and `OpinionSentiment` to `TargetSentiment` and `AssessmentSentiment` respectively.
1010

1111
**New Features**
12-
12+
- Added `RecognizeLinkedEntitiesAction` as a supported action type for `begin_analyze_batch_actions`.
1313
- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
1414
- Added enum `PiiEntityCategoryType`.
1515
- Added property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
HealthcareEntity,
3737
HealthcareEntityDataSource,
3838
RecognizeEntitiesAction,
39+
RecognizeLinkedEntitiesAction,
3940
RecognizePiiEntitiesAction,
4041
ExtractKeyPhrasesAction,
4142
AnalyzeBatchActionsResult,
@@ -82,6 +83,7 @@
8283
'HealthcareEntity',
8384
'HealthcareEntityDataSource',
8485
'RecognizeEntitiesAction',
86+
'RecognizeLinkedEntitiesAction',
8587
'RecognizePiiEntitiesAction',
8688
'ExtractKeyPhrasesAction',
8789
'AnalyzeBatchActionsResult',

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_async_lro.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
from azure.core.polling._async_poller import PollingReturnType
1212

1313

14-
_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
14+
_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
1515
_FAILED = frozenset(["failed"])
16-
_SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
16+
_SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])
1717

1818

1919
class TextAnalyticsAsyncLROPollingMethod(AsyncLROBasePolling):

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_async_paging.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):
1616

1717
class AnalyzeResultAsync(AsyncItemPaged):
1818
def __init__(self, *args, **kwargs):
19-
self.statistics = kwargs.pop('statistics')
19+
self.statistics = kwargs.pop('statistics', None)
2020
super(AnalyzeResultAsync, self).__init__(*args, **kwargs)

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_lro.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
from azure.core.polling import LROPoller
99
from azure.core.polling.base_polling import LROBasePolling, OperationResourcePolling, OperationFailed, BadStatus
1010

11-
_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
11+
_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
1212
_FAILED = frozenset(["failed"])
13-
_SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
13+
_SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])
1414

1515

1616
class TextAnalyticsOperationResourcePolling(OperationResourcePolling):

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1362,6 +1362,7 @@ class AnalyzeBatchActionsType(str, Enum):
13621362
RECOGNIZE_ENTITIES = "recognize_entities" #: Entities Recognition action.
13631363
RECOGNIZE_PII_ENTITIES = "recognize_pii_entities" #: PII Entities Recognition action.
13641364
EXTRACT_KEY_PHRASES = "extract_key_phrases" #: Key Phrase Extraction action.
1365+
RECOGNIZE_LINKED_ENTITIES = "recognize_linked_entities" #: Linked Entities Recognition action.
13651366

13661367

13671368
class AnalyzeBatchActionsResult(DictMixin):
@@ -1377,20 +1378,24 @@ class AnalyzeBatchActionsResult(DictMixin):
13771378
:vartype action_type: str or ~azure.ai.textanalytics.AnalyzeBatchActionsType
13781379
:ivar ~datetime.datetime completed_on: Date and time (UTC) when the result completed
13791380
on the service.
1381+
:ivar statistics: Overall statistics for the action result.
1382+
:vartype statistics: ~azure.ai.textanalytics.RequestStatistics
13801383
"""
13811384
def __init__(self, **kwargs):
13821385
self.document_results = kwargs.get("document_results")
13831386
self.is_error = False
13841387
self.action_type = kwargs.get("action_type")
13851388
self.completed_on = kwargs.get("completed_on")
1389+
self.statistics = kwargs.get("statistics")
13861390

13871391
def __repr__(self):
1388-
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={})" \
1389-
.format(
1392+
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={}, " \
1393+
"statistics={})".format(
13901394
repr(self.document_results),
13911395
self.is_error,
13921396
self.action_type,
1393-
self.completed_on
1397+
self.completed_on,
1398+
repr(self.statistics)
13941399
)[:1024]
13951400

13961401
class AnalyzeBatchActionsError(DictMixin):
@@ -1527,6 +1532,44 @@ def to_generated(self):
15271532
)
15281533
)
15291534

1535+
1536+
class RecognizeLinkedEntitiesAction(DictMixin):
1537+
"""RecognizeLinkedEntitiesAction encapsulates the parameters for starting a long-running Linked Entities
1538+
Recognition operation.
1539+
1540+
If you just want to recognize linked entities in a list of documents, and not perform a batch
1541+
of long running actions on the input of documents, call method `recognize_linked_entities` instead
1542+
of interfacing with this model.
1543+
1544+
:keyword str model_version: The model version to use for the analysis.
1545+
:keyword str string_index_type: Specifies the method used to interpret string offsets.
1546+
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
1547+
you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
1548+
see https://aka.ms/text-analytics-offsets
1549+
:ivar str model_version: The model version to use for the analysis.
1550+
:ivar str string_index_type: Specifies the method used to interpret string offsets.
1551+
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
1552+
you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
1553+
see https://aka.ms/text-analytics-offsets
1554+
"""
1555+
1556+
def __init__(self, **kwargs):
1557+
self.model_version = kwargs.get("model_version", "latest")
1558+
self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
1559+
1560+
def __repr__(self, **kwargs):
1561+
return "RecognizeLinkedEntitiesAction(model_version={}, string_index_type={})" \
1562+
.format(self.model_version, self.string_index_type)[:1024]
1563+
1564+
def to_generated(self):
1565+
return _latest_preview_models.EntityLinkingTask(
1566+
parameters=_latest_preview_models.EntityLinkingTaskParameters(
1567+
model_version=self.model_version,
1568+
string_index_type=self.string_index_type
1569+
)
1570+
)
1571+
1572+
15301573
class RequestStatistics(DictMixin):
15311574
def __init__(self, **kwargs):
15321575
self.documents_count = kwargs.get("documents_count")
@@ -1544,8 +1587,8 @@ def _from_generated(cls, request_statistics):
15441587
)
15451588

15461589
def __repr__(self, **kwargs):
1547-
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, \
1548-
transactions_count={}".format(
1590+
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, " \
1591+
"transactions_count={})".format(
15491592
self.documents_count,
15501593
self.valid_documents_count,
15511594
self.erroneous_documents_count,

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_paging.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):
1616

1717
class AnalyzeResult(ItemPaged):
1818
def __init__(self, *args, **kwargs):
19-
self.statistics = kwargs.pop('statistics')
19+
self.statistics = kwargs.pop('statistics', None)
2020
super(AnalyzeResult, self).__init__(*args, **kwargs)

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_request_handlers.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
TextDocumentInput,
1313
RecognizeEntitiesAction,
1414
RecognizePiiEntitiesAction,
15+
RecognizeLinkedEntitiesAction,
1516
AnalyzeBatchActionsType,
1617
)
1718

@@ -72,6 +73,8 @@ def _determine_action_type(action):
7273
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
7374
if isinstance(action, RecognizePiiEntitiesAction):
7475
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
76+
if isinstance(action, RecognizeLinkedEntitiesAction):
77+
return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
7578
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
7679

7780
def _check_string_index_type_arg(string_index_type_arg, api_version, string_index_type_default="UnicodeCodePoint"):

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
RequestStatistics,
3636
AnalyzeBatchActionsType,
3737
AnalyzeBatchActionsError,
38-
TextDocumentBatchStatistics,
3938
_get_indices,
4039
)
4140
from ._paging import AnalyzeHealthcareEntitiesResult, AnalyzeResult
@@ -204,27 +203,34 @@ def _get_deserialization_callback_from_task_type(task_type):
204203
return entities_result
205204
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
206205
return pii_entities_result
206+
if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
207+
return linked_entities_result
207208
return key_phrases_result
208209

209210
def _get_property_name_from_task_type(task_type):
210211
if task_type == AnalyzeBatchActionsType.RECOGNIZE_ENTITIES:
211212
return "entity_recognition_tasks"
212213
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
213214
return "entity_recognition_pii_tasks"
215+
if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
216+
return "entity_linking_tasks"
214217
return "key_phrase_extraction_tasks"
215218

216219
def _num_tasks_in_current_page(returned_tasks_object):
217220
return (
218221
len(returned_tasks_object.entity_recognition_tasks or []) +
219222
len(returned_tasks_object.entity_recognition_pii_tasks or []) +
220-
len(returned_tasks_object.key_phrase_extraction_tasks or [])
223+
len(returned_tasks_object.key_phrase_extraction_tasks or []) +
224+
len(returned_tasks_object.entity_linking_tasks or [])
221225
)
222226

223227
def _get_task_type_from_error(error):
224228
if "pii" in error.target.lower():
225229
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
226-
if "entity" in error.target.lower():
230+
if "entityrecognition" in error.target.lower():
227231
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
232+
if "entitylinking" in error.target.lower():
233+
return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
228234
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES
229235

230236
def _get_mapped_errors(analyze_job_state):
@@ -249,6 +255,9 @@ def _get_good_result(current_task_type, index_of_task_result, doc_id_order, resp
249255
)
250256
return AnalyzeBatchActionsResult(
251257
document_results=document_results,
258+
statistics=RequestStatistics._from_generated( # pylint: disable=protected-access
259+
response_task_to_deserialize.results.statistics
260+
) if response_task_to_deserialize.results.statistics else None,
252261
action_type=current_task_type,
253262
completed_on=response_task_to_deserialize.last_update_date_time,
254263
)
@@ -312,9 +321,7 @@ def healthcare_paged_result(doc_id_order, health_status_callback, _, obj, respon
312321
def analyze_paged_result(doc_id_order, task_order, analyze_status_callback, _, obj, response_headers, show_stats=False): # pylint: disable=unused-argument
313322
return AnalyzeResult(
314323
functools.partial(lro_get_next_page, analyze_status_callback, obj, show_stats=show_stats),
315-
functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers),
316-
statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
317-
if (show_stats and obj.statistics) else None # pylint: disable=protected-access
324+
functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers)
318325
)
319326

320327
def _get_deserialize():

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_response_handlers_async.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from urllib.parse import urlparse, parse_qsl
1010

1111
from azure.core.async_paging import AsyncList
12-
from ._models import RequestStatistics, TextDocumentBatchStatistics
12+
from ._models import RequestStatistics
1313
from ._async_paging import (
1414
AnalyzeHealthcareEntitiesResultAsync,
1515
AnalyzeResultAsync
@@ -58,6 +58,4 @@ def analyze_paged_result(
5858
return AnalyzeResultAsync(
5959
functools.partial(lro_get_next_page_async, analyze_status_callback, obj),
6060
functools.partial(analyze_extract_page_data_async, doc_id_order, task_order, response_headers),
61-
statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
62-
if show_stats and obj.statistics is not None else None # pylint: disable=protected-access
6361
)

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
RecognizePiiEntitiesResult,
5959
RecognizeEntitiesAction,
6060
RecognizePiiEntitiesAction,
61+
RecognizeLinkedEntitiesAction,
6162
ExtractKeyPhrasesAction,
6263
AnalyzeHealthcareEntitiesResultItem,
6364
AnalyzeBatchActionsResult,
@@ -743,7 +744,7 @@ def _analyze_result_callback(self, doc_id_order, task_order, raw_response, _, he
743744
def begin_analyze_batch_actions( # type: ignore
744745
self,
745746
documents, # type: Union[List[str], List[TextDocumentInput], List[Dict[str, str]]]
746-
actions, # type: List[Union[RecognizeEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]]
747+
actions, # type: List[Union[RecognizeEntitiesAction, RecognizeLinkedEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]] # pylint: disable=line-too-long
747748
**kwargs # type: Any
748749
): # type: (...) -> LROPoller[ItemPaged[AnalyzeBatchActionsResult]]
749750
"""Start a long-running operation to perform a variety of text analysis actions over a batch of documents.
@@ -761,7 +762,8 @@ def begin_analyze_batch_actions( # type: ignore
761762
The outputted action results will be in the same order you inputted your actions.
762763
Duplicate actions in list not supported.
763764
:type actions:
764-
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
765+
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
766+
RecognizeLinkedEntitiesAction]
765767
:keyword str display_name: An optional display name to set for the requested analysis.
766768
:keyword str language: The 2 letter ISO 639-1 representation of language for the
767769
entire batch. For example, use "en" for English; "es" for Spanish etc.
@@ -816,6 +818,13 @@ def begin_analyze_batch_actions( # type: ignore
816818
key_phrase_extraction_tasks=[
817819
t.to_generated() for t in
818820
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
821+
],
822+
entity_linking_tasks=[
823+
t.to_generated() for t in
824+
[
825+
a for a in actions
826+
if _determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
827+
]
819828
]
820829
)
821830
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,8 @@ async def begin_analyze_batch_actions( # type: ignore
742742
The outputted action results will be in the same order you inputted your actions.
743743
Duplicate actions in list not supported.
744744
:type actions:
745-
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
745+
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
746+
RecognizeLinkedEntitiesAction]
746747
:keyword str display_name: An optional display name to set for the requested analysis.
747748
:keyword str language: The 2 letter ISO 639-1 representation of language for the
748749
entire batch. For example, use "en" for English; "es" for Spanish etc.
@@ -797,6 +798,13 @@ async def begin_analyze_batch_actions( # type: ignore
797798
key_phrase_extraction_tasks=[
798799
t.to_generated() for t in
799800
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
801+
],
802+
entity_linking_tasks=[
803+
t.to_generated() for t in
804+
[
805+
a for a in actions if \
806+
_determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
807+
]
800808
]
801809
)
802810
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(

sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_batch_actions_async.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ async def analyze_async(self):
3434
from azure.ai.textanalytics.aio import TextAnalyticsClient
3535
from azure.ai.textanalytics import (
3636
RecognizeEntitiesAction,
37+
RecognizeLinkedEntitiesAction,
3738
RecognizePiiEntitiesAction,
3839
ExtractKeyPhrasesAction,
3940
AnalyzeBatchActionsType
@@ -63,7 +64,8 @@ async def analyze_async(self):
6364
actions=[
6465
RecognizeEntitiesAction(),
6566
RecognizePiiEntitiesAction(),
66-
ExtractKeyPhrasesAction()
67+
ExtractKeyPhrasesAction(),
68+
RecognizeLinkedEntitiesAction()
6769
]
6870
)
6971

@@ -104,6 +106,24 @@ async def analyze_async(self):
104106
print("Key Phrases: {}\n".format(doc.key_phrases))
105107
print("------------------------------------------")
106108

109+
if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
110+
print("Results of Linked Entities Recognition action:")
111+
for idx, doc in enumerate(action_result.document_results):
112+
print("Document text: {}\n".format(documents[idx]))
113+
for linked_entity in doc.entities:
114+
print("Entity name: {}".format(linked_entity.name))
115+
print("...Data source: {}".format(linked_entity.data_source))
116+
print("...Data source language: {}".format(linked_entity.language))
117+
print("...Data source entity ID: {}".format(linked_entity.data_source_entity_id))
118+
print("...Data source URL: {}".format(linked_entity.url))
119+
print("...Document matches:")
120+
for match in linked_entity.matches:
121+
print("......Match text: {}".format(match.text))
122+
print(".........Confidence Score: {}".format(match.confidence_score))
123+
print(".........Offset: {}".format(match.offset))
124+
print(".........Length: {}".format(match.length))
125+
print("------------------------------------------")
126+
107127
# [END analyze_async]
108128

109129

0 commit comments

Comments
 (0)