Skip to content

Commit b6d2979

Browse files
authored
[text analytics] PII updates for v5.1.0b6 (#17038)
1 parent 8265479 commit b6d2979

20 files changed

+370
-71
lines changed

sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
**New Features**
1212

13+
- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
14+
- Added enum `PiiEntityCategoryType`.
1315
- Add property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
1416
exists on the `HealthcareEntity`.
1517
- Add property `assertion` onto `HealthcareEntity`. This contains assertions about the entity itself, i.e. if the entity represents a diagnosis,

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@
4646
)
4747
from ._paging import AnalyzeHealthcareEntitiesResult
4848
from ._generated.v3_1_preview_4.models import (
49-
RelationType as HealthcareEntityRelationType,
49+
PiiCategory as PiiEntityCategoryType,
50+
RelationType as HealthcareEntityRelationType
5051
)
5152

5253
__all__ = [
@@ -87,6 +88,7 @@
8788
'RequestStatistics',
8889
'AnalyzeBatchActionsType',
8990
"AnalyzeBatchActionsError",
91+
"PiiEntityCategoryType",
9092
"HealthcareEntityRelationType",
9193
"HealthcareEntityRelationRoleType",
9294
]

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/_operations_mixin.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def begin_health(
143143
self,
144144
documents, # type: List["_models.MultiLanguageInput"]
145145
model_version=None, # type: Optional[str]
146-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
146+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
147147
**kwargs # type: Any
148148
):
149149
"""Submit healthcare analysis job.

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/aio/_operations_mixin.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ async def begin_health(
139139
self,
140140
documents: List["_models.MultiLanguageInput"],
141141
model_version: Optional[str] = None,
142-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
142+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
143143
**kwargs
144144
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
145145
"""Submit healthcare analysis job.

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/_metadata.json

+12-12
Large diffs are not rendered by default.

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/aio/operations/_text_analytics_client_operations.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ async def _health_initial(
409409
self,
410410
documents: List["_models.MultiLanguageInput"],
411411
model_version: Optional[str] = None,
412-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
412+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
413413
**kwargs
414414
) -> Optional["_models.HealthcareJobState"]:
415415
cls = kwargs.pop('cls', None) # type: ClsType[Optional["_models.HealthcareJobState"]]
@@ -474,7 +474,7 @@ async def begin_health(
474474
self,
475475
documents: List["_models.MultiLanguageInput"],
476476
model_version: Optional[str] = None,
477-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
477+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
478478
**kwargs
479479
) -> AnalyzeHealthcareEntitiesAsyncLROPoller["_models.HealthcareJobState"]:
480480
"""Submit healthcare analysis job.
@@ -550,7 +550,7 @@ async def entities_recognition_general(
550550
documents: List["_models.MultiLanguageInput"],
551551
model_version: Optional[str] = None,
552552
show_stats: Optional[bool] = None,
553-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
553+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
554554
**kwargs
555555
) -> "_models.EntitiesResult":
556556
"""Named Entity Recognition.
@@ -637,7 +637,7 @@ async def entities_recognition_pii(
637637
model_version: Optional[str] = None,
638638
show_stats: Optional[bool] = None,
639639
domain: Optional[str] = None,
640-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
640+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
641641
pii_categories: Optional[List[Union[str, "_models.PiiCategory"]]] = None,
642642
**kwargs
643643
) -> "_models.PiiResult":
@@ -734,7 +734,7 @@ async def entities_linking(
734734
documents: List["_models.MultiLanguageInput"],
735735
model_version: Optional[str] = None,
736736
show_stats: Optional[bool] = None,
737-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
737+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
738738
**kwargs
739739
) -> "_models.EntityLinkingResult":
740740
"""Linked entities from a well known knowledge base.
@@ -977,7 +977,7 @@ async def sentiment(
977977
model_version: Optional[str] = None,
978978
show_stats: Optional[bool] = None,
979979
opinion_mining: Optional[bool] = None,
980-
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = "TextElements_v8",
980+
string_index_type: Optional[Union[str, "_models.StringIndexType"]] = None,
981981
**kwargs
982982
) -> "_models.SentimentResponse":
983983
"""Sentiment.

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@
160160
SentenceSentimentValue,
161161
State,
162162
StringIndexType,
163-
StringIndexTypeResponse,
164163
TargetRelationType,
165164
TokenSentimentValue,
166165
WarningCodeValue,
@@ -248,7 +247,6 @@
248247
'SentenceSentimentValue',
249248
'State',
250249
'StringIndexType',
251-
'StringIndexTypeResponse',
252250
'TargetRelationType',
253251
'TokenSentimentValue',
254252
'WarningCodeValue',

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_models.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -801,9 +801,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
801801
:param model_version:
802802
:type model_version: str
803803
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
804-
"Utf16CodeUnit". Default value: "TextElements_v8".
805-
:type string_index_type: str or
806-
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
804+
"Utf16CodeUnit".
805+
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
807806
"""
808807

809808
_attribute_map = {
@@ -817,7 +816,7 @@ def __init__(
817816
):
818817
super(EntitiesTaskParameters, self).__init__(**kwargs)
819818
self.model_version = kwargs.get('model_version', "latest")
820-
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
819+
self.string_index_type = kwargs.get('string_index_type', None)
821820

822821

823822
class Entity(msrest.serialization.Model):
@@ -936,9 +935,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
936935
:param model_version:
937936
:type model_version: str
938937
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
939-
"Utf16CodeUnit". Default value: "TextElements_v8".
940-
:type string_index_type: str or
941-
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
938+
"Utf16CodeUnit".
939+
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
942940
"""
943941

944942
_attribute_map = {
@@ -952,7 +950,7 @@ def __init__(
952950
):
953951
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
954952
self.model_version = kwargs.get('model_version', "latest")
955-
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
953+
self.string_index_type = kwargs.get('string_index_type', None)
956954

957955

958956
class ErrorResponse(msrest.serialization.Model):
@@ -1792,9 +1790,8 @@ class PiiTaskParameters(msrest.serialization.Model):
17921790
:param pii_categories: (Optional) describes the PII categories to return.
17931791
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
17941792
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
1795-
"Utf16CodeUnit". Default value: "TextElements_v8".
1796-
:type string_index_type: str or
1797-
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
1793+
"Utf16CodeUnit".
1794+
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
17981795
"""
17991796

18001797
_validation = {
@@ -1804,7 +1801,7 @@ class PiiTaskParameters(msrest.serialization.Model):
18041801
_attribute_map = {
18051802
'domain': {'key': 'domain', 'type': 'str'},
18061803
'model_version': {'key': 'model-version', 'type': 'str'},
1807-
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
1804+
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
18081805
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
18091806
}
18101807

@@ -1816,7 +1813,7 @@ def __init__(
18161813
self.domain = kwargs.get('domain', "none")
18171814
self.model_version = kwargs.get('model_version', "latest")
18181815
self.pii_categories = kwargs.get('pii_categories', None)
1819-
self.string_index_type = kwargs.get('string_index_type', "TextElements_v8")
1816+
self.string_index_type = kwargs.get('string_index_type', None)
18201817

18211818

18221819
class RequestStatistics(msrest.serialization.Model):

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_models_py3.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -900,9 +900,8 @@ class EntitiesTaskParameters(msrest.serialization.Model):
900900
:param model_version:
901901
:type model_version: str
902902
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
903-
"Utf16CodeUnit". Default value: "TextElements_v8".
904-
:type string_index_type: str or
905-
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
903+
"Utf16CodeUnit".
904+
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
906905
"""
907906

908907
_attribute_map = {
@@ -914,7 +913,7 @@ def __init__(
914913
self,
915914
*,
916915
model_version: Optional[str] = "latest",
917-
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
916+
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
918917
**kwargs
919918
):
920919
super(EntitiesTaskParameters, self).__init__(**kwargs)
@@ -1052,9 +1051,8 @@ class EntityLinkingTaskParameters(msrest.serialization.Model):
10521051
:param model_version:
10531052
:type model_version: str
10541053
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
1055-
"Utf16CodeUnit". Default value: "TextElements_v8".
1056-
:type string_index_type: str or
1057-
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
1054+
"Utf16CodeUnit".
1055+
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
10581056
"""
10591057

10601058
_attribute_map = {
@@ -1066,7 +1064,7 @@ def __init__(
10661064
self,
10671065
*,
10681066
model_version: Optional[str] = "latest",
1069-
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
1067+
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
10701068
**kwargs
10711069
):
10721070
super(EntityLinkingTaskParameters, self).__init__(**kwargs)
@@ -2013,9 +2011,8 @@ class PiiTaskParameters(msrest.serialization.Model):
20132011
:param pii_categories: (Optional) describes the PII categories to return.
20142012
:type pii_categories: list[str or ~azure.ai.textanalytics.v3_1_preview_4.models.PiiCategory]
20152013
:param string_index_type: Possible values include: "TextElements_v8", "UnicodeCodePoint",
2016-
"Utf16CodeUnit". Default value: "TextElements_v8".
2017-
:type string_index_type: str or
2018-
~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexTypeResponse
2014+
"Utf16CodeUnit".
2015+
:type string_index_type: str or ~azure.ai.textanalytics.v3_1_preview_4.models.StringIndexType
20192016
"""
20202017

20212018
_validation = {
@@ -2025,7 +2022,7 @@ class PiiTaskParameters(msrest.serialization.Model):
20252022
_attribute_map = {
20262023
'domain': {'key': 'domain', 'type': 'str'},
20272024
'model_version': {'key': 'model-version', 'type': 'str'},
2028-
'pii_categories': {'key': 'piiCategories', 'type': '[str]'},
2025+
'pii_categories': {'key': 'pii-categories', 'type': '[str]'},
20292026
'string_index_type': {'key': 'stringIndexType', 'type': 'str'},
20302027
}
20312028

@@ -2035,7 +2032,7 @@ def __init__(
20352032
domain: Optional[Union[str, "PiiTaskParametersDomain"]] = "none",
20362033
model_version: Optional[str] = "latest",
20372034
pii_categories: Optional[List[Union[str, "PiiCategory"]]] = None,
2038-
string_index_type: Optional[Union[str, "StringIndexTypeResponse"]] = "TextElements_v8",
2035+
string_index_type: Optional[Union[str, "StringIndexType"]] = None,
20392036
**kwargs
20402037
):
20412038
super(PiiTaskParameters, self).__init__(**kwargs)

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/models/_text_analytics_client_enums.py

-13
Original file line numberDiff line numberDiff line change
@@ -322,19 +322,6 @@ class StringIndexType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
322322
#: application is written in a language that supports Unicode, for example Java, JavaScript.
323323
UTF16_CODE_UNIT = "Utf16CodeUnit"
324324

325-
class StringIndexTypeResponse(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
326-
327-
#: Returned offset and length values will correspond to TextElements (Graphemes and Grapheme
328-
#: clusters) confirming to the Unicode 8.0.0 standard. Use this option if your application is
329-
#: written in .Net Framework or .Net Core and you will be using StringInfo.
330-
TEXT_ELEMENTS_V8 = "TextElements_v8"
331-
#: Returned offset and length values will correspond to Unicode code points. Use this option if
332-
#: your application is written in a language that support Unicode, for example Python.
333-
UNICODE_CODE_POINT = "UnicodeCodePoint"
334-
#: Returned offset and length values will correspond to UTF-16 code units. Use this option if your
335-
#: application is written in a language that support Unicode, for example Java, JavaScript.
336-
UTF16_CODE_UNIT = "Utf16CodeUnit"
337-
338325
class TargetRelationType(with_metaclass(_CaseInsensitiveEnumMeta, str, Enum)):
339326
"""The type related to the target.
340327
"""

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_generated/v3_1_preview_4/operations/_text_analytics_client_operations.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def _health_initial(
419419
self,
420420
documents, # type: List["_models.MultiLanguageInput"]
421421
model_version=None, # type: Optional[str]
422-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
422+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
423423
**kwargs # type: Any
424424
):
425425
# type: (...) -> Optional["_models.HealthcareJobState"]
@@ -485,7 +485,7 @@ def begin_health(
485485
self,
486486
documents, # type: List["_models.MultiLanguageInput"]
487487
model_version=None, # type: Optional[str]
488-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
488+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
489489
**kwargs # type: Any
490490
):
491491
# type: (...) -> AnalyzeHealthcareEntitiesLROPoller["_models.HealthcareJobState"]
@@ -562,7 +562,7 @@ def entities_recognition_general(
562562
documents, # type: List["_models.MultiLanguageInput"]
563563
model_version=None, # type: Optional[str]
564564
show_stats=None, # type: Optional[bool]
565-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
565+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
566566
**kwargs # type: Any
567567
):
568568
# type: (...) -> "_models.EntitiesResult"
@@ -650,7 +650,7 @@ def entities_recognition_pii(
650650
model_version=None, # type: Optional[str]
651651
show_stats=None, # type: Optional[bool]
652652
domain=None, # type: Optional[str]
653-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
653+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
654654
pii_categories=None, # type: Optional[List[Union[str, "_models.PiiCategory"]]]
655655
**kwargs # type: Any
656656
):
@@ -748,7 +748,7 @@ def entities_linking(
748748
documents, # type: List["_models.MultiLanguageInput"]
749749
model_version=None, # type: Optional[str]
750750
show_stats=None, # type: Optional[bool]
751-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
751+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
752752
**kwargs # type: Any
753753
):
754754
# type: (...) -> "_models.EntityLinkingResult"
@@ -994,7 +994,7 @@ def sentiment(
994994
model_version=None, # type: Optional[str]
995995
show_stats=None, # type: Optional[bool]
996996
opinion_mining=None, # type: Optional[bool]
997-
string_index_type="TextElements_v8", # type: Optional[Union[str, "_models.StringIndexType"]]
997+
string_index_type=None, # type: Optional[Union[str, "_models.StringIndexType"]]
998998
**kwargs # type: Any
999999
):
10001000
# type: (...) -> "_models.SentimentResponse"

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1485,8 +1485,11 @@ def __init__(self, **kwargs):
14851485
self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")
14861486

14871487
def __repr__(self, **kwargs):
1488-
return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={})" \
1489-
.format(self.model_version, self.domain_filter, self.string_index_type)[:1024]
1488+
return "RecognizePiiEntitiesAction(model_version={}, domain_filter={}, string_index_type={}".format(
1489+
self.model_version,
1490+
self.domain_filter,
1491+
self.string_index_type
1492+
)[:1024]
14901493

14911494
def to_generated(self):
14921495
return _latest_preview_models.PiiTask(

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@
1818
from azure.core.tracing.decorator import distributed_trace
1919
from azure.core.exceptions import HttpResponseError
2020
from ._base_client import TextAnalyticsClientBase
21-
from ._request_handlers import _validate_input, _determine_action_type, _check_string_index_type_arg
21+
from ._request_handlers import (
22+
_validate_input,
23+
_determine_action_type,
24+
_check_string_index_type_arg
25+
)
2226
from ._response_handlers import (
2327
process_http_response_error,
2428
entities_result,
@@ -301,6 +305,11 @@ def recognize_pii_entities( # type: ignore
301305
I.e., if set to 'phi', will only return entities in the Protected Healthcare Information domain.
302306
See https://aka.ms/tanerpii for more information.
303307
:paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType
308+
:keyword categories_filter: Instead of filtering over all PII entity categories, you can pass in a list of
309+
the specific PII entity categories you want returned. For example, if you only want to detect
310+
U.S. social security numbers in a document, you can pass in
311+
`[PiiEntityCategoryType.US_SOCIAL_SECURITY_NUMBER]` for this kwarg.
312+
:paramtype categories_filter: list[~azure.ai.textanalytics.PiiEntityCategoryType]
304313
:keyword str string_index_type: Specifies the method used to interpret string offsets.
305314
`UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
306315
you can also pass in `Utf16CodeUnit` or `TextElements_v8`. For additional information
@@ -327,6 +336,7 @@ def recognize_pii_entities( # type: ignore
327336
model_version = kwargs.pop("model_version", None)
328337
show_stats = kwargs.pop("show_stats", False)
329338
domain_filter = kwargs.pop("domain_filter", None)
339+
categories_filter = kwargs.pop("categories_filter", None)
330340

331341
string_index_type = _check_string_index_type_arg(
332342
kwargs.pop("string_index_type", None),
@@ -342,6 +352,7 @@ def recognize_pii_entities( # type: ignore
342352
model_version=model_version,
343353
show_stats=show_stats,
344354
domain=domain_filter,
355+
pii_categories=categories_filter,
345356
cls=kwargs.pop("cls", pii_entities_result),
346357
**kwargs
347358
)

0 commit comments

Comments
 (0)