Skip to content

Commit 7f1e4d8

Browse files
authored
[text analytics] add domain_filter param (#13451)
1 parent f683b29 commit 7f1e4d8

8 files changed

+126
-1
lines changed

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
OpinionSentiment,
3232
RecognizePiiEntitiesResult,
3333
PiiEntity,
34+
PiiEntityDomainType,
3435
)
3536

3637
__all__ = [
@@ -59,6 +60,7 @@
5960
'OpinionSentiment',
6061
'RecognizePiiEntitiesResult',
6162
'PiiEntity',
63+
'PiiEntityDomainType',
6264
]
6365

6466
__version__ = VERSION

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

+5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
# Licensed under the MIT License.
55
# ------------------------------------
66
import re
7+
from enum import Enum
78
from ._generated.models import (
89
LanguageInput,
910
MultiLanguageInput,
@@ -64,6 +65,10 @@ def get(self, key, default=None):
6465
return self.__dict__[key]
6566
return default
6667

68+
class PiiEntityDomainType(str, Enum):
69+
"""The different domains of PII entities that users can filter by"""
70+
PROTECTED_HEALTH_INFORMATION = "PHI" # See https://aka.ms/tanerpii for more information.
71+
6772

6873
class DetectedLanguage(DictMixin):
6974
"""DetectedLanguage contains the predicted language found in text,

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py

+6
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ def recognize_pii_entities( # type: ignore
260260
be used for scoring, e.g. "latest", "2019-10-01". If a model-version
261261
is not specified, the API will default to the latest, non-preview version.
262262
:keyword bool show_stats: If set to true, response will contain document level statistics.
263+
:keyword domain_filter: Filters the response entities to only those included in the specified domain.
264+
For example, if set to 'PHI', only entities in the Protected Health Information domain are returned.
265+
See https://aka.ms/tanerpii for more information.
266+
:paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType
263267
:return: The combined list of :class:`~azure.ai.textanalytics.RecognizePiiEntitiesResult`
264268
and :class:`~azure.ai.textanalytics.DocumentError` in the order the original documents
265269
were passed in.
@@ -281,13 +285,15 @@ def recognize_pii_entities( # type: ignore
281285
docs = _validate_input(documents, "language", language)
282286
model_version = kwargs.pop("model_version", None)
283287
show_stats = kwargs.pop("show_stats", False)
288+
domain_filter = kwargs.pop("domain_filter", None)
284289
if self._string_code_unit:
285290
kwargs.update({"string_index_type": self._string_code_unit})
286291
try:
287292
return self._client.entities_recognition_pii(
288293
documents=docs,
289294
model_version=model_version,
290295
show_stats=show_stats,
296+
domain=domain_filter,
291297
cls=kwargs.pop("cls", pii_entities_result),
292298
**kwargs
293299
)

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py

+7
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,10 @@ async def recognize_pii_entities( # type: ignore
262262
be used for scoring, e.g. "latest", "2019-10-01". If a model-version
263263
is not specified, the API will default to the latest, non-preview version.
264264
:keyword bool show_stats: If set to true, response will contain document level statistics.
265+
:keyword domain_filter: Filters the response entities to only those included in the specified domain.
266+
For example, if set to 'PHI', only entities in the Protected Health Information domain are returned.
267+
See https://aka.ms/tanerpii for more information.
268+
:paramtype domain_filter: str or ~azure.ai.textanalytics.PiiEntityDomainType
265269
:return: The combined list of :class:`~azure.ai.textanalytics.RecognizePiiEntitiesResult`
266270
and :class:`~azure.ai.textanalytics.DocumentError` in the order the original documents
267271
were passed in.
@@ -283,13 +287,16 @@ async def recognize_pii_entities( # type: ignore
283287
docs = _validate_input(documents, "language", language)
284288
model_version = kwargs.pop("model_version", None)
285289
show_stats = kwargs.pop("show_stats", False)
290+
domain_filter = kwargs.pop("domain_filter", None)
291+
286292
if self._string_code_unit:
287293
kwargs.update({"string_index_type": self._string_code_unit})
288294
try:
289295
return await self._client.entities_recognition_pii(
290296
documents=docs,
291297
model_version=model_version,
292298
show_stats=show_stats,
299+
domain=domain_filter,
293300
cls=kwargs.pop("cls", pii_entities_result),
294301
**kwargs
295302
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "I work at Microsoft and my phone number
4+
is 333-333-3333", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '113'
14+
Content-Type:
15+
- application/json
16+
User-Agent:
17+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
18+
method: POST
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&domain=PHI&stringIndexType=UnicodeCodePoint
20+
response:
21+
body:
22+
string: '{"documents":[{"id":"0","entities":[{"text":"333-333-3333","category":"Phone
23+
Number","offset":43,"length":12,"confidenceScore":0.8}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}'
24+
headers:
25+
apim-request-id:
26+
- c2319b95-6fd2-46c9-80e3-06c8f2701825
27+
content-type:
28+
- application/json; charset=utf-8
29+
csp-billing-usage:
30+
- CognitiveServices.TextAnalytics.BatchScoring=1
31+
date:
32+
- Mon, 31 Aug 2020 20:32:54 GMT
33+
strict-transport-security:
34+
- max-age=31536000; includeSubDomains; preload
35+
transfer-encoding:
36+
- chunked
37+
x-content-type-options:
38+
- nosniff
39+
x-envoy-upstream-service-time:
40+
- '79'
41+
status:
42+
code: 200
43+
message: OK
44+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "I work at Microsoft and my phone number
4+
is 333-333-3333", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Content-Length:
9+
- '113'
10+
Content-Type:
11+
- application/json
12+
User-Agent:
13+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
14+
method: POST
15+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&domain=PHI&stringIndexType=UnicodeCodePoint
16+
response:
17+
body:
18+
string: '{"documents":[{"id":"0","entities":[{"text":"333-333-3333","category":"Phone
19+
Number","offset":43,"length":12,"confidenceScore":0.8}],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}'
20+
headers:
21+
apim-request-id: 9265752d-3262-4dbb-94d6-be26889e3db9
22+
content-type: application/json; charset=utf-8
23+
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
24+
date: Mon, 31 Aug 2020 20:32:55 GMT
25+
strict-transport-security: max-age=31536000; includeSubDomains; preload
26+
transfer-encoding: chunked
27+
x-content-type-options: nosniff
28+
x-envoy-upstream-service-time: '82'
29+
status:
30+
code: 200
31+
message: OK
32+
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.1-preview.1/entities/recognition/pii?showStats=false&domain=PHI&stringIndexType=UnicodeCodePoint
33+
version: 1

sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
TextDocumentInput,
1818
VERSION,
1919
TextAnalyticsApiVersion,
20+
PiiEntityDomainType,
2021
)
2122

2223
# pre-apply the client_cls positional argument so it needn't be explicitly passed below
@@ -573,4 +574,17 @@ def test_recognize_pii_entities_v3(self, client):
573574
with pytest.raises(NotImplementedError) as excinfo:
574575
client.recognize_pii_entities(["this should fail"])
575576

576-
assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value)
577+
assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value)
578+
579+
@GlobalTextAnalyticsAccountPreparer()
580+
@TextAnalyticsClientPreparer()
581+
def test_phi_domain_filter(self, client):
582+
# without the domain filter, this should return two entities: Microsoft as an org,
583+
# and the phone number. With the domain filter, it should only return one.
584+
result = client.recognize_pii_entities(
585+
["I work at Microsoft and my phone number is 333-333-3333"],
586+
domain_filter=PiiEntityDomainType.PROTECTED_HEALTH_INFORMATION
587+
)
588+
self.assertEqual(len(result[0].entities), 1)
589+
self.assertEqual(result[0].entities[0].text, '333-333-3333')
590+
self.assertEqual(result[0].entities[0].category, 'Phone Number')

sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_pii_entities_async.py

+14
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
TextDocumentInput,
1919
VERSION,
2020
TextAnalyticsApiVersion,
21+
PiiEntityDomainType,
2122
)
2223

2324
# pre-apply the client_cls positional argument so it needn't be explicitly passed below
@@ -572,3 +573,16 @@ async def test_recognize_pii_entities_v3(self, client):
572573
await client.recognize_pii_entities(["this should fail"])
573574

574575
assert "'recognize_pii_entities' endpoint is only available for API version v3.1-preview.1 and up" in str(excinfo.value)
576+
577+
@GlobalTextAnalyticsAccountPreparer()
578+
@TextAnalyticsClientPreparer()
579+
async def test_phi_domain_filter(self, client):
580+
# without the domain filter, this should return two entities: Microsoft as an org,
581+
# and the phone number. With the domain filter, it should only return one.
582+
result = await client.recognize_pii_entities(
583+
["I work at Microsoft and my phone number is 333-333-3333"],
584+
domain_filter=PiiEntityDomainType.PROTECTED_HEALTH_INFORMATION
585+
)
586+
self.assertEqual(len(result[0].entities), 1)
587+
self.assertEqual(result[0].entities[0].text, '333-333-3333')
588+
self.assertEqual(result[0].entities[0].category, 'Phone Number')

0 commit comments

Comments
 (0)