Skip to content

Commit 264543c

Browse files
authored
[text analytics] fix docs and samples for UX study (#16894)
1 parent 1a60ae0 commit 264543c

File tree

6 files changed

+136
-53
lines changed

6 files changed

+136
-53
lines changed

sdk/textanalytics/azure-ai-textanalytics/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -458,7 +458,7 @@ text_analytics_client = TextAnalyticsClient(endpoint, credential)
458458

459459
documents = ["Subject is taking 100mg of ibuprofen twice daily"]
460460

461-
poller = text_analytics_client.begin_analyze_healthcare_entities(documents, show_stats=True)
461+
poller = text_analytics_client.begin_analyze_healthcare_entities(documents)
462462
result = poller.result()
463463

464464
docs = [doc for doc in result if not doc.is_error]

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,9 @@ class HealthcareEntity(DictMixin):
422422
This value depends on the value of the `string_index_type` parameter specified
423423
in the original request, which is UnicodeCodePoints by default.
424424
:ivar related_entities: Other healthcare entities that are related to this
425-
specific entity.
426-
:vartype related_entities: list[~azure.ai.textanalytics.HealthcareEntity]
425+
specific entity. It is represented as a dict, mapping all of the related entities
426+
to how they are related (the relation type, given as a string).
427+
:vartype related_entities: dict[~azure.ai.textanalytics.HealthcareEntity, str]
427428
:ivar float confidence_score: Confidence score between 0 and 1 of the extracted
428429
entity.
429430
:ivar data_sources: A collection of entity references in known data sources.

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,10 @@ def begin_analyze_healthcare_entities( # type: ignore
457457
): # type: (...) -> LROPoller[ItemPaged[AnalyzeHealthcareEntitiesResultItem]]
458458
"""Analyze healthcare entities and identify relationships between these entities in a batch of documents.
459459
460+
NOTE: this endpoint is currently in gated preview, meaning your subscription needs to be allow-listed
461+
for you to use this endpoint. More information about that here:
462+
https://aka.ms/text-analytics-health-request-access
463+
460464
Entities are associated with references that can be found in existing knowledge bases,
461465
such as UMLS, CHV, MSH, etc.
462466

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,10 @@ async def begin_analyze_healthcare_entities( # type: ignore
615615
): # type: (...) -> AsyncLROPoller[AsyncItemPaged[AnalyzeHealthcareEntitiesResultItem]]
616616
"""Analyze healthcare entities and identify relationships between these entities in a batch of documents.
617617
618+
NOTE: this endpoint is currently in gated preview, meaning your subscription needs to be allow-listed
619+
for you to use this endpoint. More information about that here:
620+
https://aka.ms/text-analytics-health-request-access
621+
618622
Entities are associated with references that can be found in existing knowledge bases,
619623
such as UMLS, CHV, MSH, etc.
620624

sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_healthcare_entities_async.py

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,14 @@
1111
1212
DESCRIPTION:
1313
This sample demonstrates how to detect healthcare entities in a batch of documents.
14-
Each entity found in the document will have a link associated with it from a
15-
data source. Relations between entities will also be included in the response.
14+
15+
In this sample we play the role of a newly-hired engineer working in a pharmacy. We are going to
16+
comb through all of the prescriptions our pharmacy has fulfilled so we can catalog how
17+
much inventory we have.
18+
19+
As a usage note: healthcare is currently in gated preview. Your subscription needs to
20+
be allow-listed before you can use this endpoint. More information about that here:
21+
https://aka.ms/text-analytics-health-request-access
1622
1723
USAGE:
1824
python sample_analyze_healthcare_entities_async.py
@@ -30,9 +36,21 @@
3036
class AnalyzeHealthcareEntitiesSampleAsync(object):
3137

3238
async def analyze_healthcare_entities_async(self):
39+
40+
print(
41+
"In this sample we will be combing through the prescriptions our pharmacy has fulfilled "
42+
"so we can catalog how much inventory we have"
43+
)
44+
print(
45+
"We start out with a list of prescription documents. "
46+
"To simplify matters, we will assume all dosages are in units of mg."
47+
)
48+
3349
# [START analyze_healthcare_entities_async]
50+
import re
3451
from azure.core.credentials import AzureKeyCredential
3552
from azure.ai.textanalytics.aio import TextAnalyticsClient
53+
from collections import defaultdict
3654

3755
endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
3856
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
@@ -43,38 +61,57 @@ async def analyze_healthcare_entities_async(self):
4361
)
4462

4563
documents = [
46-
"Subject is taking 100mg of ibuprofen twice daily"
64+
"""
65+
Patient needs to take 100 mg of ibuprofen, and 3 mg of potassium. Also needs to take
66+
10 mg of Zocor.
67+
""",
68+
"""
69+
Patient needs to take 50 mg of ibuprofen, and 2 mg of Coumadin.
70+
"""
4771
]
4872

4973
async with text_analytics_client:
5074
poller = await text_analytics_client.begin_analyze_healthcare_entities(documents)
5175
result = await poller.result()
5276
docs = [doc async for doc in result if not doc.is_error]
5377

54-
print("Results of Healthcare Entities Analysis:")
55-
for idx, doc in enumerate(docs):
56-
print("Document text: {}\n".format(documents[idx]))
57-
for entity in doc.entities:
58-
print("Entity: {}".format(entity.text))
59-
print("...Category: {}".format(entity.category))
60-
print("...Subcategory: {}".format(entity.subcategory))
61-
print("...Offset: {}".format(entity.offset))
62-
print("...Confidence score: {}".format(entity.confidence_score))
63-
if entity.data_sources is not None:
64-
print("...Data Sources:")
65-
for data_source in entity.data_sources:
66-
print("......Entity ID: {}".format(data_source.entity_id))
67-
print("......Name: {}".format(data_source.name))
68-
if len(entity.related_entities) > 0:
69-
print("...Related Entities:")
70-
for related_entity, relation_type in entity.related_entities.items():
71-
print("......Entity Text: {}".format(related_entity.text))
72-
print("......Relation Type: {}".format(relation_type))
73-
print("------------------------------------------")
74-
78+
print(
79+
"In order to find the total dosage for every mentioned medication, "
80+
"let's create a dict, mapping medication name -> total dosage. "
81+
)
82+
83+
medication_to_dosage = defaultdict(int)
84+
85+
print(
86+
"We will start off by extracting all of the dosage entities."
87+
)
88+
89+
dosage_entities = [
90+
entity
91+
for doc in docs
92+
for entity in doc.entities
93+
if entity.category == "Dosage"
94+
]
95+
96+
print(
97+
"Now we traverse the related entities of each dosage entity. "
98+
"We are looking for entities that are related by 'DosageOfMedication'. "
99+
"After that, we're done!"
100+
)
101+
for dosage in dosage_entities:
102+
dosage_value = int(re.findall(r"\d+", dosage.text)[0]) # we find the numbers in the dosage
103+
for related_entity, relation_type in dosage.related_entities.items():
104+
if relation_type == "DosageOfMedication":
105+
medication_to_dosage[related_entity.text] += dosage_value
106+
107+
[
108+
print("We have fulfilled '{}' total mg of '{}'".format(
109+
dosage, medication
110+
))
111+
for medication, dosage in medication_to_dosage.items()
112+
]
75113
# [END analyze_healthcare_entities_async]
76114

77-
78115
async def main():
79116
sample = AnalyzeHealthcareEntitiesSampleAsync()
80117
await sample.analyze_healthcare_entities_async()
@@ -83,5 +120,3 @@ async def main():
83120
if __name__ == '__main__':
84121
loop = asyncio.get_event_loop()
85122
loop.run_until_complete(main())
86-
87-

sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare_entities.py

Lines changed: 62 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,14 @@
1111
1212
DESCRIPTION:
1313
This sample demonstrates how to detect healthcare entities in a batch of documents.
14-
Each entity found in the document will have a link associated with it from a
15-
data source. Relations between entities will also be included in the response.
14+
15+
In this sample we play the role of a newly-hired engineer working in a pharmacy. We are going to
16+
comb through all of the prescriptions our pharmacy has fulfilled so we can catalog how
17+
much inventory we have.
18+
19+
As a usage note: healthcare is currently in gated preview. Your subscription needs to
20+
be allow-listed before you can use this endpoint. More information about that here:
21+
https://aka.ms/text-analytics-health-request-access
1622
1723
USAGE:
1824
python sample_analyze_healthcare_entities.py
@@ -29,9 +35,21 @@
2935
class AnalyzeHealthcareEntitiesSample(object):
3036

3137
def analyze_healthcare_entities(self):
38+
39+
print(
40+
"In this sample we will be combing through the prescriptions our pharmacy has fulfilled "
41+
"so we can catalog how much inventory we have"
42+
)
43+
print(
44+
"We start out with a list of prescription documents. "
45+
"To simplify matters, we will assume all dosages are in units of mg."
46+
)
47+
3248
# [START analyze_healthcare_entities]
49+
import re
3350
from azure.core.credentials import AzureKeyCredential
3451
from azure.ai.textanalytics import TextAnalyticsClient
52+
from collections import defaultdict
3553

3654
endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
3755
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
@@ -42,34 +60,55 @@ def analyze_healthcare_entities(self):
4260
)
4361

4462
documents = [
45-
"Subject is taking 100mg of ibuprofen twice daily"
63+
"""
64+
Patient needs to take 100 mg of ibuprofen, and 3 mg of potassium. Also needs to take
65+
10 mg of Zocor.
66+
""",
67+
"""
68+
Patient needs to take 50 mg of ibuprofen, and 2 mg of Coumadin.
69+
"""
4670
]
4771

48-
poller = text_analytics_client.begin_analyze_healthcare_entities(documents, show_stats=True)
72+
poller = text_analytics_client.begin_analyze_healthcare_entities(documents)
4973
result = poller.result()
5074

5175
docs = [doc for doc in result if not doc.is_error]
5276

53-
print("Results of Healthcare Entities Analysis:")
54-
for idx, doc in enumerate(docs):
55-
for entity in doc.entities:
56-
print("Entity: {}".format(entity.text))
57-
print("...Category: {}".format(entity.category))
58-
print("...Subcategory: {}".format(entity.subcategory))
59-
print("...Offset: {}".format(entity.offset))
60-
print("...Confidence score: {}".format(entity.confidence_score))
61-
if entity.data_sources is not None:
62-
print("...Data Sources:")
63-
for data_source in entity.data_sources:
64-
print("......Entity ID: {}".format(data_source.entity_id))
65-
print("......Name: {}".format(data_source.name))
66-
if len(entity.related_entities) > 0:
67-
print("...Related Entities:")
68-
for related_entity, relation_type in entity.related_entities.items():
69-
print("......Entity Text: {}".format(related_entity.text))
70-
print("......Relation Type: {}".format(relation_type))
71-
print("------------------------------------------")
77+
print(
78+
"In order to find the total dosage for every mentioned medication, "
79+
"let's create a dict, mapping medication name -> total dosage. "
80+
)
81+
82+
medication_to_dosage = defaultdict(int)
83+
84+
print(
85+
"We will start off by extracting all of the dosage entities."
86+
)
7287

88+
dosage_entities = [
89+
entity
90+
for doc in docs
91+
for entity in doc.entities
92+
if entity.category == "Dosage"
93+
]
94+
95+
print(
96+
"Now we traverse the related entities of each dosage entity. "
97+
"We are looking for entities that are related by 'DosageOfMedication'. "
98+
"After that, we're done!"
99+
)
100+
for dosage in dosage_entities:
101+
dosage_value = int(re.findall(r"\d+", dosage.text)[0]) # we find the numbers in the dosage
102+
for related_entity, relation_type in dosage.related_entities.items():
103+
if relation_type == "DosageOfMedication":
104+
medication_to_dosage[related_entity.text] += dosage_value
105+
106+
[
107+
print("We have fulfilled '{}' total mg of '{}'".format(
108+
dosage, medication
109+
))
110+
for medication, dosage in medication_to_dosage.items()
111+
]
73112
# [END analyze_healthcare_entities]
74113

75114
if __name__ == "__main__":

0 commit comments

Comments
 (0)