[text analytics] fix docs and samples for UX study (#16894)

iscai-msft · web-flow · commit 264543c7b7b3 · 2021-02-24T12:01:21.000-05:00
diff --git a/sdk/textanalytics/azure-ai-textanalytics/README.md b/sdk/textanalytics/azure-ai-textanalytics/README.md
@@ -458,7 +458,7 @@ text_analytics_client = TextAnalyticsClient(endpoint, credential)
 
 documents = ["Subject is taking 100mg of ibuprofen twice daily"]
 
-poller = text_analytics_client.begin_analyze_healthcare_entities(documents, show_stats=True)
+poller = text_analytics_client.begin_analyze_healthcare_entities(documents)
 result = poller.result()
 
 docs = [doc for doc in result if not doc.is_error]
diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py
@@ -422,8 +422,9 @@ class HealthcareEntity(DictMixin):
         This value depends on the value of the `string_index_type` parameter specified
         in the original request, which is UnicodeCodePoints by default.
     :ivar related_entities: Other healthcare entities that are related to this
-        specific entity.
-    :vartype related_entities: list[~azure.ai.textanalytics.HealthcareEntity]
+        specific entity. It is represented as a dict that maps each related entity
+        to a string describing how it is related to this entity.
+    :vartype related_entities: dict[~azure.ai.textanalytics.HealthcareEntity, str]
     :ivar float confidence_score: Confidence score between 0 and 1 of the extracted
         entity.
     :ivar data_sources: A collection of entity references in known data sources.
diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py
@@ -457,6 +457,10 @@ def begin_analyze_healthcare_entities(  # type: ignore
     ):  # type: (...) -> LROPoller[ItemPaged[AnalyzeHealthcareEntitiesResultItem]]
         """Analyze healthcare entities and identify relationships between these entities in a batch of documents.
 
+        NOTE: this endpoint is currently in gated preview, meaning your subscription needs to be allow-listed
+        for you to use this endpoint. More information about that here:
+        https://aka.ms/text-analytics-health-request-access
+
         Entities are associated with references that can be found in existing knowledge bases,
         such as UMLS, CHV, MSH, etc.
 
diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py
@@ -615,6 +615,10 @@ async def begin_analyze_healthcare_entities(  # type: ignore
     ):  # type: (...) -> AsyncLROPoller[AsyncItemPaged[AnalyzeHealthcareEntitiesResultItem]]
         """Analyze healthcare entities and identify relationships between these entities in a batch of documents.
 
+        NOTE: this endpoint is currently in gated preview, meaning your subscription needs to be allow-listed
+        for you to use this endpoint. More information about that here:
+        https://aka.ms/text-analytics-health-request-access
+
         Entities are associated with references that can be found in existing knowledge bases,
         such as UMLS, CHV, MSH, etc.
 
diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_healthcare_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_analyze_healthcare_entities_async.py
@@ -11,8 +11,14 @@
 
 DESCRIPTION:
     This sample demonstrates how to detect healthcare entities in a batch of documents.
-    Each entity found in the document will have a link associated with it from a
-    data source.  Relations between entities will also be included in the response.
+
+    In this sample we will be a newly hired engineer working in a pharmacy. We are going to
+    comb through all of the prescriptions our pharmacy has fulfilled so we can catalog how
+    much inventory we have.
+
+    As a usage note: healthcare is currently in gated preview. Your subscription needs to
+    be allow-listed before you can use this endpoint. More information about that here:
+    https://aka.ms/text-analytics-health-request-access
 
 USAGE:
     python sample_analyze_healthcare_entities_async.py
@@ -30,9 +36,21 @@
 class AnalyzeHealthcareEntitiesSampleAsync(object):
 
     async def analyze_healthcare_entities_async(self):
+
+        print(
+            "In this sample we will be combing through the prescriptions our pharmacy has fulfilled "
+            "so we can catalog how much inventory we have"
+        )
+        print(
+            "We start out with a list of prescription documents. "
+            "To simplify matters, we will assume all dosages are in units of mg."
+        )
+
         # [START analyze_healthcare_entities_async]
+        import re
         from azure.core.credentials import AzureKeyCredential
         from azure.ai.textanalytics.aio import TextAnalyticsClient
+        from collections import defaultdict
 
         endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
         key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
@@ -43,38 +61,57 @@ async def analyze_healthcare_entities_async(self):
         )
 
         documents = [
-            "Subject is taking 100mg of ibuprofen twice daily"
+            """
+            Patient needs to take 100 mg of ibuprofen, and 3 mg of potassium. Also needs to take
+            10 mg of Zocor.
+            """,
+            """
+            Patient needs to take 50 mg of ibuprofen, and 2 mg of Coumadin.
+            """
         ]
 
         async with text_analytics_client:
             poller = await text_analytics_client.begin_analyze_healthcare_entities(documents)
             result = await poller.result()
             docs = [doc async for doc in result if not doc.is_error]
 
-        print("Results of Healthcare Entities Analysis:")
-        for idx, doc in enumerate(docs):
-            print("Document text: {}\n".format(documents[idx]))
-            for entity in doc.entities:
-                print("Entity: {}".format(entity.text))
-                print("...Category: {}".format(entity.category))
-                print("...Subcategory: {}".format(entity.subcategory))
-                print("...Offset: {}".format(entity.offset))
-                print("...Confidence score: {}".format(entity.confidence_score))
-                if entity.data_sources is not None:
-                    print("...Data Sources:")
-                    for data_source in entity.data_sources:
-                        print("......Entity ID: {}".format(data_source.entity_id))
-                        print("......Name: {}".format(data_source.name))
-                if len(entity.related_entities) > 0:
-                    print("...Related Entities:")
-                    for related_entity, relation_type in entity.related_entities.items():
-                        print("......Entity Text: {}".format(related_entity.text))
-                        print("......Relation Type: {}".format(relation_type))
-            print("------------------------------------------")
-
+            print(
+                "In order to find the total dosage for every mentioned medication, "
+                "let's create a dict, mapping medication name -> total dosage. "
+            )
+
+            medication_to_dosage = defaultdict(int)
+
+            print(
+                "We will start off by extracting all of the dosage entities."
+            )
+
+            dosage_entities = [
+                entity
+                for doc in docs
+                for entity in doc.entities
+                if entity.category == "Dosage"
+            ]
+
+            print(
+                "Now we traverse the related entities of each dosage entity. "
+                "We are looking for entities that are related by 'DosageOfMedication'. "
+                "After that, we're done!"
+            )
+            for dosage in dosage_entities:
+                dosage_value = int(re.findall(r"\d+", dosage.text)[0]) # we find the numbers in the dosage
+                for related_entity, relation_type in dosage.related_entities.items():
+                    if relation_type == "DosageOfMedication":
+                        medication_to_dosage[related_entity.text] += dosage_value
+
+            [
+                print("We have fulfilled '{}' total mg of '{}'".format(
+                    dosage, medication
+                ))
+                for medication, dosage in medication_to_dosage.items()
+            ]
         # [END analyze_healthcare_entities_async]
 
-
 async def main():
     sample = AnalyzeHealthcareEntitiesSampleAsync()
     await sample.analyze_healthcare_entities_async()
@@ -83,5 +120,3 @@ async def main():
 if __name__ == '__main__':
     loop = asyncio.get_event_loop()
     loop.run_until_complete(main())
-
-
diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare_entities.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare_entities.py
@@ -11,8 +11,14 @@
 
 DESCRIPTION:
     This sample demonstrates how to detect healthcare entities in a batch of documents.
-    Each entity found in the document will have a link associated with it from a
-    data source.  Relations between entities will also be included in the response.
+
+    In this sample we will be a newly hired engineer working in a pharmacy. We are going to
+    comb through all of the prescriptions our pharmacy has fulfilled so we can catalog how
+    much inventory we have.
+
+    As a usage note: healthcare is currently in gated preview. Your subscription needs to
+    be allow-listed before you can use this endpoint. More information about that here:
+    https://aka.ms/text-analytics-health-request-access
 
 USAGE:
     python sample_analyze_healthcare_entities.py
@@ -29,9 +35,21 @@
 class AnalyzeHealthcareEntitiesSample(object):
 
     def analyze_healthcare_entities(self):
+
+        print(
+            "In this sample we will be combing through the prescriptions our pharmacy has fulfilled "
+            "so we can catalog how much inventory we have"
+        )
+        print(
+            "We start out with a list of prescription documents. "
+            "To simplify matters, we will assume all dosages are in units of mg."
+        )
+
         # [START analyze_healthcare_entities]
+        import re
         from azure.core.credentials import AzureKeyCredential
         from azure.ai.textanalytics import TextAnalyticsClient
+        from collections import defaultdict
 
         endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
         key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
@@ -42,34 +60,55 @@ def analyze_healthcare_entities(self):
         )
 
         documents = [
-            "Subject is taking 100mg of ibuprofen twice daily"
+            """
+            Patient needs to take 100 mg of ibuprofen, and 3 mg of potassium. Also needs to take
+            10 mg of Zocor.
+            """,
+            """
+            Patient needs to take 50 mg of ibuprofen, and 2 mg of Coumadin.
+            """
         ]
 
-        poller = text_analytics_client.begin_analyze_healthcare_entities(documents, show_stats=True)
+        poller = text_analytics_client.begin_analyze_healthcare_entities(documents)
         result = poller.result()
 
         docs = [doc for doc in result if not doc.is_error]
 
-        print("Results of Healthcare Entities Analysis:")
-        for idx, doc in enumerate(docs):
-            for entity in doc.entities:
-                print("Entity: {}".format(entity.text))
-                print("...Category: {}".format(entity.category))
-                print("...Subcategory: {}".format(entity.subcategory))
-                print("...Offset: {}".format(entity.offset))
-                print("...Confidence score: {}".format(entity.confidence_score))
-                if entity.data_sources is not None:
-                    print("...Data Sources:")
-                    for data_source in entity.data_sources:
-                        print("......Entity ID: {}".format(data_source.entity_id))
-                        print("......Name: {}".format(data_source.name))
-                if len(entity.related_entities) > 0:
-                    print("...Related Entities:")
-                    for related_entity, relation_type in entity.related_entities.items():
-                        print("......Entity Text: {}".format(related_entity.text))
-                        print("......Relation Type: {}".format(relation_type))
-            print("------------------------------------------")
+        print(
+            "In order to find the total dosage for every mentioned medication, "
+            "let's create a dict, mapping medication name -> total dosage. "
+        )
+
+        medication_to_dosage = defaultdict(int)
+
+        print(
+            "We will start off by extracting all of the dosage entities."
+        )
 
+        dosage_entities = [
+            entity
+            for doc in docs
+            for entity in doc.entities
+            if entity.category == "Dosage"
+        ]
+
+        print(
+            "Now we traverse the related entities of each dosage entity. "
+            "We are looking for entities that are related by 'DosageOfMedication'. "
+            "After that, we're done!"
+        )
+        for dosage in dosage_entities:
+            dosage_value = int(re.findall(r"\d+", dosage.text)[0]) # we find the numbers in the dosage
+            for related_entity, relation_type in dosage.related_entities.items():
+                if relation_type == "DosageOfMedication":
+                    medication_to_dosage[related_entity.text] += dosage_value
+
+        [
+            print("We have fulfilled '{}' total mg of '{}'".format(
+                dosage, medication
+            ))
+            for medication, dosage in medication_to_dosage.items()
+        ]
         # [END analyze_healthcare_entities]
 
 if __name__ == "__main__":