Skip to content

Commit 9b1b9ec

Browse files
authored
[text analytics] add bing_id property to LinkedEntity class (#13446)
1 parent 37d5472 commit 9b1b9ec

8 files changed

+142
-7
lines changed

sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ pass in `v3.0` to the kwarg `api_version` when creating your TextAnalyticsClient
1111
- `offset` is the offset of the text from the start of the document
1212
- We have now added support for opinion mining. To use this feature, you need to make sure you are using the service's
1313
v3.1-preview.1 API. To get this support, pass `show_opinion_mining` as True when calling the `analyze_sentiment` endpoint
14+
- Add property `bing_id` to the `LinkedEntity` class. This property is only available for v3.1-preview.2 and up, and it is to be
15+
used in conjunction with the Bing Entity Search API to fetch additional relevant information about the returned entity.
1416

1517
## 5.0.0 (2020-07-27)
1618

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_base_client.py

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ class TextAnalyticsApiVersion(str, Enum):
1515

1616
#: this is the default version
1717
V3_1_PREVIEW_1 = "v3.1-preview.1"
18+
19+
# 3.1-preview.2 is not yet the default version since we don't have a
20+
# reliable endpoint
21+
V3_1_PREVIEW_2 = "v3.1-preview.2"
1822
V3_0 = "v3.0"
1923

2024
def _authentication_policy(credential):

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# coding=utf-8
1+
# coding=utf-8 pylint: disable=too-many-lines
22
# ------------------------------------
33
# Copyright (c) Microsoft Corporation.
44
# Licensed under the MIT License.
@@ -616,6 +616,11 @@ class LinkedEntity(DictMixin):
616616
:ivar data_source: Data source used to extract entity linking,
617617
such as Wiki/Bing etc.
618618
:vartype data_source: str
619+
:ivar str bing_id: Bing unique identifier of the recognized entity. Use in conjunction
620+
with the Bing Entity Search SDK to fetch additional relevant information. Only
621+
available for API version v3.1-preview.2 and up.
622+
.. versionadded:: v3.1-preview.2
623+
The *bing_id* property.
619624
"""
620625

621626
def __init__(self, **kwargs):
@@ -625,22 +630,32 @@ def __init__(self, **kwargs):
625630
self.data_source_entity_id = kwargs.get("data_source_entity_id", None)
626631
self.url = kwargs.get("url", None)
627632
self.data_source = kwargs.get("data_source", None)
633+
self.bing_id = kwargs.get("bing_id", None)
628634

629635
@classmethod
630636
def _from_generated(cls, entity):
637+
bing_id = entity.bing_id if hasattr(entity, "bing_id") else None
631638
return cls(
632639
name=entity.name,
633640
matches=[LinkedEntityMatch._from_generated(e) for e in entity.matches], # pylint: disable=protected-access
634641
language=entity.language,
635642
data_source_entity_id=entity.id,
636643
url=entity.url,
637644
data_source=entity.data_source,
645+
bing_id=bing_id,
638646
)
639647

640648
def __repr__(self):
641649
return "LinkedEntity(name={}, matches={}, language={}, data_source_entity_id={}, url={}, " \
642-
"data_source={})".format(self.name, repr(self.matches), self.language, self.data_source_entity_id,
643-
self.url, self.data_source)[:1024]
650+
"data_source={}, bing_id={})".format(
651+
self.name,
652+
repr(self.matches),
653+
self.language,
654+
self.data_source_entity_id,
655+
self.url,
656+
self.data_source,
657+
self.bing_id,
658+
)[:1024]
644659

645660

646661
class LinkedEntityMatch(DictMixin):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
4+
and Paul Allen", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Accept-Encoding:
9+
- gzip, deflate
10+
Connection:
11+
- keep-alive
12+
Content-Length:
13+
- '108'
14+
Content-Type:
15+
- application/json
16+
User-Agent:
17+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
18+
method: POST
19+
uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
20+
response:
21+
body:
22+
string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill
23+
Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill
24+
Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul
25+
Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul
26+
Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}'
27+
headers:
28+
apim-request-id:
29+
- 34b34e81-fcc2-4c1e-85b2-116f85196a4c
30+
content-type:
31+
- application/json; charset=utf-8
32+
csp-billing-usage:
33+
- CognitiveServices.TextAnalytics.BatchScoring=1
34+
date:
35+
- Mon, 31 Aug 2020 18:48:40 GMT
36+
strict-transport-security:
37+
- max-age=31536000; includeSubDomains; preload
38+
transfer-encoding:
39+
- chunked
40+
x-content-type-options:
41+
- nosniff
42+
x-envoy-upstream-service-time:
43+
- '27'
44+
status:
45+
code: 200
46+
message: OK
47+
version: 1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
interactions:
2+
- request:
3+
body: '{"documents": [{"id": "0", "text": "Microsoft was founded by Bill Gates
4+
and Paul Allen", "language": "en"}]}'
5+
headers:
6+
Accept:
7+
- application/json, text/json
8+
Content-Length:
9+
- '108'
10+
Content-Type:
11+
- application/json
12+
User-Agent:
13+
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
14+
method: POST
15+
uri: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
16+
response:
17+
body:
18+
string: '{"documents":[{"id":"0","entities":[{"bingId":"0d47c987-0042-5576-15e8-97af601614fa","name":"Bill
19+
Gates","matches":[{"text":"Bill Gates","offset":25,"length":10,"confidenceScore":0.52}],"language":"en","id":"Bill
20+
Gates","url":"https://en.wikipedia.org/wiki/Bill_Gates","dataSource":"Wikipedia"},{"bingId":"df2c4376-9923-6a54-893f-2ee5a5badbc7","name":"Paul
21+
Allen","matches":[{"text":"Paul Allen","offset":40,"length":10,"confidenceScore":0.54}],"language":"en","id":"Paul
22+
Allen","url":"https://en.wikipedia.org/wiki/Paul_Allen","dataSource":"Wikipedia"},{"bingId":"a093e9b9-90f5-a3d5-c4b8-5855e1b01f85","name":"Microsoft","matches":[{"text":"Microsoft","offset":0,"length":9,"confidenceScore":0.49}],"language":"en","id":"Microsoft","url":"https://en.wikipedia.org/wiki/Microsoft","dataSource":"Wikipedia"}],"warnings":[]}],"errors":[],"modelVersion":"2020-02-01"}'
23+
headers:
24+
apim-request-id: 70ab796e-3da1-4a55-86b4-16c4b19a97a8
25+
content-type: application/json; charset=utf-8
26+
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
27+
date: Mon, 31 Aug 2020 18:48:41 GMT
28+
strict-transport-security: max-age=31536000; includeSubDomains; preload
29+
transfer-encoding: chunked
30+
x-content-type-options: nosniff
31+
x-envoy-upstream-service-time: '26'
32+
status:
33+
code: 200
34+
message: OK
35+
url: https://cognitiveusw2dev.azure-api.net/text/analytics/v3.1-preview.2/entities/linking?showStats=false&stringIndexType=UnicodeCodePoint
36+
version: 1

sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) Microsoft Corporation.
44
# Licensed under the MIT License.
55
# ------------------------------------
6-
6+
import os
77
import pytest
88
import platform
99
import functools
@@ -586,3 +586,17 @@ def test_string_index_type_not_fail_v3(self, client):
586586
# make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't
587587
# cause v3.0 calls to fail
588588
client.recognize_linked_entities(["please don't fail"])
589+
590+
# currently run as a playback-only test because the dev endpoint is unreliable
591+
@pytest.mark.playback_test_only
592+
@GlobalTextAnalyticsAccountPreparer()
593+
@TextAnalyticsClientPreparer(client_kwargs={
594+
"api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2,
595+
"text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'),
596+
"text_analytics_account": "https://cognitiveusw2dev.azure-api.net/"
597+
})
598+
def test_bing_id(self, client):
599+
result = client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"])
600+
for doc in result:
601+
for entity in doc.entities:
602+
assert entity.bing_id # this checks if it's None and if it's empty

sdk/textanalytics/azure-ai-textanalytics/tests/test_recognize_linked_entities_async.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Copyright (c) Microsoft Corporation.
44
# Licensed under the MIT License.
55
# ------------------------------------
6-
6+
import os
77
import pytest
88
import platform
99
import functools
@@ -622,3 +622,17 @@ async def test_string_index_type_not_fail_v3(self, client):
622622
# make sure that the addition of the string_index_type kwarg for v3.1-preview.1 doesn't
623623
# cause v3.0 calls to fail
624624
await client.recognize_linked_entities(["please don't fail"])
625+
626+
# currently run as a playback-only test because the dev endpoint is unreliable
627+
@pytest.mark.playback_test_only
628+
@GlobalTextAnalyticsAccountPreparer()
629+
@TextAnalyticsClientPreparer(client_kwargs={
630+
"api_version": TextAnalyticsApiVersion.V3_1_PREVIEW_2,
631+
"text_analytics_account_key": os.environ.get('AZURE_TEXT_ANALYTICS_KEY'),
632+
"text_analytics_account": "https://cognitiveusw2dev.azure-api.net/"
633+
})
634+
async def test_bing_id(self, client):
635+
result = await client.recognize_linked_entities(["Microsoft was founded by Bill Gates and Paul Allen"])
636+
for doc in result:
637+
for entity in doc.entities:
638+
assert entity.bing_id # this checks if it's None and if it's empty

sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,15 @@ def linked_entity(linked_entity_match):
116116
language="English",
117117
data_source_entity_id="Bill Gates",
118118
url="https://en.wikipedia.org/wiki/Bill_Gates",
119-
data_source="wikipedia"
119+
data_source="wikipedia",
120+
bing_id="12345678"
120121
)
121122
model_repr = (
122123
"LinkedEntity(name=Bill Gates, matches=[{}, {}], "\
123124
"language=English, data_source_entity_id=Bill Gates, "\
124-
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia)".format(linked_entity_match[1], linked_entity_match[1])
125+
"url=https://en.wikipedia.org/wiki/Bill_Gates, data_source=wikipedia, bing_id=12345678)".format(
126+
linked_entity_match[1], linked_entity_match[1]
127+
)
125128
)
126129
assert repr(model) == model_repr
127130
return model, model_repr

0 commit comments

Comments
 (0)