Skip to content

[text analytics] add string-index-type support #13378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 28, 2020
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def __init__(self, endpoint, credential, **kwargs):
)
self._default_language = kwargs.pop("default_language", "en")
self._default_country_hint = kwargs.pop("default_country_hint", "US")
self._string_index_type = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"

@distributed_trace
def detect_language( # type: ignore
Expand Down Expand Up @@ -148,6 +149,8 @@ def detect_language( # type: ignore
docs = _validate_input(documents, "country_hint", country_hint)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return self._client.languages(
documents=docs,
Expand Down Expand Up @@ -213,6 +216,8 @@ def recognize_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return self._client.entities_recognition_general(
documents=docs,
Expand Down Expand Up @@ -278,6 +283,8 @@ def recognize_pii_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return self._client.entities_recognition_pii(
documents=docs,
Expand Down Expand Up @@ -350,6 +357,8 @@ def recognize_linked_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return self._client.entities_linking(
documents=docs,
Expand Down Expand Up @@ -416,6 +425,8 @@ def extract_key_phrases( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return self._client.key_phrases(
documents=docs,
Expand Down Expand Up @@ -490,6 +501,8 @@ def analyze_sentiment( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def __init__( # type: ignore
)
self._default_language = kwargs.pop("default_language", "en")
self._default_country_hint = kwargs.pop("default_country_hint", "US")
self._string_index_type = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"

@distributed_trace_async
async def detect_language( # type: ignore
Expand Down Expand Up @@ -152,6 +153,8 @@ async def detect_language( # type: ignore
docs = _validate_input(documents, "country_hint", country_hint)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return await self._client.languages(
documents=docs,
Expand Down Expand Up @@ -216,6 +219,8 @@ async def recognize_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return await self._client.entities_recognition_general(
documents=docs,
Expand Down Expand Up @@ -280,6 +285,8 @@ async def recognize_pii_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return await self._client.entities_recognition_pii(
documents=docs,
Expand Down Expand Up @@ -351,6 +358,8 @@ async def recognize_linked_entities( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return await self._client.entities_linking(
documents=docs,
Expand Down Expand Up @@ -416,6 +425,8 @@ async def extract_key_phrases( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})
try:
return await self._client.key_phrases(
documents=docs,
Expand Down Expand Up @@ -489,6 +500,8 @@ async def analyze_sentiment( # type: ignore
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_index_type:
kwargs.update({"string_index_type": self._string_index_type})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '75'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/sentiment?showStats=false
response:
body:
string: '{"documents":[{"id":"0","sentiment":"positive","confidenceScores":{"positive":0.99,"neutral":0.0,"negative":0.01},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.99,"neutral":0.0,"negative":0.01},"offset":0,"length":17,"text":"please
don''t fail"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- 2852436a-4b3c-491e-952f-4fdb76505d2a
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Thu, 27 Aug 2020 15:31:24 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '88'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '75'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/sentiment?showStats=false
response:
body:
string: '{"documents":[{"id":"0","sentiment":"positive","confidenceScores":{"positive":0.99,"neutral":0.0,"negative":0.01},"sentences":[{"sentiment":"positive","confidenceScores":{"positive":0.99,"neutral":0.0,"negative":0.01},"offset":0,"length":17,"text":"please
don''t fail"}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id: f16a6dea-510a-418f-bcc1-c871b5c5d57a
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Thu, 27 Aug 2020 15:31:26 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '84'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.0/sentiment?showStats=false
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "countryHint":
"US"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '78'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/languages?showStats=false
response:
body:
string: '{"documents":[{"id":"0","detectedLanguage":{"name":"English","iso6391Name":"en","confidenceScore":1.0},"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}'
headers:
apim-request-id:
- 7a2f9487-18a0-4128-bb98-39ed81ca300d
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Thu, 27 Aug 2020 15:31:26 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '9'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "countryHint":
"US"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '78'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/languages?showStats=false
response:
body:
string: '{"documents":[{"id":"0","detectedLanguage":{"name":"English","iso6391Name":"en","confidenceScore":1.0},"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}'
headers:
apim-request-id: b8468725-d979-4816-ba83-427823f1c222
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Thu, 27 Aug 2020 15:31:26 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '7'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.0/languages?showStats=false
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '75'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/keyPhrases?showStats=false
response:
body:
string: '{"documents":[{"id":"0","keyPhrases":[],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}'
headers:
apim-request-id:
- 544cecca-c7d8-4458-a829-4dc925bbf80a
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Thu, 27 Aug 2020 15:31:27 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '14'
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Content-Length:
- '75'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/keyPhrases?showStats=false
response:
body:
string: '{"documents":[{"id":"0","keyPhrases":[],"warnings":[]}],"errors":[],"modelVersion":"2020-07-01"}'
headers:
apim-request-id: d5716979-5639-4042-adaf-d60288389c99
content-type: application/json; charset=utf-8
csp-billing-usage: CognitiveServices.TextAnalytics.BatchScoring=1
date: Thu, 27 Aug 2020 15:31:27 GMT
strict-transport-security: max-age=31536000; includeSubDomains; preload
transfer-encoding: chunked
x-content-type-options: nosniff
x-envoy-upstream-service-time: '10'
status:
code: 200
message: OK
url: https://westus2.api.cognitive.microsoft.com//text/analytics/v3.0/keyPhrases?showStats=false
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
interactions:
- request:
body: '{"documents": [{"id": "0", "text": "please don''t fail", "language": "en"}]}'
headers:
Accept:
- application/json, text/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Length:
- '75'
Content-Type:
- application/json
User-Agent:
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
method: POST
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.0/entities/recognition/general?showStats=false
response:
body:
string: '{"documents":[{"id":"0","entities":[],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
headers:
apim-request-id:
- d986518d-aebc-4d0d-9142-d980c68d3353
content-type:
- application/json; charset=utf-8
csp-billing-usage:
- CognitiveServices.TextAnalytics.BatchScoring=1
date:
- Thu, 27 Aug 2020 15:31:27 GMT
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
transfer-encoding:
- chunked
x-content-type-options:
- nosniff
x-envoy-upstream-service-time:
- '70'
status:
code: 200
message: OK
version: 1
Loading