Skip to content

Commit 3891c08

Browse files
authored
[text analytics] add string-index-type support (#13378)
1 parent bd05a04 commit 3891c08

File tree

450 files changed

+3274
-1669
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

450 files changed

+3274
-1669
lines changed

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py

+21-9
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,9 @@ class CategorizedEntity(DictMixin):
207207
:ivar subcategory: Entity subcategory, such as Age/Year/TimeRange etc
208208
:vartype subcategory: str
209209
:ivar int offset: The entity text offset from the start of the document.
210-
:ivar int length: The length of the entity text.
210+
Returned in unicode code points.
211+
:ivar int length: The length of the entity text. Returned
212+
in unicode code points.
211213
:ivar confidence_score: Confidence score between 0 and 1 of the extracted
212214
entity.
213215
:vartype confidence_score: float
@@ -253,7 +255,9 @@ class PiiEntity(DictMixin):
253255
:ivar str subcategory: Entity subcategory, such as Credit Card/EU
254256
Phone number/ABA Routing Numbers, etc.
255257
:ivar int offset: The PII entity text offset from the start of the document.
256-
:ivar int length: The length of the PII entity text.
258+
Returned in unicode code points.
259+
:ivar int length: The length of the PII entity text. Returned
260+
in unicode code points.
257261
:ivar float confidence_score: Confidence score between 0 and 1 of the extracted
258262
entity.
259263
"""
@@ -636,7 +640,9 @@ class LinkedEntityMatch(DictMixin):
636640
:vartype confidence_score: float
637641
:ivar text: Entity text as it appears in the request.
638642
:ivar int offset: The linked entity match text offset from the start of the document.
639-
:ivar int length: The length of the linked entity match text.
643+
Returned in unicode code points.
644+
:ivar int length: The length of the linked entity match text. Returned
645+
in unicode code points.
640646
:vartype text: str
641647
"""
642648

@@ -738,8 +744,10 @@ class SentenceSentiment(DictMixin):
738744
and 1 for the sentence for all labels.
739745
:vartype confidence_scores:
740746
~azure.ai.textanalytics.SentimentConfidenceScores
741-
:ivar int offset: The sentence offset from the start of the document.
742-
:ivar int length: The length of the sentence.
747+
:ivar int offset: The sentence offset from the start of the document. Returned
748+
in unicode code points.
749+
:ivar int length: The length of the sentence. Returned
750+
in unicode code points.
743751
:ivar mined_opinions: The list of opinions mined from this sentence.
744752
For example in "The food is good, but the service is bad", we would
745753
mine these two opinions "food is good", "service is bad". Only returned
@@ -847,8 +855,10 @@ class AspectSentiment(DictMixin):
847855
for 'neutral' will always be 0
848856
:vartype confidence_scores:
849857
~azure.ai.textanalytics.SentimentConfidenceScores
850-
:ivar int offset: The aspect offset from the start of the document.
851-
:ivar int length: The length of the aspect.
858+
:ivar int offset: The aspect offset from the start of the document. Returned
859+
in unicode code points.
860+
:ivar int length: The length of the aspect. Returned
861+
in unicode code points.
852862
"""
853863

854864
def __init__(self, **kwargs):
@@ -892,8 +902,10 @@ class OpinionSentiment(DictMixin):
892902
for 'neutral' will always be 0
893903
:vartype confidence_scores:
894904
~azure.ai.textanalytics.SentimentConfidenceScores
895-
:ivar int offset: The opinion offset from the start of the document.
896-
:ivar int length: The length of the opinion.
905+
:ivar int offset: The opinion offset from the start of the document. Returned
906+
in unicode code points.
907+
:ivar int length: The length of the opinion. Returned
908+
in unicode code points.
897909
:ivar bool is_negated: Whether the opinion is negated. For example, in
898910
"The food is not good", the opinion "good" is negated.
899911
"""

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_text_analytics_client.py

+9
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def __init__(self, endpoint, credential, **kwargs):
9393
)
9494
self._default_language = kwargs.pop("default_language", "en")
9595
self._default_country_hint = kwargs.pop("default_country_hint", "US")
96+
self._string_code_unit = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"
9697

9798
@distributed_trace
9899
def detect_language( # type: ignore
@@ -213,6 +214,8 @@ def recognize_entities( # type: ignore
213214
docs = _validate_input(documents, "language", language)
214215
model_version = kwargs.pop("model_version", None)
215216
show_stats = kwargs.pop("show_stats", False)
217+
if self._string_code_unit:
218+
kwargs.update({"string_index_type": self._string_code_unit})
216219
try:
217220
return self._client.entities_recognition_general(
218221
documents=docs,
@@ -278,6 +281,8 @@ def recognize_pii_entities( # type: ignore
278281
docs = _validate_input(documents, "language", language)
279282
model_version = kwargs.pop("model_version", None)
280283
show_stats = kwargs.pop("show_stats", False)
284+
if self._string_code_unit:
285+
kwargs.update({"string_index_type": self._string_code_unit})
281286
try:
282287
return self._client.entities_recognition_pii(
283288
documents=docs,
@@ -350,6 +355,8 @@ def recognize_linked_entities( # type: ignore
350355
docs = _validate_input(documents, "language", language)
351356
model_version = kwargs.pop("model_version", None)
352357
show_stats = kwargs.pop("show_stats", False)
358+
if self._string_code_unit:
359+
kwargs.update({"string_index_type": self._string_code_unit})
353360
try:
354361
return self._client.entities_linking(
355362
documents=docs,
@@ -490,6 +497,8 @@ def analyze_sentiment( # type: ignore
490497
model_version = kwargs.pop("model_version", None)
491498
show_stats = kwargs.pop("show_stats", False)
492499
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
500+
if self._string_code_unit:
501+
kwargs.update({"string_index_type": self._string_code_unit})
493502

494503
if show_opinion_mining is not None:
495504
kwargs.update({"opinion_mining": show_opinion_mining})

sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/aio/_text_analytics_client_async.py

+9
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def __init__( # type: ignore
9898
)
9999
self._default_language = kwargs.pop("default_language", "en")
100100
self._default_country_hint = kwargs.pop("default_country_hint", "US")
101+
self._string_code_unit = None if kwargs.get("api_version") == "v3.0" else "UnicodeCodePoint"
101102

102103
@distributed_trace_async
103104
async def detect_language( # type: ignore
@@ -216,6 +217,8 @@ async def recognize_entities( # type: ignore
216217
docs = _validate_input(documents, "language", language)
217218
model_version = kwargs.pop("model_version", None)
218219
show_stats = kwargs.pop("show_stats", False)
220+
if self._string_code_unit:
221+
kwargs.update({"string_index_type": self._string_code_unit})
219222
try:
220223
return await self._client.entities_recognition_general(
221224
documents=docs,
@@ -280,6 +283,8 @@ async def recognize_pii_entities( # type: ignore
280283
docs = _validate_input(documents, "language", language)
281284
model_version = kwargs.pop("model_version", None)
282285
show_stats = kwargs.pop("show_stats", False)
286+
if self._string_code_unit:
287+
kwargs.update({"string_index_type": self._string_code_unit})
283288
try:
284289
return await self._client.entities_recognition_pii(
285290
documents=docs,
@@ -351,6 +356,8 @@ async def recognize_linked_entities( # type: ignore
351356
docs = _validate_input(documents, "language", language)
352357
model_version = kwargs.pop("model_version", None)
353358
show_stats = kwargs.pop("show_stats", False)
359+
if self._string_code_unit:
360+
kwargs.update({"string_index_type": self._string_code_unit})
354361
try:
355362
return await self._client.entities_linking(
356363
documents=docs,
@@ -489,6 +496,8 @@ async def analyze_sentiment( # type: ignore
489496
model_version = kwargs.pop("model_version", None)
490497
show_stats = kwargs.pop("show_stats", False)
491498
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
499+
if self._string_code_unit:
500+
kwargs.update({"string_index_type": self._string_code_unit})
492501

493502
if show_opinion_mining is not None:
494503
kwargs.update({"opinion_mining": show_opinion_mining})

sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_analyze_sentiment.test_all_successful_passing_dict.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ interactions:
1919
User-Agent:
2020
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
2121
method: POST
22-
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=true&stringIndexType=TextElements_v8
22+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=true&stringIndexType=UnicodeCodePoint
2323
response:
2424
body:
2525
string: '{"statistics":{"documentsCount":3,"validDocumentsCount":3,"erroneousDocumentsCount":0,"transactionsCount":3},"documents":[{"id":"1","sentiment":"neutral","statistics":{"charactersCount":51,"transactionsCount":1},"confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"sentences":[{"sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"offset":0,"length":51,"text":"Microsoft
@@ -30,21 +30,21 @@ interactions:
3030
recommend you try it."}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
3131
headers:
3232
apim-request-id:
33-
- b1e4352f-1e0f-46e3-9f6e-5a82195726b5
33+
- 546ef146-2055-49be-945d-8b4d95870565
3434
content-type:
3535
- application/json; charset=utf-8
3636
csp-billing-usage:
3737
- CognitiveServices.TextAnalytics.BatchScoring=3
3838
date:
39-
- Wed, 26 Aug 2020 21:20:39 GMT
39+
- Thu, 27 Aug 2020 19:31:50 GMT
4040
strict-transport-security:
4141
- max-age=31536000; includeSubDomains; preload
4242
transfer-encoding:
4343
- chunked
4444
x-content-type-options:
4545
- nosniff
4646
x-envoy-upstream-service-time:
47-
- '91'
47+
- '84'
4848
status:
4949
code: 200
5050
message: OK

sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_analyze_sentiment.test_all_successful_passing_text_document_input.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ interactions:
1919
User-Agent:
2020
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
2121
method: POST
22-
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
22+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
2323
response:
2424
body:
2525
string: '{"documents":[{"id":"1","sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"sentences":[{"sentiment":"neutral","confidenceScores":{"positive":0.01,"neutral":0.99,"negative":0.0},"offset":0,"length":51,"text":"Microsoft
@@ -30,21 +30,21 @@ interactions:
3030
recommend you try it."}],"warnings":[]}],"errors":[],"modelVersion":"2020-04-01"}'
3131
headers:
3232
apim-request-id:
33-
- 36f47b42-b805-4655-9cc9-ed373487b586
33+
- ee67d363-828c-4a5b-92ee-4a943a9aa020
3434
content-type:
3535
- application/json; charset=utf-8
3636
csp-billing-usage:
3737
- CognitiveServices.TextAnalytics.BatchScoring=3
3838
date:
39-
- Wed, 26 Aug 2020 21:20:35 GMT
39+
- Thu, 27 Aug 2020 19:31:50 GMT
4040
strict-transport-security:
4141
- max-age=31536000; includeSubDomains; preload
4242
transfer-encoding:
4343
- chunked
4444
x-content-type-options:
4545
- nosniff
4646
x-envoy-upstream-service-time:
47-
- '83'
47+
- '95'
4848
status:
4949
code: 200
5050
message: OK

sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_analyze_sentiment.test_bad_credentials.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ interactions:
1616
User-Agent:
1717
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
1818
method: POST
19-
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
2020
response:
2121
body:
2222
string: '{"error":{"code":"401","message":"Access denied due to invalid subscription
@@ -26,7 +26,7 @@ interactions:
2626
content-length:
2727
- '224'
2828
date:
29-
- Wed, 26 Aug 2020 21:20:35 GMT
29+
- Thu, 27 Aug 2020 19:31:56 GMT
3030
status:
3131
code: 401
3232
message: PermissionDenied

sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_analyze_sentiment.test_bad_model_version_error.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,26 @@ interactions:
1616
User-Agent:
1717
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
1818
method: POST
19-
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?model-version=bad&showStats=false&stringIndexType=TextElements_v8
19+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?model-version=bad&showStats=false&stringIndexType=UnicodeCodePoint
2020
response:
2121
body:
2222
string: '{"error":{"code":"InvalidRequest","message":"Invalid Request.","innererror":{"code":"ModelVersionIncorrect","message":"Invalid
2323
model version. Possible values are: latest,2019-10-01,2020-04-01"}}}'
2424
headers:
2525
apim-request-id:
26-
- e98c3279-f8c4-49ce-b25c-f51289330fdd
26+
- 600cfe88-8c7b-4017-a50e-ef0c30a546a4
2727
content-type:
2828
- application/json; charset=utf-8
2929
date:
30-
- Wed, 26 Aug 2020 21:20:35 GMT
30+
- Thu, 27 Aug 2020 19:31:56 GMT
3131
strict-transport-security:
3232
- max-age=31536000; includeSubDomains; preload
3333
transfer-encoding:
3434
- chunked
3535
x-content-type-options:
3636
- nosniff
3737
x-envoy-upstream-service-time:
38-
- '10'
38+
- '4'
3939
status:
4040
code: 400
4141
message: Bad Request

sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_analyze_sentiment.test_batch_size_over_limit.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -760,26 +760,26 @@ interactions:
760760
User-Agent:
761761
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
762762
method: POST
763-
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
763+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
764764
response:
765765
body:
766766
string: '{"error":{"code":"InvalidRequest","message":"Invalid document in request.","innererror":{"code":"InvalidDocumentBatch","message":"Batch
767767
request contains too many records. Max 10 records are permitted."}}}'
768768
headers:
769769
apim-request-id:
770-
- 5bcf6f2d-8a67-4bf7-a552-67c0c0ce9f9b
770+
- e63eddb4-ac2c-4b1d-bfa8-ff78dc65076f
771771
content-type:
772772
- application/json; charset=utf-8
773773
date:
774-
- Wed, 26 Aug 2020 21:20:36 GMT
774+
- Thu, 27 Aug 2020 19:31:50 GMT
775775
strict-transport-security:
776776
- max-age=31536000; includeSubDomains; preload
777777
transfer-encoding:
778778
- chunked
779779
x-content-type-options:
780780
- nosniff
781781
x-envoy-upstream-service-time:
782-
- '13'
782+
- '12'
783783
status:
784784
code: 400
785785
message: Bad Request

sdk/textanalytics/azure-ai-textanalytics/tests/recordings/test_analyze_sentiment.test_batch_size_over_limit_error.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -725,18 +725,18 @@ interactions:
725725
User-Agent:
726726
- azsdk-python-ai-textanalytics/5.0.1 Python/3.8.5 (macOS-10.13.6-x86_64-i386-64bit)
727727
method: POST
728-
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=TextElements_v8
728+
uri: https://westus2.api.cognitive.microsoft.com/text/analytics/v3.1-preview.1/sentiment?showStats=false&stringIndexType=UnicodeCodePoint
729729
response:
730730
body:
731731
string: '{"error":{"code":"InvalidRequest","message":"Invalid document in request.","innererror":{"code":"InvalidDocumentBatch","message":"Batch
732732
request contains too many records. Max 10 records are permitted."}}}'
733733
headers:
734734
apim-request-id:
735-
- 35aa5189-c6e8-46c5-9339-607d86aef6a1
735+
- 22ce0f08-e152-4611-bf63-9cc9ae125568
736736
content-type:
737737
- application/json; charset=utf-8
738738
date:
739-
- Wed, 26 Aug 2020 21:20:39 GMT
739+
- Thu, 27 Aug 2020 19:31:50 GMT
740740
strict-transport-security:
741741
- max-age=31536000; includeSubDomains; preload
742742
transfer-encoding:

0 commit comments

Comments
 (0)