Make Credentials Required for Content Safety and Protected Materials Evaluators (Azure#37707)

needuv · w-javed · commit 793c3fc693c4 · 2024-10-04T16:52:24.000-07:00
* Make Credentials Required for Content Safety Evaluators

* fix a typo

* lint, fix content safety evaluator

* revert test change

* remove DefaultAzureCredential fallback from rai_service
diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -7,6 +7,7 @@
 ### Breaking Changes
 
 - Removed `numpy` dependency. All NaN values returned by the SDK have been changed to from `numpy.nan` to `math.nan`.
+- `credential` is now a required argument for all content safety evaluators and `ProtectedMaterialsEvaluator`, and it is now the first positional parameter of their constructors. `DefaultAzureCredential` is no longer used as a fallback when no credential is passed.
 
 ### Bugs Fixed
 
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py
@@ -20,7 +20,6 @@
 from azure.ai.evaluation._http_utils import get_async_http_client
 from azure.ai.evaluation._model_configurations import AzureAIProject
 from azure.core.credentials import TokenCredential
-from azure.identity import DefaultAzureCredential
 
 from .constants import (
     CommonConstants,
@@ -438,10 +437,6 @@ async def evaluate_with_rai_service(
        :return: The parsed annotation result.
        :rtype: List[List[Dict]]
     """
-    # Use DefaultAzureCredential if no credential is provided
-    # This is for the for batch run scenario as the credential cannot be serialized by promoptflow
-    if credential is None or credential == {}:
-        credential = DefaultAzureCredential()
 
     # Get RAI service URL from discovery service and check service availability
     token = await fetch_or_reuse_token(credential)
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py
@@ -5,7 +5,7 @@
 from typing import Dict, Optional
 from typing_extensions import override
 
-from azure.identity import DefaultAzureCredential
+from azure.core.credentials import TokenCredential
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common.rai_service import evaluate_with_rai_service
 from azure.ai.evaluation._exceptions import EvaluationException
@@ -17,32 +17,28 @@ class RaiServiceEvaluatorBase(EvaluatorBase):
     This includes content safety evaluators, protected material evaluators, and others. These evaluators
     are all assumed to be of the "query and response or conversation" input variety.
 
-    param eval_metric: The evaluation metric to be used for evaluation. This is used by the API call logic
-    to specify which evaluation to perform.
-    type eval_metric: ~azure.ai.evaluation._common.constants.EvaluationMetrics
-    param eval_last_turn: If True, only the last turn of the conversation will be evaluated, and no
+    :param eval_metric: The evaluation metric to be used for evaluation. This is used by the API call logic
+        to specify which evaluation to perform.
+    :type eval_metric: ~azure.ai.evaluation._common.constants.EvaluationMetrics
+    :param eval_last_turn: If True, only the last turn of the conversation will be evaluated, and no
         aggregation will be performed. If False, all turns will be evaluated and the numeric results will be,
         aggregated. Per-turn results are still be available in the output via the "evaluation_per_turn" key
         when this occurs. Default is False, resulting full conversation evaluation and aggregation.
-    type eval_last_turn: bool
+    :type eval_last_turn: bool
     """
 
     @override
     def __init__(
         self,
         eval_metric: EvaluationMetrics,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
+        credential: TokenCredential,
         eval_last_turn: bool = False,
     ):
         super().__init__(eval_last_turn=eval_last_turn)
         self._eval_metric = eval_metric
         self._azure_ai_project = azure_ai_project
-        if credential is None:
-            # Use DefaultCredential if no credential is provided
-            self._credential = DefaultAzureCredential()
-        else:
-            self._credential = credential
+        self._credential = credential
 
     @override
     def __call__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py
@@ -21,13 +21,13 @@ class ContentSafetyEvaluator:
     """
     Initialize a content safety evaluator configured to evaluate content safetry metrics for QA scenario.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
     :param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
         Default is True.
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: ~azure.core.credentials.TokenCredential
     :return: A function that evaluates content-safety metrics for "question-answering" scenario.
     :rtype: Callable
 
@@ -66,13 +66,13 @@ class ContentSafetyEvaluator:
         }
     """
 
-    def __init__(self, azure_ai_project: dict, parallel: bool = True, credential=None):
+    def __init__(self, credential, azure_ai_project: dict, parallel: bool = True):
         self._parallel = parallel
         self._evaluators = [
-            ViolenceEvaluator(azure_ai_project, credential),
-            SexualEvaluator(azure_ai_project, credential),
-            SelfHarmEvaluator(azure_ai_project, credential),
-            HateUnfairnessEvaluator(azure_ai_project, credential),
+            ViolenceEvaluator(credential, azure_ai_project),
+            SexualEvaluator(credential, azure_ai_project),
+            SelfHarmEvaluator(credential, azure_ai_project),
+            HateUnfairnessEvaluator(credential, azure_ai_project),
         ]
 
     def __call__(self, *, query: str, response: str, **kwargs):
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety_chat.py
@@ -29,6 +29,8 @@ class ContentSafetyChatEvaluator:
     """
     Initialize a content safety chat evaluator configured to evaluate content safetry metrics for chat scenario.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
@@ -38,8 +40,6 @@ class ContentSafetyChatEvaluator:
     :param parallel: If True, use parallel execution for evaluators. Else, use sequential execution.
         Default is True.
     :type parallel: bool
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: ~azure.core.credentials.TokenCredential
     :return: A function that evaluates and generates metrics for "chat" scenario.
     :rtype: Callable
 
@@ -88,7 +88,7 @@ class ContentSafetyChatEvaluator:
         }
     """
 
-    def __init__(self, azure_ai_project: dict, eval_last_turn: bool = False, parallel: bool = True, credential=None):
+    def __init__(self, credential, azure_ai_project: dict, eval_last_turn: bool = False, parallel: bool = True):
         self._eval_last_turn = eval_last_turn
         self._parallel = parallel
         self._evaluators = [
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py
@@ -1,7 +1,6 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -11,11 +10,11 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase):
     """
     Initialize a hate-unfairness evaluator for hate unfairness score.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
 
     **Usage**
 
@@ -43,8 +42,8 @@ class HateUnfairnessEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py
@@ -1,7 +1,6 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -11,11 +10,11 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase):
     """
     Initialize a self harm evaluator for self harm score.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
 
     **Usage**
 
@@ -43,8 +42,8 @@ class SelfHarmEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py
@@ -1,7 +1,6 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -11,11 +10,11 @@ class SexualEvaluator(RaiServiceEvaluatorBase):
     """
     Initialize a sexual evaluator for sexual score.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
 
     **Usage**
 
@@ -43,8 +42,8 @@ class SexualEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py
@@ -1,7 +1,6 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -11,11 +10,11 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase):
     """
     Initialize a violence evaluator for violence score.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
 
     **Usage**
 
@@ -43,8 +42,8 @@ class ViolenceEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py
@@ -1,7 +1,6 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -17,11 +16,11 @@ class ECIEvaluator(RaiServiceEvaluatorBase):
     "AI-generated content may be incorrect. If you are seeking ECI-related information, please go to Bing Search."
     Outputs True or False with AI-generated reasoning.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
     :return: Whether or not ECI was found in the response without a disclaimer, with AI-generated reasoning
     :rtype: Dict[str, str]
 
@@ -50,8 +49,8 @@ class ECIEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py
@@ -1,7 +1,6 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -12,11 +11,11 @@ class ProtectedMaterialEvaluator(RaiServiceEvaluatorBase):
     Initialize a protected material evaluator to detect whether protected material
     is present in your AI system's response. Outputs True or False with AI-generated reasoning.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
     :return: Whether or not protected material was found in the response, with AI-generated reasoning.
     :rtype: Dict[str, str]
 
@@ -45,8 +44,8 @@ class ProtectedMaterialEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_materials/_protected_materials.py
@@ -54,11 +54,11 @@ class ProtectedMaterialsEvaluator:
     Initialize a protected materials evaluator to detect whether protected material
     is present in your AI system's response. Outputs True or False with AI-generated reasoning.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project.
         It contains subscription id, resource group, and project name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: ~azure.core.credentials.TokenCredential
     :return: Whether or not protected material was found in the response, with AI-generated reasoning.
     :rtype: Dict[str, str]
 
@@ -84,7 +84,7 @@ class ProtectedMaterialsEvaluator:
         }
     """
 
-    def __init__(self, azure_ai_project: dict, credential=None):
+    def __init__(self, credential, azure_ai_project: dict):
         self._async_evaluator = _AsyncProtectedMaterialsEvaluator(azure_ai_project, credential)
 
     def __call__(self, *, query: str, response: str, **kwargs):
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_xpia/xpia.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_xpia/xpia.py
@@ -3,7 +3,6 @@
 # ---------------------------------------------------------
 import logging
 
-from typing import Optional
 from typing_extensions import override
 from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
@@ -17,14 +16,14 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
 
     Detect whether cross domain injected attacks are present in your AI system's response.
 
+    :param credential: The credential for connecting to Azure AI project. Required
+    :type credential: ~azure.core.credentials.TokenCredential
     :param azure_ai_project: The scope of the Azure AI project. It contains subscription id, resource group, and project
         name.
     :type azure_ai_project: ~azure.ai.evaluation.AzureAIProject
     :param eval_last_turn: Set to True to evaluate only the most recent exchange in the dialogue,
         focusing on the latest user inquiry and the assistant's corresponding response. Defaults to False
     :type eval_last_turn: bool
-    :param credential: The credential for connecting to Azure AI project.
-    :type credential: Optional[~azure.core.credentials.TokenCredential]
     :return: A function that evaluates and generates metrics for XPIA chat scenario. Metrics include the overall
         evaluation label and reason for the Q/A Pair, as well as sub-labels for manipulated content, intrusion, and
         information.
@@ -53,8 +52,8 @@ class IndirectAttackEvaluator(RaiServiceEvaluatorBase):
     @override
     def __init__(
         self,
+        credential,
         azure_ai_project: dict,
-        credential: Optional[dict] = None,
         eval_last_turn: bool = False,
     ):
         super().__init__(
diff --git a/sdk/evaluation/azure-ai-evaluation/setup.py b/sdk/evaluation/azure-ai-evaluation/setup.py
@@ -68,7 +68,8 @@
         "promptflow-devkit>=1.15.0",
         "promptflow-core>=1.15.0",
         "pyjwt>=2.8.0",
-        "azure-identity>=1.12.0",
+        # pickle support for credentials was added to this release
+        "azure-identity>=1.16.0",
         "azure-core>=1.30.2",
         "nltk>=3.9.1",
         "rouge-score>=0.1.2",
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_builtin_evaluators.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_builtin_evaluators.py
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_sim_and_eval.py b/sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_sim_and_eval.py
diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate_telemetry.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_evaluate_telemetry.py