Commit acc9e33
[Evaluation] Error improve for service-based evaluator/simulator (#38106)
* Error improve for service-based evaluator/simulator
* update
* update print summary
* update
* fix failed tests
* fix black
* update changelog
* update
* update version
1 parent 6ea41fe commit acc9e33

8 files changed, +71 -37 lines changed


sdk/evaluation/azure-ai-evaluation/CHANGELOG.md (+2)

@@ -9,6 +9,8 @@
 ### Bugs Fixed
 
 ### Other Changes
+- Refined error messages for service-based evaluators and simulators.
+- Introduced environment variable `AI_EVALS_DISABLE_EXPERIMENTAL_WARNING` to disable the warning message for experimental features.
 
 ## 1.0.0b5 (2024-10-28)

sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md (+4 -3)

@@ -6,7 +6,7 @@ This guide walks you through how to investigate failures, common errors in the `
 
 - [Handle Evaluate API Errors](#handle-evaluate-api-errors)
 - [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues)
-- [Safety Metric Supported Regions](#safety-metric-supported-regions)
+- [Troubleshoot Safety Evaluator Issues](#troubleshoot-safety-evaluator-issues)
 - [Handle Simulation Errors](#handle-simulation-errors)
 - [Adversarial Simulation Supported Regions](#adversarial-simulation-supported-regions)
 - [Logging](#logging)
@@ -31,9 +31,10 @@ This guide walks you through how to investigate failures, common errors in the `
 
 - Additionally, if you're using a virtual network or private link, and your evaluation run upload fails because of that, check out this [guide](https://docs.microsoft.com/azure/machine-learning/how-to-enable-studio-virtual-network#access-data-using-the-studio).
 
-### Safety Metric Supported Regions
+### Troubleshoot Safety Evaluator Issues
 
-Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
+- Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
+- If you encounter a 403 Unauthorized error when using safety evaluators, verify that you have the `Contributor` role assigned to your Azure AI project. `Contributor` role is currently required to run safety evaluations.
 
 ## Handle Simulation Errors

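To see how the new troubleshooting guidance plays out in code, here is a minimal sketch (not part of this commit) that runs a service-based safety evaluator and prints the error raised when the project rejects the call. The project values are placeholders, and the evaluator constructor and call signature are assumed from the current preview of `azure-ai-evaluation`; keyword names may differ between beta versions.

```python
# Hypothetical usage sketch: exercising a safety evaluator so that the refined
# service errors (missing Contributor role, unsupported region) surface to the caller.
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation import ViolenceEvaluator
from azure.ai.evaluation._exceptions import EvaluationException

azure_ai_project = {
    "subscription_id": "<subscription-id>",        # placeholder values
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

evaluator = ViolenceEvaluator(credential=DefaultAzureCredential(), azure_ai_project=azure_ai_project)

try:
    print(evaluator(query="What is the capital of France?", response="Paris."))
except EvaluationException as exc:
    # A 403 from the project or an unsupported region now yields a message that
    # points to https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot.
    print(exc)
```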
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/_experimental.py (+4)

@@ -2,6 +2,7 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 
+import os
 import functools
 import inspect
 import logging
@@ -149,6 +150,9 @@ def _get_indentation_size(doc_string: str) -> int:
 def _should_skip_warning():
     skip_warning_msg = False
 
+    if os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true":
+        skip_warning_msg = True
+
     # Cases where we want to suppress the warning:
     # 1. When converting from REST object to SDK object
     for frame in inspect.stack():

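The new switch is a plain environment variable, so opting out of the experimental-feature warning is a one-liner. A minimal sketch, assuming the variable is set before the experimental class or function is first used:

```python
import os

# Mirror of the check added above: any value other than "true" (case-insensitive)
# keeps the experimental warning enabled.
os.environ["AI_EVALS_DISABLE_EXPERIMENTAL_WARNING"] = "true"

assert os.getenv("AI_EVALS_DISABLE_EXPERIMENTAL_WARNING", "false").lower() == "true"
```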
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/rai_service.py (+32 -24)

@@ -83,27 +83,31 @@ async def ensure_service_availability(rai_svc_url: str, token: str, capability:
     async with get_async_http_client() as client:
         response = await client.get(svc_liveness_url, headers=headers)
 
-        if response.status_code != 200:
-            msg = f"RAI service is not available in this region. Status Code: {response.status_code}"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.UNKNOWN,
-                category=ErrorCategory.SERVICE_UNAVAILABLE,
-                blame=ErrorBlame.USER_ERROR,
-            )
-
-        capabilities = response.json()
+        if response.status_code != 200:
+            msg = (
+                f"RAI service is unavailable in this region, or you lack the necessary permissions "
+                f"to access the AI project. Status Code: {response.status_code}"
+            )
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.RAI_CLIENT,
+                category=ErrorCategory.SERVICE_UNAVAILABLE,
+                blame=ErrorBlame.USER_ERROR,
+                tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
+            )
 
-        if capability and capability not in capabilities:
-            msg = f"Capability '{capability}' is not available in this region"
-            raise EvaluationException(
-                message=msg,
-                internal_message=msg,
-                target=ErrorTarget.RAI_CLIENT,
-                category=ErrorCategory.SERVICE_UNAVAILABLE,
-                blame=ErrorBlame.USER_ERROR,
-            )
+        capabilities = response.json()
+        if capability and capability not in capabilities:
+            msg = f"The needed capability '{capability}' is not supported by the RAI service in this region."
+            raise EvaluationException(
+                message=msg,
+                internal_message=msg,
+                target=ErrorTarget.RAI_CLIENT,
+                category=ErrorCategory.SERVICE_UNAVAILABLE,
+                blame=ErrorBlame.USER_ERROR,
+                tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
+            )
 
 
 def generate_payload(normalized_user_text: str, metric: str, annotation_task: str) -> Dict:
@@ -371,13 +375,17 @@ async def _get_service_discovery_url(azure_ai_project: AzureAIProject, token: st
         )
 
     if response.status_code != 200:
-        msg = "Failed to retrieve the discovery service URL."
+        msg = (
+            f"Failed to connect to your Azure AI project. Please check if the project scope is configured correctly, "
+            f"and make sure you have the necessary access permissions. "
+            f"Status code: {response.status_code}."
+        )
         raise EvaluationException(
             message=msg,
-            internal_message=msg,
             target=ErrorTarget.RAI_CLIENT,
-            category=ErrorCategory.SERVICE_UNAVAILABLE,
-            blame=ErrorBlame.UNKNOWN,
+            blame=ErrorBlame.USER_ERROR,
+            category=ErrorCategory.PROJECT_ACCESS_ERROR,
+            tsg_link="https://aka.ms/azsdk/python/evaluation/safetyevaluator/troubleshoot",
        )
 
     base_url = urlparse(response.json()["properties"]["discoveryUrl"])

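A minimal sketch (not part of this commit) of how a caller now sees these refined errors from `ensure_service_availability`; the URL, token, and capability name below are placeholders, not real values:

```python
import asyncio

# ensure_service_availability lives in a private module, so this import path may
# change between preview releases.
from azure.ai.evaluation._common.rai_service import ensure_service_availability
from azure.ai.evaluation._exceptions import EvaluationException


async def check_rai_service() -> None:
    try:
        await ensure_service_availability("<rai-service-url>", "<token>", capability="<capability>")
    except EvaluationException as exc:
        # The message now separates "region unavailable / missing permissions" from
        # "capability not supported", and both include the troubleshooting link.
        print(exc)


asyncio.run(check_rai_service())
```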
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_batch_run/proxy_client.py (+13 -3)

@@ -68,12 +68,22 @@ def get_run_summary(self, proxy_run: ProxyRun) -> Dict[str, Any]:
         run = proxy_run.run.result()
 
         # pylint: disable=protected-access
+        completed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")
+        failed_lines = run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")
+
+        # Update status to "Completed with Errors" if the original status is "Completed" and there are failed lines
+        if run.status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
+            status = "Completed with Errors"
+        else:
+            status = run.status
+
+        # Return the ordered dictionary with the updated status
         return OrderedDict(
             [
-                ("status", run.status),
+                ("status", status),
                 ("duration", str(run._end_time - run._created_on)),
-                ("completed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.completed", "NA")),
-                ("failed_lines", run._properties.get("system_metrics", {}).get("__pf__.lines.failed", "NA")),
+                ("completed_lines", completed_lines),
+                ("failed_lines", failed_lines),
                 ("log_path", str(run._output_path)),
             ]
         )

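The status rule introduced above is self-contained, so it can be illustrated in isolation. A small sketch with a stand-alone helper (hypothetical name) that applies the same rule to sample line counts:

```python
from typing import Union


def summarize_status(run_status: str, failed_lines: Union[int, str]) -> str:
    """Downgrade a completed run to "Completed with Errors" when any lines failed."""
    if run_status == "Completed" and failed_lines != "NA" and int(failed_lines) > 0:
        return "Completed with Errors"
    return run_status


print(summarize_status("Completed", 0))     # -> Completed
print(summarize_status("Completed", 3))     # -> Completed with Errors
print(summarize_status("Failed", "NA"))     # -> Failed
```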
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py (+2)

@@ -23,6 +23,7 @@ class ErrorCategory(Enum):
     * SERVICE_UNAVAILABLE -> Service is unavailable
     * MISSING_PACKAGE -> Required package is missing
     * FAILED_REMOTE_TRACKING -> Remote tracking failed
+    * PROJECT_ACCESS_ERROR -> Access to project failed
     * UNKNOWN -> Undefined placeholder. Avoid using.
     """
 
@@ -35,6 +36,7 @@
     SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE"
     MISSING_PACKAGE = "MISSING PACKAGE"
     FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING"
+    PROJECT_ACCESS_ERROR = "PROJECT ACCESS ERROR"
     UNKNOWN = "UNKNOWN"

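With the new category in place, callers can separate project-access problems from service outages. A rough sketch, assuming `EvaluationException` exposes the `category` it was constructed with; the raising helper below is purely illustrative and mirrors the constructor call used in the service code above:

```python
from azure.ai.evaluation._exceptions import (
    ErrorBlame,
    ErrorCategory,
    ErrorTarget,
    EvaluationException,
)


def fail_project_access() -> None:
    # Illustrative stand-in: raise the exception the same way the service code does.
    raise EvaluationException(
        message="Failed to connect to your Azure AI project.",
        internal_message="Failed to connect to your Azure AI project.",
        target=ErrorTarget.RAI_CLIENT,
        category=ErrorCategory.PROJECT_ACCESS_ERROR,
        blame=ErrorBlame.USER_ERROR,
    )


try:
    fail_project_access()
except EvaluationException as exc:
    if exc.category == ErrorCategory.PROJECT_ACCESS_ERROR:
        print("Check the project scope and your role assignment on the Azure AI project.")
    elif exc.category == ErrorCategory.SERVICE_UNAVAILABLE:
        print("The RAI service is unavailable in this region or lacks the needed capability.")
    else:
        raise
```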
sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py (+8 -4)

@@ -74,14 +74,18 @@ def _get_service_discovery_url(self):
             timeout=5,
         )
         if response.status_code != 200:
-            msg = "Failed to retrieve the discovery service URL."
+            msg = (
+                f"Failed to connect to your Azure AI project. Please check if the project scope is configured "
+                f"correctly, and make sure you have the necessary access permissions. "
+                f"Status code: {response.status_code}."
+            )
             raise EvaluationException(
                 message=msg,
-                internal_message=msg,
                 target=ErrorTarget.RAI_CLIENT,
-                category=ErrorCategory.SERVICE_UNAVAILABLE,
-                blame=ErrorBlame.UNKNOWN,
+                category=ErrorCategory.PROJECT_ACCESS_ERROR,
+                blame=ErrorBlame.USER_ERROR,
             )
+
         base_url = urlparse(response.json()["properties"]["discoveryUrl"])
         return f"{base_url.scheme}://{base_url.netloc}"

sdk/evaluation/azure-ai-evaluation/tests/unittests/test_content_safety_rai_script.py (+6 -3)

@@ -146,15 +146,18 @@ async def test_ensure_service_availability(self, client_mock):
     async def test_ensure_service_availability_service_unavailable(self, client_mock):
         with pytest.raises(Exception) as exc_info:
             _ = await ensure_service_availability("dummy_url", "dummy_token")
-        assert "RAI service is not available in this region. Status Code: 9001" in str(exc_info._excinfo[1])
+        assert "RAI service is unavailable in this region" in str(exc_info._excinfo[1])
+        assert "Status Code: 9001" in str(exc_info._excinfo[1])
         assert client_mock._mock_await_count == 1
 
     @pytest.mark.asyncio
     @patch("azure.ai.evaluation._http_utils.AsyncHttpPipeline.get", return_value=MockAsyncHttpResponse(200, json={}))
     async def test_ensure_service_availability_exception_capability_unavailable(self, client_mock):
         with pytest.raises(Exception) as exc_info:
             _ = await ensure_service_availability("dummy_url", "dummy_token", capability="does not exist")
-        assert "Capability 'does not exist' is not available in this region" in str(exc_info._excinfo[1])
+        assert "The needed capability 'does not exist' is not supported by the RAI service in this region" in str(
+            exc_info._excinfo[1]
+        )
         assert client_mock._mock_await_count == 1
 
     @pytest.mark.asyncio
@@ -359,7 +362,7 @@ async def test_get_service_discovery_url_exception(self, client_mock):
 
         with pytest.raises(Exception) as exc_info:
             _ = await _get_service_discovery_url(azure_ai_project=azure_ai_project, token=token)
-        assert "Failed to retrieve the discovery service URL" in str(exc_info._excinfo[1])
+        assert "Failed to connect to your Azure AI project." in str(exc_info._excinfo[1])
 
     @pytest.mark.asyncio
     @patch(
