From 7de6367e5f504e3dfa47695d3a7e45a42485fb55 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 1 Oct 2024 14:51:41 -0700 Subject: [PATCH 01/14] Update task_query_response.prompty remove required keys --- .../simulator/_prompty/task_query_response.prompty | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty index 881d00493ff8..42a5d3fe4e37 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty @@ -3,11 +3,6 @@ name: TaskSimulatorQueryResponse description: Gets queries and responses from a blob of text model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - api_key: ${env:AZURE_OPENAI_API_KEY} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From f288b341820d9f54f7830dae8f841035b4f30df6 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 1 Oct 2024 14:51:54 -0700 Subject: [PATCH 02/14] Update task_simulate.prompty --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 7dce5e28a6d1..1d8e360b56b9 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -3,10 +3,6 @@ name: TaskSimulatorWithPersona description: Simulates a user to complete a conversation model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From 2a4b6f744a9a6c8faee8c742f0ad55d5cf82b922 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 2 Oct 2024 07:21:58 -0700 Subject: [PATCH 03/14] Update task_query_response.prompty --- .../evaluation/simulator/_prompty/task_query_response.prompty | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty index 42a5d3fe4e37..b8c04fb19ef1 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty @@ -3,6 +3,10 @@ name: TaskSimulatorQueryResponse description: Gets queries and responses from a blob of text model: api: chat + configuration: + type: azure_openai + azure_deployment: ${env:AZURE_DEPLOYMENT} + azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From c8ce251bc34b2c3913f1d7e793ed65292e6a2e24 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Wed, 2 Oct 2024 07:22:17 -0700 Subject: [PATCH 04/14] Update task_simulate.prompty --- .../ai/evaluation/simulator/_prompty/task_simulate.prompty | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 1d8e360b56b9..7dce5e28a6d1 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -3,6 +3,10 @@ name: TaskSimulatorWithPersona description: Simulates a user to complete a conversation model: api: chat + configuration: + type: azure_openai + azure_deployment: ${env:AZURE_DEPLOYMENT} + azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 From fab4f9a177884f91b1ebdc079221fbe39d9ba3d5 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 3 Oct 2024 08:09:58 -0700 Subject: [PATCH 05/14] Keys are not needed for the simulator --- .../azure-ai-evaluation/CHANGELOG.md | 20 ++++ sdk/evaluation/azure-ai-evaluation/README.md | 10 +- .../_prompty/task_query_response.prompty | 4 - .../simulator/_prompty/task_simulate.prompty | 4 - .../ai/evaluation/simulator/_simulator.py | 109 +++++++----------- 5 files changed, 63 insertions(+), 84 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 201f0002e179..1353ec8ce3ce 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -55,6 +55,26 @@ evaluate( ) ``` +- Simulator now requires configuration to call the prompty within the `azure_ai_project`. This enables key-less usage of the simulator. +Before: +```python +azure_ai_project = { + "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"), + "resource_group_name": os.environ.get("RESOURCE_GROUP"), + "project_name": os.environ.get("PROJECT_NAME"), +} +``` +After: +```python +azure_ai_project = { + "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"), + "resource_group_name": os.environ.get("RESOURCE_GROUP"), + "project_name": os.environ.get("PROJECT_NAME"), + "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), + "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), +} +``` + ### Bugs Fixed - Fixed issue where Entra ID authentication was not working with `AzureOpenAIModelConfiguration` diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md index 3bce7ec0a4a0..00a165f46bb2 100644 --- a/sdk/evaluation/azure-ai-evaluation/README.md +++ b/sdk/evaluation/azure-ai-evaluation/README.md @@ -119,11 +119,6 @@ name: ApplicationPrompty description: Simulates an application model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - api_key: ${env:AZURE_OPENAI_API_KEY} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 @@ -158,7 +153,9 @@ import os azure_ai_project = { "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"), "resource_group_name": os.environ.get("RESOURCE_GROUP"), - "project_name": os.environ.get("PROJECT_NAME") + "project_name": os.environ.get("PROJECT_NAME"), + "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), + "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), } import wikipedia @@ -228,7 +225,6 @@ if __name__ == "__main__": os.environ["AZURE_SUBSCRIPTION_ID"] = "" os.environ["RESOURCE_GROUP"] = "" os.environ["PROJECT_NAME"] = "" - os.environ["AZURE_OPENAI_API_KEY"] = "" os.environ["AZURE_OPENAI_ENDPOINT"] = "" os.environ["AZURE_DEPLOYMENT"] = "" asyncio.run(main()) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty index b8c04fb19ef1..42a5d3fe4e37 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_query_response.prompty @@ -3,10 +3,6 @@ name: TaskSimulatorQueryResponse description: Gets queries and responses from a blob of text model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty index 7dce5e28a6d1..1d8e360b56b9 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_prompty/task_simulate.prompty @@ -3,10 +3,6 @@ name: TaskSimulatorWithPersona description: Simulates a user to complete a conversation model: api: chat - configuration: - type: azure_openai - azure_deployment: ${env:AZURE_DEPLOYMENT} - azure_endpoint: ${env:AZURE_OPENAI_ENDPOINT} parameters: temperature: 0.0 top_p: 1.0 diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index bff65d987f7e..4f7ee2711049 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -10,9 +10,8 @@ import re import warnings from typing import Any, Callable, Dict, List, Optional, Union - -from promptflow.client import load_flow -from promptflow.core import AzureOpenAIModelConfiguration, Flow +from promptflow.core import AsyncPrompty +from azure.ai.evaluation._common.utils import construct_prompty_model_config from tqdm import tqdm from .._user_agent import USER_AGENT @@ -128,7 +127,7 @@ async def __call__( num_queries = min(num_queries, len(tasks)) max_conversation_turns *= 2 # account for both user and assistant turns - prompty_model_config = self._build_prompty_model_config() + prompty_model_config = self.azure_ai_project if conversation_turns: return await self._simulate_with_predefined_turns( target=target, @@ -148,7 +147,6 @@ async def __call__( prompty_model_config=prompty_model_config, **kwargs, ) - return await self._create_conversations_from_query_responses( query_responses=query_responses, max_conversation_turns=max_conversation_turns, @@ -159,18 +157,6 @@ async def __call__( api_call_delay_sec=api_call_delay_sec, ) - def _build_prompty_model_config(self) -> Dict[str, Any]: - """ - Constructs the configuration for the prompty model. - - :return: A dictionary containing the prompty model configuration, including API version and user agent headers if applicable. - :rtype: Dict[str, Any] - """ - config = {"configuration": self.azure_ai_project} - if USER_AGENT and isinstance(self.azure_ai_project, AzureOpenAIModelConfiguration): - config.update({"parameters": {"extra_headers": {"x-ms-useragent": USER_AGENT}}}) - return config - async def _simulate_with_predefined_turns( self, *, @@ -280,13 +266,13 @@ async def _extend_conversation_with_simulator( :paramtype progress_bar: tqdm, """ user_flow = self._load_user_simulation_flow( - user_simulator_prompty=user_simulator_prompty, + user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=prompty_model_config, user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, ) while len(current_simulation) < max_conversation_turns: - user_response_content = user_flow( + user_response_content = await user_flow( task="Continue the conversation", conversation_history=current_simulation.to_list(), **user_simulator_prompty_kwargs, @@ -308,7 +294,7 @@ def _load_user_simulation_flow( user_simulator_prompty: Union[str, os.PathLike], prompty_model_config: Dict[str, Any], user_simulator_prompty_kwargs: Dict[str, Any], - ) -> Flow: + ) -> "Prompty": # type: ignore """ Loads the flow for simulating user interactions. @@ -328,11 +314,21 @@ def _load_user_simulation_flow( # Access the resource as a file path # pylint: disable=deprecated-method with pkg_resources.path(package, resource_name) as prompty_path: - return load_flow(source=str(prompty_path), model=prompty_model_config) + prompty_model_config = construct_prompty_model_config( + model_config=prompty_model_config, # type: ignore + default_api_version="2024-06-01", + user_agent=USER_AGENT, + ) + return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) except FileNotFoundError as e: raise f"Flow path for {resource_name} does not exist in package {package}." from e - return load_flow( - source=user_simulator_prompty, + prompty_model_config = construct_prompty_model_config( + model_config=prompty_model_config, # type: ignore + default_api_version="2024-06-01", + user_agent=USER_AGENT, + ) + return AsyncPrompty.load( + source=user_simulator_prompty, model=prompty_model_config, **user_simulator_prompty_kwargs, ) @@ -404,12 +400,12 @@ async def _generate_query_responses( :raises RuntimeError: If an error occurs during query generation. """ query_flow = self._load_query_generation_flow( - query_response_generating_prompty=query_response_generating_prompty, + query_response_generating_prompty=query_response_generating_prompty, # type: ignore prompty_model_config=prompty_model_config, query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs, ) try: - query_responses = query_flow(text=text, num_queries=num_queries) + query_responses = await query_flow(text=text, num_queries=num_queries) if isinstance(query_responses, dict): keys = list(query_responses.keys()) return query_responses[keys[0]] @@ -423,7 +419,7 @@ def _load_query_generation_flow( query_response_generating_prompty: Union[str, os.PathLike], prompty_model_config: Dict[str, Any], query_response_generating_prompty_kwargs: Dict[str, Any], - ) -> Flow: + ) -> "Prompty": # type: ignore """ Loads the flow for generating query responses. @@ -443,13 +439,23 @@ def _load_query_generation_flow( # Access the resource as a file path # pylint: disable=deprecated-method with pkg_resources.path(package, resource_name) as prompty_path: - return load_flow(source=str(prompty_path), model=prompty_model_config) + prompty_model_config = construct_prompty_model_config( + model_config=prompty_model_config, # type: ignore + default_api_version="2024-06-01", + user_agent=USER_AGENT, + ) + return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) except FileNotFoundError as e: raise f"Flow path for {resource_name} does not exist in package {package}." from e - return load_flow( - source=query_response_generating_prompty, + prompty_model_config = construct_prompty_model_config( + model_config=prompty_model_config, # type: ignore + default_api_version="2024-06-01", + user_agent=USER_AGENT, + ) + return AsyncPrompty.load( + source=query_response_generating_prompty, model=prompty_model_config, - **query_response_generating_prompty_kwargs, + **query_response_generating_prompty_kwargs ) async def _create_conversations_from_query_responses( @@ -501,7 +507,7 @@ async def _create_conversations_from_query_responses( conversation = await self._complete_conversation( conversation_starter=query, max_conversation_turns=max_conversation_turns, - task=task, + task=task, # type: ignore user_simulator_prompty=user_simulator_prompty, user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, target=target, @@ -565,11 +571,11 @@ async def _complete_conversation( while len(conversation_history) < max_conversation_turns: user_flow = self._load_user_simulation_flow( - user_simulator_prompty=user_simulator_prompty, - prompty_model_config=self._build_prompty_model_config(), + user_simulator_prompty=user_simulator_prompty, # type: ignore + prompty_model_config=self.azure_ai_project, user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, ) - conversation_starter_from_simulated_user = user_flow( + conversation_starter_from_simulated_user = await user_flow( task=task, conversation_history=[ { @@ -595,41 +601,6 @@ async def _complete_conversation( return conversation_history.to_list() - async def _build_user_simulation_response( - self, - task: str, - conversation_history: List[Dict[str, Any]], - user_simulator_prompty: Optional[str], - user_simulator_prompty_kwargs: Dict[str, Any], - ) -> str: - """ - Builds a response from the user simulator based on the current conversation history. - - :param task: A string representing the task details. - :type task: str - :param conversation_history: The current conversation history as a list of dictionaries. - :type conversation_history: List[Dict[str, Any]] - :param user_simulator_prompty: Path to the user simulator prompty file. - :type user_simulator_prompty: Optional[str] - :param user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. - :type user_simulator_prompty_kwargs: Dict[str, Any] - :return: The generated response content from the user simulator. - :rtype: str - :raises RuntimeError: If an error occurs during response generation. - """ - user_flow = self._load_user_simulation_flow( - user_simulator_prompty=user_simulator_prompty, - prompty_model_config=self._build_prompty_model_config(), - user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, - ) - try: - response_content = user_flow( - task=task, conversation_history=conversation_history, **user_simulator_prompty_kwargs - ) - user_response = self._parse_prompty_response(response=response_content) - return user_response["content"] - except Exception as e: - raise RuntimeError("Error building user simulation response") from e async def _get_target_response( self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory From 6fc39f0330b48c7c90d0213ccadda4a8441b153d Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 3 Oct 2024 11:51:59 -0700 Subject: [PATCH 06/14] Use model_config for the simulator instead of azure_ai_project --- sdk/evaluation/azure-ai-evaluation/README.md | 53 ++- .../ai/evaluation/simulator/_simulator.py | 116 +++--- .../tests/unittests/test_non_adv_simulator.py | 358 ++++++++++++++---- 3 files changed, 390 insertions(+), 137 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md index 00a165f46bb2..d1f66cc06420 100644 --- a/sdk/evaluation/azure-ai-evaluation/README.md +++ b/sdk/evaluation/azure-ai-evaluation/README.md @@ -149,52 +149,52 @@ from azure.ai.evaluation.simulator import Simulator from promptflow.client import load_flow from azure.identity import DefaultAzureCredential import os +import wikipedia -azure_ai_project = { - "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"), - "resource_group_name": os.environ.get("RESOURCE_GROUP"), - "project_name": os.environ.get("PROJECT_NAME"), +# Set up the model configuration without api_key, using DefaultAzureCredential +model_config = { "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), } -import wikipedia -wiki_search_term = "Leonardo da vinci" +# Use Wikipedia to get some text for the simulation +wiki_search_term = "Leonardo da Vinci" wiki_title = wikipedia.search(wiki_search_term)[0] wiki_page = wikipedia.page(wiki_title) text = wiki_page.summary[:1000] -def method_to_invoke_application_prompty(query: str): +def method_to_invoke_application_prompty(query: str, messages_list: List[Dict], context: Optional[Dict]): try: current_dir = os.path.dirname(__file__) prompty_path = os.path.join(current_dir, "application.prompty") - _flow = load_flow(source=prompty_path, model={ - "configuration": azure_ai_project - }) + _flow = load_flow( + source=prompty_path, + model=model_config, + credential=DefaultAzureCredential() + ) response = _flow( query=query, context=context, conversation_history=messages_list ) return response - except: - print("Something went wrong invoking the prompty") + except Exception as e: + print(f"Something went wrong invoking the prompty: {e}") return "something went wrong" async def callback( - messages: List[Dict], + messages: Dict[str, List[Dict]], stream: bool = False, session_state: Any = None, # noqa: ANN401 context: Optional[Dict[str, Any]] = None, ) -> dict: messages_list = messages["messages"] - # get last message + # Get the last message from the user latest_message = messages_list[-1] query = latest_message["content"] - context = None - # call your endpoint or ai application here - response = method_to_invoke_application_prompty(query) - # we are formatting the response to follow the openAI chat protocol format + # Call your endpoint or AI application here + response = method_to_invoke_application_prompty(query, messages_list, context) + # Format the response to follow the OpenAI chat protocol format formatted_response = { "content": response, "role": "assistant", @@ -205,10 +205,8 @@ async def callback( messages["messages"].append(formatted_response) return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context} - - async def main(): - simulator = Simulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential()) + simulator = Simulator(model_config=model_config, credential=DefaultAzureCredential()) outputs = await simulator( target=callback, text=text, @@ -219,16 +217,17 @@ async def main(): f"I am a teacher and I want to teach my students about {wiki_search_term}" ], ) - print(json.dumps(outputs)) + print(json.dumps(outputs, indent=2)) if __name__ == "__main__": - os.environ["AZURE_SUBSCRIPTION_ID"] = "" - os.environ["RESOURCE_GROUP"] = "" - os.environ["PROJECT_NAME"] = "" - os.environ["AZURE_OPENAI_ENDPOINT"] = "" - os.environ["AZURE_DEPLOYMENT"] = "" + # Ensure that the following environment variables are set in your environment: + # AZURE_OPENAI_ENDPOINT and AZURE_DEPLOYMENT + # Example: + # os.environ["AZURE_OPENAI_ENDPOINT"] = "https://your-endpoint.openai.azure.com/" + # os.environ["AZURE_DEPLOYMENT"] = "your-deployment-name" asyncio.run(main()) print("done!") + ``` #### Adversarial Simulator diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 4f7ee2711049..c46e6ceff73c 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -11,6 +11,7 @@ import warnings from typing import Any, Callable, Dict, List, Optional, Union from promptflow.core import AsyncPrompty +from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration from azure.ai.evaluation._common.utils import construct_prompty_model_config from tqdm import tqdm @@ -28,37 +29,61 @@ class Simulator: Simulator for generating synthetic conversations. """ - def __init__(self, azure_ai_project: Dict[str, Any], credential: Optional[Any] = None): + def __init__( + self, + model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], + credential: Optional[Any] = None, + ): """ - Initializes the task simulator with a project scope. + Initializes the task simulator with the model configuration. - :param azure_ai_project: A dictionary defining the scope of the project, including keys such as - "subscription_id", "resource_group_name", and "project_name". + :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration. + :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration] :param credential: Azure credentials to authenticate the user. If None, the default credentials are used. :paramtype credential: Optional[Any] - :raises ValueError: If the azure_ai_project does not contain the required keys or any value is None. + :raises ValueError: If the model_config does not contain the required keys or any value is None. """ - self._validate_project_config(azure_ai_project) - self.azure_ai_project = azure_ai_project - self.azure_ai_project["api_version"] = "2024-06-01" + self._validate_model_config(model_config) + self.model_config = model_config + if "api_version" not in self.model_config: + self.model_config["api_version"] = "2024-06-01" self.credential = credential @staticmethod - def _validate_project_config(azure_ai_project: Dict[str, Any]): + def _validate_model_config(model_config: Dict[str, Any]): """ - Validates the azure_ai_project configuration to ensure all required keys are present and have non-None values. + Validates the model_config to ensure all required keys are present and have non-None values. + If 'type' is not specified, it will attempt to infer the type based on the keys present. - :param azure_ai_project: The Azure AI project configuration dictionary. - :type azure_ai_project: Dict[str, Any] + :param model_config: The model configuration dictionary. + :type model_config: Dict[str, Any] :raises ValueError: If required keys are missing or any of the values are None. """ - required_keys = ["subscription_id", "resource_group_name", "project_name"] - if not all(key in azure_ai_project for key in required_keys): - raise ValueError(f"azure_ai_project must contain keys: {', '.join(required_keys)}") - if not all(azure_ai_project[key] for key in required_keys): - raise ValueError("subscription_id, resource_group_name, and project_name must not be None") + # Attempt to infer 'type' if not provided + if "type" not in model_config: + if "azure_deployment" in model_config and "azure_endpoint" in model_config: + model_config["type"] = "azure_openai" + elif "model" in model_config: + model_config["type"] = "openai" + else: + raise ValueError( + "Unable to infer 'type' from model_config. Please specify 'type' as 'azure_openai' or 'openai'." + ) + + if model_config["type"] == "azure_openai": + required_keys = ["azure_deployment", "azure_endpoint"] + elif model_config["type"] == "openai": + required_keys = ["api_key", "model"] + else: + raise ValueError("model_config 'type' must be 'azure_openai' or 'openai'.") + + missing_keys = [key for key in required_keys if key not in model_config] + if missing_keys: + raise ValueError(f"model_config is missing required keys: {', '.join(missing_keys)}") + none_keys = [key for key in required_keys if model_config.get(key) is None] + if none_keys: + raise ValueError(f"The following keys in model_config must not be None: {', '.join(none_keys)}") - # @monitor_task_simulator async def __call__( self, *, @@ -108,7 +133,7 @@ async def __call__( Modes: - Task-Free Mode: When only num_queries is specified and tasks is not, the method generates num_queries x max_conversation_turns lines of simulated data grounded in the context of the text. - - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines are simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used. + - Task-Specific Mode: When both num_queries and tasks are specified, the method generates lines of simulated data based on the tasks. If num_queries > len(tasks), the remaining lines will be simulated in task-free mode. If num_queries < len(tasks), only the first num_queries tasks are used. - Conversation Starter Mode: When conversation_turns are specified, the method starts each conversation with the user-specified queries and then follows the conversation history for the remaining turns. """ if conversation_turns and (text or tasks): @@ -127,7 +152,7 @@ async def __call__( num_queries = min(num_queries, len(tasks)) max_conversation_turns *= 2 # account for both user and assistant turns - prompty_model_config = self.azure_ai_project + prompty_model_config = self.model_config if conversation_turns: return await self._simulate_with_predefined_turns( target=target, @@ -266,7 +291,7 @@ async def _extend_conversation_with_simulator( :paramtype progress_bar: tqdm, """ user_flow = self._load_user_simulation_flow( - user_simulator_prompty=user_simulator_prompty, # type: ignore + user_simulator_prompty=user_simulator_prompty, # type: ignore prompty_model_config=prompty_model_config, user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, ) @@ -294,7 +319,7 @@ def _load_user_simulation_flow( user_simulator_prompty: Union[str, os.PathLike], prompty_model_config: Dict[str, Any], user_simulator_prompty_kwargs: Dict[str, Any], - ) -> "Prompty": # type: ignore + ) -> "AsyncPrompty": # type: ignore """ Loads the flow for simulating user interactions. @@ -305,7 +330,7 @@ def _load_user_simulation_flow( :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. :paramtype user_simulator_prompty_kwargs: Dict[str, Any] :return: The loaded flow for simulating user interactions. - :rtype: Flow + :rtype: AsyncPrompty """ if not user_simulator_prompty: package = "azure.ai.evaluation.simulator._prompty" @@ -315,30 +340,30 @@ def _load_user_simulation_flow( # pylint: disable=deprecated-method with pkg_resources.path(package, resource_name) as prompty_path: prompty_model_config = construct_prompty_model_config( - model_config=prompty_model_config, # type: ignore + model_config=prompty_model_config, # type: ignore default_api_version="2024-06-01", user_agent=USER_AGENT, ) - return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) + return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore except FileNotFoundError as e: - raise f"Flow path for {resource_name} does not exist in package {package}." from e + raise FileNotFoundError(f"Flow path for {resource_name} does not exist in package {package}.") from e prompty_model_config = construct_prompty_model_config( - model_config=prompty_model_config, # type: ignore + model_config=prompty_model_config, # type: ignore default_api_version="2024-06-01", user_agent=USER_AGENT, ) return AsyncPrompty.load( - source=user_simulator_prompty, + source=user_simulator_prompty, model=prompty_model_config, **user_simulator_prompty_kwargs, - ) + ) # type: ignore def _parse_prompty_response(self, *, response: str) -> Dict[str, Any]: """ Parses the response from the prompty execution. :keyword response: The raw response from the prompty. - :paramtype str: str + :paramtype response: str :return: A dictionary representing the parsed response content. :rtype: Dict[str, Any] :raises ValueError: If the response cannot be parsed. @@ -400,7 +425,7 @@ async def _generate_query_responses( :raises RuntimeError: If an error occurs during query generation. """ query_flow = self._load_query_generation_flow( - query_response_generating_prompty=query_response_generating_prompty, # type: ignore + query_response_generating_prompty=query_response_generating_prompty, # type: ignore prompty_model_config=prompty_model_config, query_response_generating_prompty_kwargs=query_response_generating_prompty_kwargs, ) @@ -419,7 +444,7 @@ def _load_query_generation_flow( query_response_generating_prompty: Union[str, os.PathLike], prompty_model_config: Dict[str, Any], query_response_generating_prompty_kwargs: Dict[str, Any], - ) -> "Prompty": # type: ignore + ) -> "AsyncPrompty": """ Loads the flow for generating query responses. @@ -430,7 +455,7 @@ def _load_query_generation_flow( :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the flow. :paramtype query_response_generating_prompty_kwargs: Dict[str, Any] :return: The loaded flow for generating query responses. - :rtype: Flow + :rtype: AsyncPrompty """ if not query_response_generating_prompty: package = "azure.ai.evaluation.simulator._prompty" @@ -440,30 +465,30 @@ def _load_query_generation_flow( # pylint: disable=deprecated-method with pkg_resources.path(package, resource_name) as prompty_path: prompty_model_config = construct_prompty_model_config( - model_config=prompty_model_config, # type: ignore + model_config=prompty_model_config, # type: ignore default_api_version="2024-06-01", user_agent=USER_AGENT, ) - return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) + return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore except FileNotFoundError as e: - raise f"Flow path for {resource_name} does not exist in package {package}." from e + raise FileNotFoundError(f"Flow path for {resource_name} does not exist in package {package}.") from e prompty_model_config = construct_prompty_model_config( - model_config=prompty_model_config, # type: ignore + model_config=prompty_model_config, # type: ignore default_api_version="2024-06-01", user_agent=USER_AGENT, ) return AsyncPrompty.load( - source=query_response_generating_prompty, + source=query_response_generating_prompty, model=prompty_model_config, - **query_response_generating_prompty_kwargs - ) + **query_response_generating_prompty_kwargs, + ) # type: ignore async def _create_conversations_from_query_responses( self, *, query_responses: List[Dict[str, str]], max_conversation_turns: int, - tasks: List[Dict], + tasks: List[str], user_simulator_prompty: Optional[str], user_simulator_prompty_kwargs: Dict[str, Any], target: Callable, @@ -477,7 +502,7 @@ async def _create_conversations_from_query_responses( :keyword max_conversation_turns: The maximum number of conversation turns. :paramtype max_conversation_turns: int :keyword tasks: A list of tasks for the simulation. - :paramtype tasks: List[Dict] + :paramtype tasks: List[str] :keyword user_simulator_prompty: Path to the user simulator prompty file. :paramtype user_simulator_prompty: Optional[str] :keyword user_simulator_prompty_kwargs: Additional keyword arguments for the user simulator prompty. @@ -507,7 +532,7 @@ async def _create_conversations_from_query_responses( conversation = await self._complete_conversation( conversation_starter=query, max_conversation_turns=max_conversation_turns, - task=task, # type: ignore + task=task, # type: ignore user_simulator_prompty=user_simulator_prompty, user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, target=target, @@ -571,8 +596,8 @@ async def _complete_conversation( while len(conversation_history) < max_conversation_turns: user_flow = self._load_user_simulation_flow( - user_simulator_prompty=user_simulator_prompty, # type: ignore - prompty_model_config=self.azure_ai_project, + user_simulator_prompty=user_simulator_prompty, # type: ignore + prompty_model_config=self.model_config, # type: ignore user_simulator_prompty_kwargs=user_simulator_prompty_kwargs, ) conversation_starter_from_simulated_user = await user_flow( @@ -601,7 +626,6 @@ async def _complete_conversation( return conversation_history.to_list() - async def _get_target_response( self, *, target: Callable, api_call_delay_sec: float, conversation_history: ConversationHistory ) -> str: diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py index 14bf9c1fdcba..b98d5940bba6 100644 --- a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_non_adv_simulator.py @@ -4,78 +4,319 @@ # flake8: noqa # type: ignore import asyncio -from unittest.mock import AsyncMock, MagicMock, Mock, patch +from unittest.mock import AsyncMock, patch -from azure.ai.evaluation.simulator._utils import JsonLineChatProtocol import pytest - from azure.ai.evaluation.simulator import Simulator -from promptflow.core import AzureOpenAIModelConfiguration +from azure.ai.evaluation.simulator._utils import JsonLineChatProtocol @pytest.fixture() -def async_callback(): - async def callback(x): - return x +def valid_azure_model_config(): + return { + "azure_deployment": "test_deployment", + "azure_endpoint": "https://test-endpoint.openai.azure.com/", + } + - yield callback +@pytest.fixture() +def invalid_azure_model_config(): + # Missing 'azure_endpoint' + return { + "azure_deployment": "test_deployment", + } @pytest.fixture() -def valid_project(): +def valid_openai_model_config(): return { - "subscription_id": "test_subscription", - "resource_group_name": "test_resource_group", - "project_name": "test_project", + "api_key": "test_api_key", + "model": "gpt-3.5-turbo", } @pytest.fixture() -def invalid_project(): - return {"subscription_id": None, "resource_group_name": "test_resource_group", "project_name": "test_project"} +def invalid_openai_model_config(): + # Missing 'model' + return { + "api_key": "test_api_key", + } @pytest.mark.unittest -class TestNonAdvSimulator: - def test_init_valid_project(self, valid_project): - simulator = Simulator(azure_ai_project=valid_project) - assert simulator.azure_ai_project["subscription_id"] == "test_subscription" - assert simulator.azure_ai_project["api_version"] == "2024-06-01" - - def test_init_invalid_project(self, invalid_project): - with pytest.raises(ValueError): - Simulator(azure_ai_project=invalid_project) - - def test_validate_project_config_valid(self, valid_project): - Simulator._validate_project_config(valid_project) # Should not raise - - def test_validate_project_config_invalid(self, invalid_project): - with pytest.raises(ValueError): - Simulator._validate_project_config(invalid_project) - - def test_validate_project_config_missing_keys(self): - with pytest.raises(ValueError): - Simulator._validate_project_config({"subscription_id": "test_subscription"}) - - def test_validate_project_config_none_values(self): - with pytest.raises(ValueError): - Simulator._validate_project_config( - {"subscription_id": None, "resource_group_name": "test", "project_name": "test"} +class TestSimulator: + def test_init_valid_azure_model_config(self, valid_azure_model_config): + simulator = Simulator(model_config=valid_azure_model_config) + assert simulator.model_config["azure_deployment"] == "test_deployment" + assert simulator.model_config["api_version"] == "2024-06-01" + + def test_init_valid_openai_model_config(self, valid_openai_model_config): + simulator = Simulator(model_config=valid_openai_model_config) + assert simulator.model_config["model"] == "gpt-3.5-turbo" + assert simulator.model_config["api_version"] == "2024-06-01" + + def test_init_invalid_azure_model_config(self, invalid_azure_model_config): + with pytest.raises(ValueError) as exc_info: + Simulator(model_config=invalid_azure_model_config) + assert exc_info is not None + + def test_init_invalid_openai_model_config(self, invalid_openai_model_config): + with pytest.raises(ValueError) as exc_info: + Simulator(model_config=invalid_openai_model_config) + assert exc_info is not None + + def test_validate_model_config_valid_azure(self, valid_azure_model_config): + Simulator._validate_model_config(valid_azure_model_config) # Should not raise + + def test_validate_model_config_valid_openai(self, valid_openai_model_config): + Simulator._validate_model_config(valid_openai_model_config) # Should not raise + + def test_validate_model_config_infer_type_azure(self, valid_azure_model_config): + if "type" in valid_azure_model_config: + del valid_azure_model_config["type"] + Simulator._validate_model_config(valid_azure_model_config) + assert valid_azure_model_config["type"] == "azure_openai" + + def test_validate_model_config_infer_type_openai(self, valid_openai_model_config): + if "type" in valid_openai_model_config: + del valid_openai_model_config["type"] + Simulator._validate_model_config(valid_openai_model_config) + assert valid_openai_model_config["type"] == "openai" + + def test_validate_model_config_unable_to_infer_type(self): + model_config = {"api_key": "test_api_key"} # Not enough info to infer type + with pytest.raises(ValueError) as exc_info: + Simulator._validate_model_config(model_config) + assert "Unable to infer 'type' from model_config" in str(exc_info.value) + + def test_validate_model_config_invalid_type(self): + model_config = { + "type": "invalid_type", + "api_key": "test_api_key", + "model": "gpt-3.5-turbo", + } + with pytest.raises(ValueError) as exc_info: + Simulator._validate_model_config(model_config) + assert "model_config 'type' must be 'azure_openai' or 'openai'" in str(exc_info.value) + + def test_validate_model_config_none_values(self): + model_config = { + "type": "azure_openai", + "azure_deployment": None, + "azure_endpoint": "https://test-endpoint.openai.azure.com/", + "api_key": "test_api_key", + } + with pytest.raises(ValueError) as exc_info: + Simulator._validate_model_config(model_config) + assert "must not be None" in str(exc_info.value) + + def test_parse_prompty_response_valid_json(self, valid_azure_model_config): + simulator = Simulator(model_config=valid_azure_model_config) + response = '{"content": "Test response"}' + parsed_response = simulator._parse_prompty_response(response=response) + assert parsed_response == {"content": "Test response"} + + def test_parse_prompty_response_invalid_json(self, valid_azure_model_config): + simulator = Simulator(model_config=valid_azure_model_config) + response = "Invalid JSON" + with pytest.raises(ValueError) as exc_info: + simulator._parse_prompty_response(response=response) + assert "Error parsing response content" in str(exc_info.value) + + @pytest.mark.asyncio + @patch("azure.ai.evaluation.simulator._simulator.AsyncPrompty.load") + async def test_generate_query_responses(self, mock_async_prompty_load, valid_azure_model_config): + simulator = Simulator(model_config=valid_azure_model_config) + mock_flow = AsyncMock() + mock_flow.return_value = '[{"q": "query1", "r": "response1"}]' + mock_async_prompty_load.return_value = mock_flow + + query_responses = await simulator._generate_query_responses( + text="Test text", + num_queries=1, + query_response_generating_prompty=None, + query_response_generating_prompty_kwargs={}, + prompty_model_config={}, + ) + assert query_responses == [{"q": "query1", "r": "response1"}] + + @patch("azure.ai.evaluation.simulator._simulator.AsyncPrompty.load") + def test_load_user_simulation_flow(self, mock_async_prompty_load, valid_azure_model_config): + simulator = Simulator(model_config=valid_azure_model_config) + mock_async_prompty_load.return_value = AsyncMock() + user_flow = simulator._load_user_simulation_flow( + user_simulator_prompty=None, + prompty_model_config={}, + user_simulator_prompty_kwargs={}, + ) + assert user_flow is not None + + @pytest.mark.asyncio + @patch("azure.ai.evaluation.simulator._simulator.Simulator._load_user_simulation_flow") + @patch("azure.ai.evaluation.simulator._simulator.Simulator._get_target_response") + async def test_complete_conversation( + self, mock_get_target_response, mock_load_user_simulation_flow, valid_azure_model_config + ): + simulator = Simulator(model_config=valid_azure_model_config) + mock_user_flow = AsyncMock() + mock_user_flow.return_value = {"content": "User response"} + mock_load_user_simulation_flow.return_value = mock_user_flow + mock_get_target_response.return_value = "Assistant response" + + conversation = await simulator._complete_conversation( + conversation_starter="Hello", + max_conversation_turns=4, + task="Test task", + user_simulator_prompty=None, + user_simulator_prompty_kwargs={}, + target=AsyncMock(), + api_call_delay_sec=0, + progress_bar=AsyncMock(), + ) + assert len(conversation) == 4 + assert conversation[0]["role"] == "user" + assert conversation[0]["content"] == "User response" + assert conversation[1]["role"] == "assistant" + assert conversation[1]["content"] == "Assistant response" + + @pytest.mark.asyncio + async def test_get_target_response(self, valid_openai_model_config): + simulator = Simulator(model_config=valid_openai_model_config) + mock_target = AsyncMock() + mock_target.return_value = { + "messages": [ + {"role": "assistant", "content": "Assistant response"}, + ] + } + response = await simulator._get_target_response( + target=mock_target, + api_call_delay_sec=0, + conversation_history=AsyncMock(), + ) + assert response == "Assistant response" + + @pytest.mark.asyncio + async def test_call_with_both_conversation_turns_and_text_tasks(self, valid_openai_model_config): + simulator = Simulator(model_config=valid_openai_model_config) + with pytest.raises(ValueError, match="Cannot specify both conversation_turns and text/tasks"): + await simulator( + target=AsyncMock(), + max_conversation_turns=2, + conversation_turns=[["user_turn"]], + text="some text", + tasks=[{"task": "task"}], + api_call_delay_sec=1, ) - def test_build_prompty_model_config(self, valid_project): - simulator = Simulator(azure_ai_project=valid_project) - config = simulator._build_prompty_model_config() - assert "configuration" in config - assert config["configuration"] == valid_project + @pytest.mark.asyncio + @patch("azure.ai.evaluation.simulator._simulator.Simulator._simulate_with_predefined_turns", new_callable=AsyncMock) + async def test_call_with_conversation_turns(self, mock_simulate_with_predefined_turns, valid_openai_model_config): + simulator = Simulator(model_config=valid_openai_model_config) + mock_simulate_with_predefined_turns.return_value = [JsonLineChatProtocol({"messages": []})] + + result = await simulator( + target=AsyncMock(), + max_conversation_turns=2, + conversation_turns=[["user_turn"]], + api_call_delay_sec=1, + ) + assert len(result) == 1 + assert isinstance(result[0], JsonLineChatProtocol) @pytest.mark.asyncio - @patch("azure.ai.evaluation.simulator.Simulator._get_target_response", new_callable=AsyncMock) - @patch("azure.ai.evaluation.simulator.Simulator._extend_conversation_with_simulator", new_callable=AsyncMock) + @patch("azure.ai.evaluation.simulator._simulator.Simulator._generate_query_responses", new_callable=AsyncMock) + @patch( + "azure.ai.evaluation.simulator._simulator.Simulator._create_conversations_from_query_responses", + new_callable=AsyncMock, + ) + async def test_call_with_text_and_tasks( + self, + mock_create_conversations_from_query_responses, + mock_generate_query_responses, + valid_openai_model_config, + ): + simulator = Simulator(model_config=valid_openai_model_config) + mock_generate_query_responses.return_value = [{"q": "query", "r": "response"}] + mock_create_conversations_from_query_responses.return_value = [JsonLineChatProtocol({"messages": []})] + + result = await simulator( + target=AsyncMock(), + max_conversation_turns=2, + text="some text", + tasks=[{"task": "task"}], + api_call_delay_sec=1, + num_queries=1, + ) + assert len(result) == 1 + assert isinstance(result[0], JsonLineChatProtocol) + + @pytest.mark.asyncio + @patch("azure.ai.evaluation.simulator._simulator.Simulator._generate_query_responses", new_callable=AsyncMock) + @patch( + "azure.ai.evaluation.simulator._simulator.Simulator._create_conversations_from_query_responses", + new_callable=AsyncMock, + ) + async def test_call_with_num_queries_greater_than_tasks( + self, + mock_create_conversations_from_query_responses, + mock_generate_query_responses, + valid_openai_model_config, + ): + simulator = Simulator(model_config=valid_openai_model_config) + mock_generate_query_responses.return_value = [{"q": "query", "r": "response"}] + mock_create_conversations_from_query_responses.return_value = [JsonLineChatProtocol({"messages": []})] + tasks = [{"task": "task1"}] + + with pytest.warns(UserWarning, match="You have specified 'num_queries' > len\\('tasks'\\)"): + result = await simulator( + target=AsyncMock(), + max_conversation_turns=2, + text="some text", + tasks=tasks, + api_call_delay_sec=1, + num_queries=2, + ) + assert len(result) == 1 + assert isinstance(result[0], JsonLineChatProtocol) + + @pytest.mark.asyncio + @patch("azure.ai.evaluation.simulator._simulator.Simulator._generate_query_responses", new_callable=AsyncMock) + @patch( + "azure.ai.evaluation.simulator._simulator.Simulator._create_conversations_from_query_responses", + new_callable=AsyncMock, + ) + async def test_call_with_num_queries_less_than_tasks( + self, + mock_create_conversations_from_query_responses, + mock_generate_query_responses, + valid_openai_model_config, + ): + simulator = Simulator(model_config=valid_openai_model_config) + mock_generate_query_responses.return_value = [{"q": "query", "r": "response"}] + mock_create_conversations_from_query_responses.return_value = [JsonLineChatProtocol({"messages": []})] + tasks = [{"task": "task1"}, {"task": "task2"}] + + with pytest.warns(UserWarning, match="You have specified 'num_queries' < len\\('tasks'\\)"): + result = await simulator( + target=AsyncMock(), + max_conversation_turns=2, + text="some text", + tasks=tasks, + api_call_delay_sec=1, + num_queries=1, + ) + assert len(result) == 1 + assert isinstance(result[0], JsonLineChatProtocol) + + @pytest.mark.asyncio + @patch("azure.ai.evaluation.simulator._simulator.Simulator._get_target_response", new_callable=AsyncMock) + @patch( + "azure.ai.evaluation.simulator._simulator.Simulator._extend_conversation_with_simulator", new_callable=AsyncMock + ) async def test_simulate_with_predefined_turns( - self, mock_extend_conversation_with_simulator, mock_get_target_response, valid_project + self, mock_extend_conversation_with_simulator, mock_get_target_response, valid_openai_model_config ): - simulator = Simulator(azure_ai_project=valid_project) + simulator = Simulator(model_config=valid_openai_model_config) mock_get_target_response.return_value = "assistant_response" mock_extend_conversation_with_simulator.return_value = None @@ -94,9 +335,11 @@ async def test_simulate_with_predefined_turns( assert isinstance(result[0], JsonLineChatProtocol) @pytest.mark.asyncio - @patch("azure.ai.evaluation.simulator.Simulator._complete_conversation", new_callable=AsyncMock) - async def test_create_conversations_from_query_responses(self, mock_complete_conversation, valid_project): - simulator = Simulator(azure_ai_project=valid_project) + @patch("azure.ai.evaluation.simulator._simulator.Simulator._complete_conversation", new_callable=AsyncMock) + async def test_create_conversations_from_query_responses( + self, mock_complete_conversation, valid_openai_model_config + ): + simulator = Simulator(model_config=valid_openai_model_config) mock_complete_conversation.return_value = [{"role": "user", "content": "query"}] query_responses = [{"q": "query", "r": "response"}] @@ -114,16 +357,3 @@ async def test_create_conversations_from_query_responses(self, mock_complete_con assert len(result) == 1 assert isinstance(result[0], JsonLineChatProtocol) - - @pytest.mark.asyncio - async def test_call_with_both_conversation_turns_and_text_tasks(self, valid_project): - simulator = Simulator(azure_ai_project=valid_project) - with pytest.raises(ValueError, match="Cannot specify both conversation_turns and text/tasks"): - await simulator( - target=AsyncMock(), - max_conversation_turns=2, - conversation_turns=[["user_turn"]], - text="some text", - tasks=[{"task": "task"}], - api_call_delay_sec=1, - ) From c05e80e7fcd91a31e45584747ef1e78fb0198166 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 3 Oct 2024 12:16:45 -0700 Subject: [PATCH 07/14] Changelog updated --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 1353ec8ce3ce..a37cd3c89ead 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -66,10 +66,7 @@ azure_ai_project = { ``` After: ```python -azure_ai_project = { - "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"), - "resource_group_name": os.environ.get("RESOURCE_GROUP"), - "project_name": os.environ.get("PROJECT_NAME"), +model_config = { "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), } From efdc18a404839aaaf6594a50160d2d963f86cff9 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 3 Oct 2024 15:03:15 -0700 Subject: [PATCH 08/14] Update _simulator.py --- .../azure/ai/evaluation/simulator/_simulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index c46e6ceff73c..bf07dfba9ec9 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -1,5 +1,5 @@ # flake8: noqa -# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611 +# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0411 # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- From e6944a1dc8415536594146862f63e5b2a9aaf3ab Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Thu, 3 Oct 2024 16:26:48 -0700 Subject: [PATCH 09/14] Update _simulator.py --- .../azure/ai/evaluation/simulator/_simulator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index bb2805c8e311..acf38ec01254 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -1,5 +1,5 @@ # flake8: noqa -# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0114,R0913,E0702,R0903 +# pylint: disable=W0102,W0613,R0914,C0301,E0401,E0611,C0114,R0913,E0702,R0903,C0411 # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- From 3eb8646f845af0fa18529c985bcb632a228ef9e7 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Fri, 4 Oct 2024 11:42:37 -0700 Subject: [PATCH 10/14] Remove credentails and update docs --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 3 +++ sdk/evaluation/azure-ai-evaluation/README.md | 5 +++-- .../azure/ai/evaluation/simulator/_simulator.py | 8 ++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index a37cd3c89ead..9161d53d54f1 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -63,6 +63,7 @@ azure_ai_project = { "resource_group_name": os.environ.get("RESOURCE_GROUP"), "project_name": os.environ.get("PROJECT_NAME"), } +sim = Simulator(azure_ai_project=azure_ai_project, credentails=DefaultAzureCredentials()) ``` After: ```python @@ -70,7 +71,9 @@ model_config = { "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), } +sim = Simulator(model_config=model_config) ``` +If `api_key` is not included in the `model_config`, the prompty runtime in `promtpflow-core` will pick up `DefaultAzureCredential`. ### Bugs Fixed diff --git a/sdk/evaluation/azure-ai-evaluation/README.md b/sdk/evaluation/azure-ai-evaluation/README.md index d1f66cc06420..0b0cdd0a21ff 100644 --- a/sdk/evaluation/azure-ai-evaluation/README.md +++ b/sdk/evaluation/azure-ai-evaluation/README.md @@ -147,7 +147,6 @@ import asyncio from typing import Any, Dict, List, Optional from azure.ai.evaluation.simulator import Simulator from promptflow.client import load_flow -from azure.identity import DefaultAzureCredential import os import wikipedia @@ -155,6 +154,8 @@ import wikipedia model_config = { "azure_endpoint": os.environ.get("AZURE_OPENAI_ENDPOINT"), "azure_deployment": os.environ.get("AZURE_DEPLOYMENT"), + # not providing key would make the SDK pick up `DefaultAzureCredential` + # use "api_key": "" } # Use Wikipedia to get some text for the simulation @@ -206,7 +207,7 @@ async def callback( return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context} async def main(): - simulator = Simulator(model_config=model_config, credential=DefaultAzureCredential()) + simulator = Simulator(model_config=model_config) outputs = await simulator( target=callback, text=text, diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index c46e6ceff73c..27ae2c5cfbfb 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -5,6 +5,7 @@ # --------------------------------------------------------- import asyncio import importlib.resources as pkg_resources +from tqdm import tqdm import json import os import re @@ -13,7 +14,6 @@ from promptflow.core import AsyncPrompty from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration from azure.ai.evaluation._common.utils import construct_prompty_model_config -from tqdm import tqdm from .._user_agent import USER_AGENT from ._conversation.constants import ConversationRole @@ -31,23 +31,19 @@ class Simulator: def __init__( self, - model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], - credential: Optional[Any] = None, + model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration] ): """ Initializes the task simulator with the model configuration. :param model_config: A dictionary defining the configuration for the model. Acceptable types are AzureOpenAIModelConfiguration and OpenAIModelConfiguration. :type model_config: Union[~azure.ai.evaluation.AzureOpenAIModelConfiguration, ~azure.ai.evaluation.OpenAIModelConfiguration] - :param credential: Azure credentials to authenticate the user. If None, the default credentials are used. - :paramtype credential: Optional[Any] :raises ValueError: If the model_config does not contain the required keys or any value is None. """ self._validate_model_config(model_config) self.model_config = model_config if "api_version" not in self.model_config: self.model_config["api_version"] = "2024-06-01" - self.credential = credential @staticmethod def _validate_model_config(model_config: Dict[str, Any]): From c4decf847404811f1380bf7582cd19acfbf5f362 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Mon, 7 Oct 2024 08:01:15 -0700 Subject: [PATCH 11/14] Better message --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 8b7f41b43ed7..60c49a3b9416 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -66,7 +66,7 @@ evaluate( ) ``` -- Simulator now requires configuration to call the prompty within the `azure_ai_project`. This enables key-less usage of the simulator. +- Simulator now requires configuration to call the prompty within the `azure_ai_project`. This enables the usage of simulator with Entra ID based auth. Before: ```python azure_ai_project = { From 1badd0cb58f0b55989ab2d1018123f885c87c338 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 8 Oct 2024 13:31:35 -0700 Subject: [PATCH 12/14] Update sdk/evaluation/azure-ai-evaluation/CHANGELOG.md Co-authored-by: Neehar Duvvuri <40341266+needuv@users.noreply.github.com> --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 60c49a3b9416..02621ef6877e 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -84,7 +84,7 @@ model_config = { } sim = Simulator(model_config=model_config) ``` -If `api_key` is not included in the `model_config`, the prompty runtime in `promtpflow-core` will pick up `DefaultAzureCredential`. +If `api_key` is not included in the `model_config`, the prompty runtime in `promptflow-core` will pick up `DefaultAzureCredential`. ### Bugs Fixed From 8bece13090da4326bc0ae348284b31de168f0721 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 8 Oct 2024 13:31:44 -0700 Subject: [PATCH 13/14] Update sdk/evaluation/azure-ai-evaluation/CHANGELOG.md Co-authored-by: Neehar Duvvuri <40341266+needuv@users.noreply.github.com> --- sdk/evaluation/azure-ai-evaluation/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 02621ef6877e..846b3fd9d179 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -66,7 +66,7 @@ evaluate( ) ``` -- Simulator now requires configuration to call the prompty within the `azure_ai_project`. This enables the usage of simulator with Entra ID based auth. +- Simulator now requires a model configuration to call the prompty instead of an Azure AI project scope. This enables the usage of simulator with Entra ID based auth. Before: ```python azure_ai_project = { From 1997ab95e30b17c5e42d4dfa38a91622e66be037 Mon Sep 17 00:00:00 2001 From: Nagkumar Arkalgud Date: Tue, 8 Oct 2024 13:53:32 -0700 Subject: [PATCH 14/14] Fix the indent error --- .../ai/evaluation/simulator/_simulator.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index 1da025a3be18..06a62a97781a 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -28,10 +28,7 @@ class Simulator: Simulator for generating synthetic conversations. """ - def __init__( - self, - model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration] - ): + def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]): """ Initializes the task simulator with the model configuration. @@ -42,10 +39,10 @@ def __init__( self._validate_model_config(model_config) self.model_config = model_config if "api_version" not in self.model_config: - self.model_config["api_version"] = "2024-06-01" + self.model_config["api_version"] = "2024-06-01" # type: ignore @staticmethod - def _validate_model_config(model_config: Dict[str, Any]): + def _validate_model_config(model_config: Any): """ Validates the model_config to ensure all required keys are present and have non-None values. If 'type' is not specified, it will attempt to infer the type based on the keys present. @@ -186,7 +183,7 @@ async def _simulate_with_predefined_turns( user_simulator_prompty: Optional[str], user_simulator_prompty_kwargs: Dict[str, Any], api_call_delay_sec: float, - prompty_model_config: Dict[str, Any], + prompty_model_config: Any, ) -> List[JsonLineChatProtocol]: """ Simulates conversations using predefined conversation turns. @@ -204,7 +201,7 @@ async def _simulate_with_predefined_turns( :keyword api_call_delay_sec: Delay in seconds between API calls. :paramtype api_call_delay_sec: float :keyword prompty_model_config: The configuration for the prompty model. - :paramtype prompty_model_config: Dict[str, Any] + :paramtype prompty_model_config: Any :return: A list of simulated conversations represented as JsonLineChatProtocol objects. :rtype: List[JsonLineChatProtocol] """ @@ -341,7 +338,7 @@ def _load_user_simulation_flow( ) return AsyncPrompty.load(source=prompty_path, model=prompty_model_config) # type: ignore except FileNotFoundError as e: - msg = f"Flow path for {resource_name} does not exist in package {package}." + msg = f"Flow path for {resource_name} does not exist in package {package}." raise EvaluationException( message=msg, internal_message=msg, @@ -405,7 +402,7 @@ async def _generate_query_responses( num_queries: int, query_response_generating_prompty: Optional[str], query_response_generating_prompty_kwargs: Dict[str, Any], - prompty_model_config: Dict[str, Any], + prompty_model_config: Any, **kwargs, ) -> List[Dict[str, str]]: """ @@ -420,7 +417,7 @@ async def _generate_query_responses( :keyword query_response_generating_prompty_kwargs: Additional keyword arguments for the query response generating prompty. :paramtype query_response_generating_prompty_kwargs: Dict[str, Any] :keyword prompty_model_config: The configuration for the prompty model. - :paramtype prompty_model_config: Dict[str, Any] + :paramtype prompty_model_config: Any :return: A list of query-response dictionaries. :rtype: List[Dict[str, str]] :raises RuntimeError: If an error occurs during query generation.