Skip to content

Commit f92c4f0

Browse files
authored
Merge pull request Azure#3 from nagkumar91/task/mandatory_attack_objective
Make attack objective generator mandatory
2 parents 6efffcd + eae2dfe commit f92c4f0

File tree

7 files changed, with 142 additions and 176 deletions.

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_exceptions.py

+1
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ class ErrorTarget(Enum):
8080
MODELS = "Models"
8181
UNKNOWN = "Unknown"
8282
CONVERSATION = "Conversation"
83+
RED_TEAM_AGENT = "RedTeamAgent"
8384

8485

8586
class EvaluationException(AzureError):

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/__init__.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,8 @@ class AttackObjectiveGenerator:
2323
:param risk_categories: List of risk categories to generate attack objectives for
2424
:type risk_categories: List[RiskCategory]
2525
"""
26-
27-
# TODO num objectives here to replace num_rows in red team agent
28-
# Q: Should this be total or per category?
29-
def __init__(self, risk_categories):
26+
def __init__(self, risk_categories: list[RiskCategory], num_objectives: int = 10):
3027
self.risk_categories = risk_categories
28+
self.num_objectives = num_objectives
3129

3230
__all__ = ["RedTeamAgent", "AttackStrategy", "RiskCategory", "AttackObjectiveGenerator"]

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_safety_evaluation/_red_team_agent.py

+70-146
Large diffs are not rendered by default.

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/autogen/raiclient/operations/_operations.py

-1
Original file line number | Diff line number | Diff line change
@@ -621,7 +621,6 @@ def get_attack_objectives(
621621
"workspaceName": self._serialize.url("self._config.workspace_name", self._config.workspace_name, "str"),
622622
}
623623
_request.url = self._client.format_url(_request.url, **path_format_arguments)
624-
625624
_stream = kwargs.pop("stream", False)
626625
pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access
627626
_request, stream=_stream, **kwargs

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_generated_rai_client.py

+6-7
Original file line number | Diff line number | Diff line change
@@ -72,24 +72,23 @@ def _get_service_discovery_url(self):
7272
base_url = urlparse(response.json()["properties"]["discoveryUrl"])
7373
return f"{base_url.scheme}://{base_url.netloc}"
7474

75-
async def get_attack_objectives(self, risk_categories: List[str], application_scenario: str = None) -> Dict:
75+
async def get_attack_objectives(self, risk_categories: Optional[List[str]] = None, application_scenario: str = None, strategy: Optional[str] = None) -> Dict:
7676
"""Get attack objectives using the auto-generated operations.
7777
78-
:param risk_categories: List of risk categories to generate attack objectives for
79-
:type risk_categories: List[str]
78+
:param risk_categories: Optional list of risk categories to generate attack objectives for
79+
:type risk_categories: Optional[List[str]]
8080
:param application_scenario: Optional description of the application scenario for context
8181
:type application_scenario: str
8282
:return: The attack objectives
8383
:rtype: Dict
84-
"""
85-
84+
"""
85+
risk_categories = risk_categories or []
8686
try:
8787
# Send the request using the autogenerated client
8888
response = self._client.rai_svc.get_attack_objectives(
89-
risk_types=risk_categories,
89+
risk_types=[], # TODO: fix the filtering on this level
9090
lang="en"
9191
)
92-
# TODO figure out how to process this string output properly
9392
return response
9493

9594
except Exception as e:

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_model_tools/_rai_client.py

+15
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,9 @@ async def get_attack_objectives(self, risk_categories: List[str], application_sc
222222
:rtype: Any
223223
"""
224224
# Create query parameters for the request
225+
if application_scenario:
226+
raise NotImplementedError("Application scenario is not supported yet")
227+
225228
params = {
226229
"api-version": "2022-11-01-preview",
227230
"riskTypes": ",".join(risk_categories),
@@ -235,6 +238,18 @@ async def get_attack_objectives(self, risk_categories: List[str], application_sc
235238
try:
236239
# Make the request using the existing get method
237240
result = await self.get(self.attack_objectives_endpoint)
241+
# from collections import defaultdict
242+
# counts_by_risk = defaultdict(int)
243+
# for item in result:
244+
# target_harms = item.get("Metadata", {}).get("TargetHarms", [])
245+
# if not target_harms:
246+
# # No risk type specified
247+
# counts_by_risk["empty"] += 1
248+
# else:
249+
# for harm in target_harms:
250+
# # Use "empty" if the risk type field is missing
251+
# risk_type = harm.get("RiskType", "") or "empty"
252+
# counts_by_risk[risk_type] += 1
238253
return result
239254
except Exception:
240255
# If the API fails or isn't implemented yet, return a mock response

sdk/evaluation/azure-ai-evaluation/samples/pyrit_sim.py

+48-18
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,13 @@
55
pip install -e ".[pyrit]"
66
"""
77

8-
98
from typing import Dict, List, Optional
10-
from azure.ai.evaluation._safety_evaluation._red_team_agent import RedTeamAgent, AttackStrategy
9+
from azure.ai.evaluation._safety_evaluation import RedTeamAgent, AttackStrategy, AttackObjectiveGenerator, RiskCategory
1110
import os
1211
from azure.identity import DefaultAzureCredential
1312
from azure.ai.evaluation.simulator import AdversarialScenario
1413
from pyrit.prompt_target import OpenAIChatTarget
1514
from pyrit.common import initialize_pyrit, DUCK_DB
16-
from azure.ai.evaluation._safety_evaluation import AttackObjectiveGenerator, RiskCategory
1715

1816

1917
async def main():
@@ -23,35 +21,55 @@ async def main():
2321
"project_name": os.environ.get("AZURE_PROJECT_NAME"),
2422
}
2523

26-
2724
# [START red_team_agent_targets]
2825
# Model config target
2926
model_config = {
3027
"azure_endpoint": os.environ.get("AZURE_ENDPOINT"),
3128
"azure_deployment": os.environ.get("AZURE_DEPLOYMENT_NAME"),
3229
}
3330

31+
## Minimal inputs
32+
attack_objective_generator = AttackObjectiveGenerator(
33+
risk_categories=[
34+
RiskCategory.HateUnfairness,
35+
],
36+
num_objectives=10,
37+
)
38+
3439
red_team_agent = RedTeamAgent(
3540
azure_ai_project=azure_ai_project,
3641
credential=DefaultAzureCredential(),
3742
)
3843

3944
outputs = await red_team_agent.attack(
4045
target=model_config, # type: ignore
46+
attack_objective_generator=attack_objective_generator,
4147
)
4248
print(outputs)
4349

4450
# Mock function target to simulate an AI application
4551
def call_to_ai_application(query: str) -> str:
4652
return "mock response"
4753

54+
## Maximal inputs
55+
attack_objective_generator = AttackObjectiveGenerator(
56+
risk_categories=[
57+
RiskCategory.HateUnfairness,
58+
RiskCategory.Violence,
59+
RiskCategory.Sexual,
60+
RiskCategory.SelfHarm,
61+
],
62+
num_objectives=10,
63+
)
64+
4865
red_team_agent = RedTeamAgent(
4966
azure_ai_project=azure_ai_project,
5067
credential=DefaultAzureCredential(),
5168
)
5269

5370
outputs = await red_team_agent.attack(
5471
target=call_to_ai_application, # type: ignore
72+
attack_objective_generator=attack_objective_generator,
5573
)
5674
print(outputs)
5775

@@ -62,7 +80,7 @@ def callback_target(
6280
session_state: Optional[str] = None,
6381
context: Optional[Dict] = None
6482
) -> dict:
65-
messages_list = [{"role": chat_message.role,"content": chat_message.content,} for chat_message in messages] #type: ignore
83+
messages_list = [{"role": chat_message.role,"content": chat_message.content} for chat_message in messages] #type: ignore
6684
latest_message = messages_list[-1]
6785
application_input = latest_message["content"]
6886
try:
@@ -86,14 +104,15 @@ def callback_target(
86104

87105
outputs = await red_team_agent.attack(
88106
target=callback_target, # type: ignore
107+
attack_objective_generator=attack_objective_generator,
89108
)
90109
print(outputs)
91110

92111
# Pyrit target
93112
initialize_pyrit(memory_db_type=DUCK_DB)
94113
pyrit_target = OpenAIChatTarget(
95-
deployment_name = os.environ.get("AZURE_DEPLOYMENT_NAME"),
96-
endpoint = os.environ.get("AZURE_ENDPOINT"),
114+
deployment_name=os.environ.get("AZURE_DEPLOYMENT_NAME"),
115+
endpoint=os.environ.get("AZURE_ENDPOINT"),
97116
use_aad_auth=True
98117
)
99118

@@ -104,6 +123,7 @@ def callback_target(
104123

105124
outputs = await red_team_agent.attack(
106125
target=pyrit_target, # type: ignore
126+
attack_objective_generator=attack_objective_generator,
107127
)
108128
print(outputs)
109129
# [END red_team_agent_targets]
@@ -117,42 +137,46 @@ def callback_target(
117137

118138
outputs = await red_team_agent.attack(
119139
target=call_to_ai_application, # type: ignore
140+
attack_objective_generator=attack_objective_generator,
120141
)
121142
print(outputs)
122143

123-
# Low budget
144+
# EASY budget
124145
red_team_agent = RedTeamAgent(
125146
azure_ai_project=azure_ai_project,
126147
credential=DefaultAzureCredential(),
127148
)
128149

129150
outputs = await red_team_agent.attack(
130151
target=call_to_ai_application, # type: ignore
131-
attack_strategy=[AttackStrategy.LOW]
152+
attack_strategy=[AttackStrategy.EASY],
153+
attack_objective_generator=attack_objective_generator,
132154
)
133155
print(outputs)
134156

135-
# Medium budget
157+
# MODERATE budget
136158
red_team_agent = RedTeamAgent(
137159
azure_ai_project=azure_ai_project,
138160
credential=DefaultAzureCredential(),
139161
)
140162

141163
outputs = await red_team_agent.attack(
142164
target=model_config, # type: ignore
143-
attack_strategy=[AttackStrategy.MEDIUM]
165+
attack_strategy=[AttackStrategy.MODERATE],
166+
attack_objective_generator=attack_objective_generator,
144167
)
145168
print(outputs)
146169

147-
# High budget
170+
# DIFFICULT budget
148171
red_team_agent = RedTeamAgent(
149172
azure_ai_project=azure_ai_project,
150173
credential=DefaultAzureCredential(),
151174
)
152175

153176
outputs = await red_team_agent.attack(
154177
target=model_config, # type: ignore
155-
attack_strategy=[AttackStrategy.HIGH]
178+
attack_strategy=[AttackStrategy.DIFFICULT],
179+
attack_objective_generator=attack_objective_generator,
156180
)
157181

158182
# Compose attack strategies
@@ -164,8 +188,10 @@ def callback_target(
164188
outputs = await red_team_agent.attack(
165189
target=model_config, # type: ignore
166190
attack_strategy=[AttackStrategy.Compose([AttackStrategy.Flip, AttackStrategy.Base64]),
167-
AttackStrategy.LOW,
191+
AttackStrategy.EASY,
168192
AttackStrategy.Morse],
193+
output_path="RacoonRedTeamEvalResults.jsonl",
194+
attack_objective_generator=attack_objective_generator,
169195
)
170196
print(outputs)
171197

@@ -177,19 +203,22 @@ def callback_target(
177203

178204
outputs = await red_team_agent.attack(
179205
target=model_config, # type: ignore
180-
attack_strategy=[AttackStrategy.HIGH, AttackStrategy.Compose([AttackStrategy.Math, AttackStrategy.Tense])],
206+
attack_strategy=[AttackStrategy.DIFFICULT, AttackStrategy.Compose([AttackStrategy.Math, AttackStrategy.Tense])],
207+
output_path="DIFFICULTBudget-Duplicates.jsonl",
208+
attack_objective_generator=attack_objective_generator,
181209
)
182210
print(outputs)
183211

184-
# Low, medium, and high
212+
# EASY, MODERATE, and DIFFICULT
185213
red_team_agent = RedTeamAgent(
186214
azure_ai_project=azure_ai_project,
187215
credential=DefaultAzureCredential(),
188216
)
189217

190218
outputs = await red_team_agent.attack(
191219
target=call_to_ai_application, # type: ignore
192-
attack_strategy=[AttackStrategy.LOW, AttackStrategy.MEDIUM, AttackStrategy.HIGH]
220+
attack_strategy=[AttackStrategy.EASY, AttackStrategy.MODERATE, AttackStrategy.DIFFICULT],
221+
attack_objective_generator=attack_objective_generator,
193222
)
194223
print(outputs)
195224

@@ -200,7 +229,8 @@ def callback_target(
200229
risk_categories=[
201230
RiskCategory.HateUnfairness,
202231
RiskCategory.Violence,
203-
]
232+
],
233+
num_objectives=10,
204234
)
205235
application_scenario = "You are an AI application fine tuned on financial services data, a chatbot which answers questions about how best to use the company's product for a user's financial needs. Industry = Financial Services"
206236

0 commit comments

Comments (0)