5
5
pip install -e ".[pyrit]"
6
6
"""
7
7
8
-
9
8
from typing import Dict , List , Optional
10
- from azure .ai .evaluation ._safety_evaluation . _red_team_agent import RedTeamAgent , AttackStrategy
9
+ from azure .ai .evaluation ._safety_evaluation import RedTeamAgent , AttackStrategy , AttackObjectiveGenerator , RiskCategory
11
10
import os
12
11
from azure .identity import DefaultAzureCredential
13
12
from azure .ai .evaluation .simulator import AdversarialScenario
14
13
from pyrit .prompt_target import OpenAIChatTarget
15
14
from pyrit .common import initialize_pyrit , DUCK_DB
16
- from azure .ai .evaluation ._safety_evaluation import AttackObjectiveGenerator , RiskCategory
17
15
18
16
19
17
async def main ():
@@ -23,35 +21,55 @@ async def main():
23
21
"project_name" : os .environ .get ("AZURE_PROJECT_NAME" ),
24
22
}
25
23
26
-
27
24
# [START red_team_agent_targets]
28
25
# Model config target
29
26
model_config = {
30
27
"azure_endpoint" : os .environ .get ("AZURE_ENDPOINT" ),
31
28
"azure_deployment" : os .environ .get ("AZURE_DEPLOYMENT_NAME" ),
32
29
}
33
30
31
+ ## Minimal inputs
32
+ attack_objective_generator = AttackObjectiveGenerator (
33
+ risk_categories = [
34
+ RiskCategory .HateUnfairness ,
35
+ ],
36
+ num_objectives = 10 ,
37
+ )
38
+
34
39
red_team_agent = RedTeamAgent (
35
40
azure_ai_project = azure_ai_project ,
36
41
credential = DefaultAzureCredential (),
37
42
)
38
43
39
44
outputs = await red_team_agent .attack (
40
45
target = model_config , # type: ignore
46
+ attack_objective_generator = attack_objective_generator ,
41
47
)
42
48
print (outputs )
43
49
44
50
# Mock function target to simulate an AI application
45
51
def call_to_ai_application (query : str ) -> str :
46
52
return "mock response"
47
53
54
+ ## Maximal inputs
55
+ attack_objective_generator = AttackObjectiveGenerator (
56
+ risk_categories = [
57
+ RiskCategory .HateUnfairness ,
58
+ RiskCategory .Violence ,
59
+ RiskCategory .Sexual ,
60
+ RiskCategory .SelfHarm ,
61
+ ],
62
+ num_objectives = 10 ,
63
+ )
64
+
48
65
red_team_agent = RedTeamAgent (
49
66
azure_ai_project = azure_ai_project ,
50
67
credential = DefaultAzureCredential (),
51
68
)
52
69
53
70
outputs = await red_team_agent .attack (
54
71
target = call_to_ai_application , # type: ignore
72
+ attack_objective_generator = attack_objective_generator ,
55
73
)
56
74
print (outputs )
57
75
@@ -62,7 +80,7 @@ def callback_target(
62
80
session_state : Optional [str ] = None ,
63
81
context : Optional [Dict ] = None
64
82
) -> dict :
65
- messages_list = [{"role" : chat_message .role ,"content" : chat_message .content , } for chat_message in messages ] #type: ignore
83
+ messages_list = [{"role" : chat_message .role ,"content" : chat_message .content } for chat_message in messages ] #type: ignore
66
84
latest_message = messages_list [- 1 ]
67
85
application_input = latest_message ["content" ]
68
86
try :
@@ -86,14 +104,15 @@ def callback_target(
86
104
87
105
outputs = await red_team_agent .attack (
88
106
target = callback_target , # type: ignore
107
+ attack_objective_generator = attack_objective_generator ,
89
108
)
90
109
print (outputs )
91
110
92
111
# Pyrit target
93
112
initialize_pyrit (memory_db_type = DUCK_DB )
94
113
pyrit_target = OpenAIChatTarget (
95
- deployment_name = os .environ .get ("AZURE_DEPLOYMENT_NAME" ),
96
- endpoint = os .environ .get ("AZURE_ENDPOINT" ),
114
+ deployment_name = os .environ .get ("AZURE_DEPLOYMENT_NAME" ),
115
+ endpoint = os .environ .get ("AZURE_ENDPOINT" ),
97
116
use_aad_auth = True
98
117
)
99
118
@@ -104,6 +123,7 @@ def callback_target(
104
123
105
124
outputs = await red_team_agent .attack (
106
125
target = pyrit_target , # type: ignore
126
+ attack_objective_generator = attack_objective_generator ,
107
127
)
108
128
print (outputs )
109
129
# [END red_team_agent_targets]
@@ -117,42 +137,46 @@ def callback_target(
117
137
118
138
outputs = await red_team_agent .attack (
119
139
target = call_to_ai_application , # type: ignore
140
+ attack_objective_generator = attack_objective_generator ,
120
141
)
121
142
print (outputs )
122
143
123
- # Low budget
144
+ # EASY budget
124
145
red_team_agent = RedTeamAgent (
125
146
azure_ai_project = azure_ai_project ,
126
147
credential = DefaultAzureCredential (),
127
148
)
128
149
129
150
outputs = await red_team_agent .attack (
130
151
target = call_to_ai_application , # type: ignore
131
- attack_strategy = [AttackStrategy .LOW ]
152
+ attack_strategy = [AttackStrategy .EASY ],
153
+ attack_objective_generator = attack_objective_generator ,
132
154
)
133
155
print (outputs )
134
156
135
- # Medium budget
157
+ # MODERATE budget
136
158
red_team_agent = RedTeamAgent (
137
159
azure_ai_project = azure_ai_project ,
138
160
credential = DefaultAzureCredential (),
139
161
)
140
162
141
163
outputs = await red_team_agent .attack (
142
164
target = model_config , # type: ignore
143
- attack_strategy = [AttackStrategy .MEDIUM ]
165
+ attack_strategy = [AttackStrategy .MODERATE ],
166
+ attack_objective_generator = attack_objective_generator ,
144
167
)
145
168
print (outputs )
146
169
147
- # High budget
170
+ # DIFFICULT budget
148
171
red_team_agent = RedTeamAgent (
149
172
azure_ai_project = azure_ai_project ,
150
173
credential = DefaultAzureCredential (),
151
174
)
152
175
153
176
outputs = await red_team_agent .attack (
154
177
target = model_config , # type: ignore
155
- attack_strategy = [AttackStrategy .HIGH ]
178
+ attack_strategy = [AttackStrategy .DIFFICULT ],
179
+ attack_objective_generator = attack_objective_generator ,
156
180
)
157
181
158
182
# Compose attack strategies
@@ -164,8 +188,10 @@ def callback_target(
164
188
outputs = await red_team_agent .attack (
165
189
target = model_config , # type: ignore
166
190
attack_strategy = [AttackStrategy .Compose ([AttackStrategy .Flip , AttackStrategy .Base64 ]),
167
- AttackStrategy .LOW ,
191
+ AttackStrategy .EASY ,
168
192
AttackStrategy .Morse ],
193
+ output_path = "RacoonRedTeamEvalResults.jsonl" ,
194
+ attack_objective_generator = attack_objective_generator ,
169
195
)
170
196
print (outputs )
171
197
@@ -177,19 +203,22 @@ def callback_target(
177
203
178
204
outputs = await red_team_agent .attack (
179
205
target = model_config , # type: ignore
180
- attack_strategy = [AttackStrategy .HIGH , AttackStrategy .Compose ([AttackStrategy .Math , AttackStrategy .Tense ])],
206
+ attack_strategy = [AttackStrategy .DIFFICULT , AttackStrategy .Compose ([AttackStrategy .Math , AttackStrategy .Tense ])],
207
+ output_path = "DIFFICULTBudget-Duplicates.jsonl" ,
208
+ attack_objective_generator = attack_objective_generator ,
181
209
)
182
210
print (outputs )
183
211
184
- # Low, medium , and high
212
+ # EASY, MODERATE , and DIFFICULT
185
213
red_team_agent = RedTeamAgent (
186
214
azure_ai_project = azure_ai_project ,
187
215
credential = DefaultAzureCredential (),
188
216
)
189
217
190
218
outputs = await red_team_agent .attack (
191
219
target = call_to_ai_application , # type: ignore
192
- attack_strategy = [AttackStrategy .LOW , AttackStrategy .MEDIUM , AttackStrategy .HIGH ]
220
+ attack_strategy = [AttackStrategy .EASY , AttackStrategy .MODERATE , AttackStrategy .DIFFICULT ],
221
+ attack_objective_generator = attack_objective_generator ,
193
222
)
194
223
print (outputs )
195
224
@@ -200,7 +229,8 @@ def callback_target(
200
229
risk_categories = [
201
230
RiskCategory .HateUnfairness ,
202
231
RiskCategory .Violence ,
203
- ]
232
+ ],
233
+ num_objectives = 10 ,
204
234
)
205
235
application_scenario = "You are an AI application fine tuned on financial services data, a chatbot which answers questions about how best to use the company's product for a user's financial needs. Industry = Financial Services"
206
236
0 commit comments