Skip to content

Commit 2bcc41a

Browse files
committed
improve deep research use case
1 parent ffcdf69 commit 2bcc41a

File tree

2 files changed

+61
-15
lines changed

2 files changed

+61
-15
lines changed

templates/components/agents/python/deep_research/app/workflows/agents.py

+33-8
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ class AnalysisDecision(BaseModel):
1616
description="Whether to continue research, write a report, or cancel the research after several retries"
1717
)
1818
research_questions: Optional[List[str]] = Field(
19-
description="Questions to research if continuing research. Maximum 3 questions. Set to null or empty if writing a report.",
19+
description="""
20+
If the decision is to research, provide a list of questions to research that relate to the user request.
21+
Maximum 3 questions. Set to null or empty if writing a report or canceling the research.
22+
""",
2023
default_factory=list,
2124
)
2225
cancel_reason: Optional[str] = Field(
@@ -29,32 +32,53 @@ async def plan_research(
2932
memory: SimpleComposableMemory,
3033
context_nodes: List[Node],
3134
user_request: str,
35+
total_questions: int,
3236
) -> AnalysisDecision:
33-
analyze_prompt = PromptTemplate(
34-
"""
37+
analyze_prompt = """
3538
You are a professor who is guiding a researcher to research a specific request/problem.
3639
Your task is to decide on a research plan for the researcher.
40+
3741
The possible actions are:
3842
+ Provide a list of questions for the researcher to investigate, with the purpose of clarifying the request.
3943
+ Write a report if the researcher has already gathered enough research on the topic and can resolve the initial request.
4044
+ Cancel the research if most of the answers from researchers indicate there is insufficient information to research the request. Do not attempt more than 3 research iterations or too many questions.
45+
4146
The workflow should be:
4247
+ Always begin by providing some initial questions for the researcher to investigate.
4348
+ Analyze the provided answers against the initial topic/request. If the answers are insufficient to resolve the initial request, provide additional questions for the researcher to investigate.
4449
+ If the answers are sufficient to resolve the initial request, instruct the researcher to write a report.
45-
<User request>
46-
{user_request}
47-
</User request>
4850
51+
Here is the context:
4952
<Collected information>
5053
{context_str}
5154
</Collected information>
5255
5356
<Conversation context>
5457
{conversation_context}
5558
</Conversation context>
59+
60+
{enhanced_prompt}
61+
62+
Now, provide your decision in the required format for this user request:
63+
<User request>
64+
{user_request}
65+
</User request>
5666
"""
57-
)
67+
# Manually craft the prompt to avoid LLM hallucination
68+
enhanced_prompt = ""
69+
if total_questions == 0:
70+
# Avoid writing a report without any research context
71+
enhanced_prompt = """
72+
73+
The student has no questions to research. Let's start by asking some questions.
74+
"""
75+
elif total_questions > 6:
76+
# Avoid asking too many questions (when the data is not ready for writing a report)
77+
enhanced_prompt = """
78+
79+
The student has researched {total_questions} questions. You should cancel the research if the context is not sufficient to write a report.
80+
"""
81+
5882
conversation_context = "\n".join(
5983
[f"{message.role}: {message.content}" for message in memory.get_all()]
6084
)
@@ -63,10 +87,11 @@ async def plan_research(
6387
)
6488
res = await Settings.llm.astructured_predict(
6589
output_cls=AnalysisDecision,
66-
prompt=analyze_prompt,
90+
prompt=PromptTemplate(template=analyze_prompt),
6791
user_request=user_request,
6892
context_str=context_str,
6993
conversation_context=conversation_context,
94+
enhanced_prompt=enhanced_prompt,
7095
)
7196
return res
7297

templates/components/agents/python/deep_research/app/workflows/deep_research.py

+28-7
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,11 @@ def __init__(
8989
)
9090

9191
@step
92-
def retrieve(self, ctx: Context, ev: StartEvent) -> PlanResearchEvent:
92+
async def retrieve(self, ctx: Context, ev: StartEvent) -> PlanResearchEvent:
9393
"""
9494
Initiate the workflow: memory, tools, agent
9595
"""
96+
await ctx.set("total_questions", 0)
9697
self.user_request = ev.get("input")
9798
self.memory.put_messages(
9899
messages=[
@@ -132,9 +133,7 @@ def retrieve(self, ctx: Context, ev: StartEvent) -> PlanResearchEvent:
132133
nodes=nodes,
133134
)
134135
)
135-
return PlanResearchEvent(
136-
context_nodes=self.context_nodes,
137-
)
136+
return PlanResearchEvent()
138137

139138
@step
140139
async def analyze(
@@ -153,10 +152,12 @@ async def analyze(
153152
},
154153
)
155154
)
155+
total_questions = await ctx.get("total_questions")
156156
res = await plan_research(
157157
memory=self.memory,
158158
context_nodes=self.context_nodes,
159159
user_request=self.user_request,
160+
total_questions=total_questions,
160161
)
161162
if res.decision == "cancel":
162163
ctx.write_event_to_stream(
@@ -172,6 +173,22 @@ async def analyze(
172173
result=res.cancel_reason,
173174
)
174175
elif res.decision == "write":
176+
# Writing a report without any research context is not allowed.
177+
# It's a LLM hallucination.
178+
if total_questions == 0:
179+
ctx.write_event_to_stream(
180+
DataEvent(
181+
type="deep_research_event",
182+
data={
183+
"event": "analyze",
184+
"state": "done",
185+
},
186+
)
187+
)
188+
return StopEvent(
189+
result="Sorry, I have a problem when analyzing the retrieved information. Please try again.",
190+
)
191+
175192
self.memory.put(
176193
message=ChatMessage(
177194
role=MessageRole.ASSISTANT,
@@ -180,7 +197,11 @@ async def analyze(
180197
)
181198
ctx.send_event(ReportEvent())
182199
else:
183-
await ctx.set("n_questions", len(res.research_questions))
200+
total_questions += len(res.research_questions)
201+
await ctx.set("total_questions", total_questions) # For tracking
202+
await ctx.set(
203+
"waiting_questions", len(res.research_questions)
204+
) # For waiting questions to be answered
184205
self.memory.put(
185206
message=ChatMessage(
186207
role=MessageRole.ASSISTANT,
@@ -270,7 +291,7 @@ async def collect_answers(
270291
"""
271292
Collect answers to all questions
272293
"""
273-
num_questions = await ctx.get("n_questions")
294+
num_questions = await ctx.get("waiting_questions")
274295
results = ctx.collect_events(
275296
ev,
276297
expected=[CollectAnswersEvent] * num_questions,
@@ -284,7 +305,7 @@ async def collect_answers(
284305
content=f"<Question>{result.question}</Question>\n<Answer>{result.answer}</Answer>",
285306
)
286307
)
287-
await ctx.set("n_questions", 0)
308+
await ctx.set("waiting_questions", 0)
288309
self.memory.put(
289310
message=ChatMessage(
290311
role=MessageRole.ASSISTANT,

0 commit comments

Comments
 (0)