from typing import List, Literal, Optional

from llama_index.core.base.llms.types import (
    CompletionResponse,
    CompletionResponseAsyncGen,
)
from llama_index.core.memory.simple_composable_memory import SimpleComposableMemory
from llama_index.core.prompts import PromptTemplate
from llama_index.core.schema import MetadataMode, Node, NodeWithScore
from llama_index.core.settings import Settings
from pydantic import BaseModel, Field


class AnalysisDecision(BaseModel):
    """Structured planning decision: what to do next and the data that action needs."""

    decision: Literal["research", "write", "cancel"] = Field(
        description="Whether to continue research, write a report, or cancel the research after several retries"
    )
    research_questions: Optional[List[str]] = Field(
        description="Questions to research if continuing research. Maximum 3 questions. Set to null or empty if writing a report.",
        default_factory=list,
    )
    cancel_reason: Optional[str] = Field(
        description="The reason for cancellation if the decision is to cancel research.",
        default=None,
    )

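# Illustration only (not part of the original module): a planner response that
# continues the research is expected to parse into something like
#
#   AnalysisDecision(
#       decision="research",
#       research_questions=["What problem does the request describe?"],
#   )
#
# whereas a "cancel" decision carries a cancel_reason and no questions.
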
async def plan_research(
    memory: SimpleComposableMemory,
    context_nodes: List[Node],
    user_request: str,
) -> AnalysisDecision:
    """Decide the next step: ask more research questions, write the report, or cancel."""
    analyze_prompt = PromptTemplate(
        """
        You are a professor who is guiding a researcher to research a specific request/problem.
        Your task is to decide on a research plan for the researcher.
        The possible actions are:
        + Provide a list of questions for the researcher to investigate, with the purpose of clarifying the request.
        + Write a report if the researcher has already gathered enough research on the topic and can resolve the initial request.
        + Cancel the research if most of the answers from researchers indicate there is insufficient information to research the request. Do not attempt more than 3 research iterations or ask too many questions.
        The workflow should be:
        + Always begin by providing some initial questions for the researcher to investigate.
        + Analyze the provided answers against the initial topic/request. If the answers are insufficient to resolve the initial request, provide additional questions for the researcher to investigate.
        + If the answers are sufficient to resolve the initial request, instruct the researcher to write a report.
        <User request>
        {user_request}
        </User request>

        <Collected information>
        {context_str}
        </Collected information>

        <Conversation context>
        {conversation_context}
        </Conversation context>
        """
    )
    # Flatten the chat history into "role: content" lines for the prompt.
    conversation_context = "\n".join(
        [f"{message.role}: {message.content}" for message in memory.get_all()]
    )
    context_str = "\n".join(
        [node.get_content(metadata_mode=MetadataMode.LLM) for node in context_nodes]
    )
    # Have the LLM fill the AnalysisDecision schema directly.
    res = await Settings.llm.astructured_predict(
        output_cls=AnalysisDecision,
        prompt=analyze_prompt,
        user_request=user_request,
        context_str=context_str,
        conversation_context=conversation_context,
    )
    return res

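# A minimal usage sketch (illustrative; the workflow that supplies `memory`,
# `context_nodes`, and `user_request` lives outside this module):
#
#   decision = await plan_research(memory, context_nodes, user_request)
#   if decision.decision == "research":
#       answers = [await research(q, nodes) for q in decision.research_questions]
#   elif decision.decision == "write":
#       report = await write_report(memory, user_request)
#   else:
#       log.info("Cancelled: %s", decision.cancel_reason)  # hypothetical logger
#
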
async def research(
    question: str,
    context_nodes: List[NodeWithScore],
) -> str:
    """Answer one research question using only the provided nodes, with inline citations."""
    prompt = """
    You are a researcher who is in the process of answering the question.
    The purpose is to answer the question based on the collected information, without using prior knowledge or making up any new information.
    Always add citations to the sentence/point/paragraph using the id of the provided content.
    The citation should follow this format: [citation:id]() where id is the id of the content.

    E.g.:
    If we have a context like this:
    <Citation id='abc-xyz'>
    A baby llama is called a cria
    </Citation id='abc-xyz'>

    And your answer uses the content, then the citation should be:
    - A baby llama is called a cria [citation:abc-xyz]()

    Here is the provided context for the question:
    <Collected information>
    {context_str}
    </Collected information>

    No prior knowledge, just use the provided context to answer the question: {question}
    """
    # Wrap each node in a <Citation id='...'> block so the model can cite it.
    context_str = "\n".join(
        [_get_text_node_content_for_citation(node) for node in context_nodes]
    )
    res = await Settings.llm.acomplete(
        prompt=prompt.format(question=question, context_str=context_str),
    )
    return res.text

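# Usage sketch for research() above (the retriever is hypothetical; anything
# that yields NodeWithScore objects, e.g. an index retriever, works):
#
#   nodes = await retriever.aretrieve("What is a baby llama called?")
#   answer = await research("What is a baby llama called?", nodes)
#
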
async def write_report(
    memory: SimpleComposableMemory,
    user_request: str,
    stream: bool = False,
) -> CompletionResponse | CompletionResponseAsyncGen:
    """Write the final report from the accumulated research, optionally streaming the output."""
    report_prompt = """
    You are a researcher writing a report based on a user request and the research context.
    You have researched various perspectives related to the user request.
    The report should provide a comprehensive outline covering all important points from the researched perspectives.
    Create a well-structured outline for the research report that covers all the answers.

    # IMPORTANT when writing in markdown format:
    + Use tables or figures where appropriate to enhance presentation.
    + Preserve all citation syntax (the `[citation:id]()` parts in the provided context). Keep these citations in the final report - no separate reference section is needed.
    + Do not add links, a table of contents, or a references section to the report.

    <User request>
    {user_request}
    </User request>

    <Research context>
    {research_context}
    </Research context>

    Now, write a report addressing the user request based on the provided research, following the format and guidelines above.
    """
    research_context = "\n".join(
        [f"{message.role}: {message.content}" for message in memory.get_all()]
    )

    # Select the async streaming or blocking completion endpoint up front so the
    # call site below is identical for both modes.
    llm_complete_func = (
        Settings.llm.astream_complete if stream else Settings.llm.acomplete
    )

    res = await llm_complete_func(
        prompt=report_prompt.format(
            user_request=user_request,
            research_context=research_context,
        ),
    )
    return res

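# Streaming usage sketch (assumes the LLM configured on Settings supports async
# streaming):
#
#   res = await write_report(memory, user_request, stream=True)
#   async for chunk in res:
#       print(chunk.delta, end="", flush=True)
#
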
def _get_text_node_content_for_citation(node: NodeWithScore) -> str:
    """
    Construct node content for the LLM, wrapped in citation tags carrying the node id.
    """
    node_id = node.node.node_id
    # Newline before the closing tag keeps the output aligned with the example
    # shown in the research prompt.
    content = f"<Citation id='{node_id}'>\n{node.get_content(metadata_mode=MetadataMode.LLM)}\n</Citation id='{node_id}'>"
    return content
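# For a node whose id happened to be 'abc-xyz' and whose text was "A baby llama
# is called a cria", the helper above would produce:
#
#   <Citation id='abc-xyz'>
#   A baby llama is called a cria
#   </Citation id='abc-xyz'>
#
# which matches the citation example given in the research prompt.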