
Commit 0c60f27

add env settings for suggest questions feature
1 parent c16deed commit 0c60f27

8 files changed: +258 -44 lines changed


helpers/env-variables.ts

Lines changed: 21 additions & 26 deletions
@@ -487,33 +487,28 @@ It\\'s cute animal.
 };
 
 const getTemplateEnvs = (template?: TemplateType): EnvVar[] => {
-  if (template === "multiagent") {
-    return [
-      {
-        name: "MESSAGE_QUEUE_PORT",
-      },
-      {
-        name: "CONTROL_PLANE_PORT",
-      },
-      {
-        name: "HUMAN_CONSUMER_PORT",
-      },
-      {
-        name: "AGENT_QUERY_ENGINE_PORT",
-        value: "8003",
-      },
-      {
-        name: "AGENT_QUERY_ENGINE_DESCRIPTION",
-        value: "Query information from the provided data",
-      },
-      {
-        name: "AGENT_DUMMY_PORT",
-        value: "8004",
-      },
-    ];
-  } else {
-    return [];
+  const nextQuestionEnvs: EnvVar[] = [
+    {
+      name: "NEXT_QUESTION_ENABLE",
+      description: "Whether to show next question suggestions",
+      value: "true",
+    },
+    {
+      name: "NEXT_QUESTION_PROMPT",
+      description: `Customize prompt to generate the next question suggestions based on the conversation history.
+Default prompt is:
+NEXT_QUESTION_PROMPT=# You're a helpful assistant! Your task is to suggest the next question that user might ask.
+# Here is the conversation history
+# ---------------------\n{conversation}\n---------------------
+# Given the conversation history, please give me 3 questions that you might ask next!
+`,
+    },
+  ];
+
+  if (template === "multiagent" || template === "streaming") {
+    return nextQuestionEnvs;
   }
+  return [];
 };
 
 const getObservabilityEnvs = (
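
These variables are read on the Python side by the pydantic-settings model added later in this commit (NextQuestionSettings), which resolves them through the NEXT_QUESTION_ env prefix. A minimal, self-contained sketch of that resolution, using a stand-in class rather than the real model:

# Stand-in for the NextQuestionSettings model added further down in this commit;
# it shows how env_prefix="NEXT_QUESTION_" maps NEXT_QUESTION_ENABLE to `enable`.
import os

from pydantic_settings import BaseSettings, SettingsConfigDict


class DemoNextQuestionSettings(BaseSettings):
    enable: bool = True

    model_config = SettingsConfigDict(env_prefix="NEXT_QUESTION_")


os.environ["NEXT_QUESTION_ENABLE"] = "false"
print(DemoNextQuestionSettings().enable)  # False: the string "false" is parsed as a boolean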

helpers/python.ts

Lines changed: 7 additions & 0 deletions
@@ -395,6 +395,13 @@ export const installPythonTemplate = async ({
     cwd: path.join(compPath, "settings", "python"),
   });
 
+  // Copy services
+  if (template == "streaming" || template == "multiagent") {
+    await copy("**", path.join(root, "app", "api", "services"), {
+      cwd: path.join(compPath, "services", "python"),
+    });
+  }
+
   if (template === "streaming") {
     // For the streaming template only:
     // Select and copy engine code based on data sources and tools

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
+import base64
+import mimetypes
+import os
+from io import BytesIO
+from pathlib import Path
+from typing import Any, List, Tuple
+
+from app.engine.index import IndexConfig, get_index
+from llama_index.core import VectorStoreIndex
+from llama_index.core.ingestion import IngestionPipeline
+from llama_index.core.readers.file.base import (
+    _try_loading_included_file_formats as get_file_loaders_map,
+)
+from llama_index.core.schema import Document
+from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex
+from llama_index.readers.file import FlatReader
+
+
+def get_llamaparse_parser():
+    from app.engine.loaders import load_configs
+    from app.engine.loaders.file import FileLoaderConfig, llama_parse_parser
+
+    config = load_configs()
+    file_loader_config = FileLoaderConfig(**config["file"])
+    if file_loader_config.use_llama_parse:
+        return llama_parse_parser()
+    else:
+        return None
+
+
+def default_file_loaders_map():
+    default_loaders = get_file_loaders_map()
+    default_loaders[".txt"] = FlatReader
+    return default_loaders
+
+
+class PrivateFileService:
+    PRIVATE_STORE_PATH = "output/uploaded"
+
+    @staticmethod
+    def preprocess_base64_file(base64_content: str) -> Tuple[bytes, str | None]:
+        header, data = base64_content.split(",", 1)
+        mime_type = header.split(";")[0].split(":", 1)[1]
+        extension = mimetypes.guess_extension(mime_type)
+        # File data as bytes
+        return base64.b64decode(data), extension
+
+    @staticmethod
+    def store_and_parse_file(file_name, file_data, extension) -> List[Document]:
+        # Store file to the private directory
+        os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True)
+        file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name))
+
+        # write file
+        with open(file_path, "wb") as f:
+            f.write(file_data)
+
+        # Load file to documents
+        # If LlamaParse is enabled, use it to parse the file
+        # Otherwise, use the default file loaders
+        reader = get_llamaparse_parser()
+        if reader is None:
+            reader_cls = default_file_loaders_map().get(extension)
+            if reader_cls is None:
+                raise ValueError(f"File extension {extension} is not supported")
+            reader = reader_cls()
+        documents = reader.load_data(file_path)
+        # Add custom metadata
+        for doc in documents:
+            doc.metadata["file_name"] = file_name
+            doc.metadata["private"] = "true"
+        return documents
+
+    @staticmethod
+    def process_file(file_name: str, base64_content: str, params: Any) -> List[str]:
+        file_data, extension = PrivateFileService.preprocess_base64_file(base64_content)
+
+        # Add the nodes to the index and persist it
+        index_config = IndexConfig(**params)
+        current_index = get_index(index_config)
+
+        # Insert the documents into the index
+        if isinstance(current_index, LlamaCloudIndex):
+            from app.engine.service import LLamaCloudFileService
+
+            project_id = current_index._get_project_id()
+            pipeline_id = current_index._get_pipeline_id()
+            # LlamaCloudIndex is a managed index so we can directly use the files
+            upload_file = (file_name, BytesIO(file_data))
+            return [
+                LLamaCloudFileService.add_file_to_pipeline(
+                    project_id,
+                    pipeline_id,
+                    upload_file,
+                    custom_metadata={
+                        # Set private=true to mark the document as private user docs (required for filtering)
+                        "private": "true",
+                    },
+                )
+            ]
+        else:
+            # First process documents into nodes
+            documents = PrivateFileService.store_and_parse_file(
+                file_name, file_data, extension
+            )
+            pipeline = IngestionPipeline()
+            nodes = pipeline.run(documents=documents)
+
+            # Add the nodes to the index and persist it
+            if current_index is None:
+                current_index = VectorStoreIndex(nodes=nodes)
+            else:
+                current_index.insert_nodes(nodes=nodes)
+            current_index.storage_context.persist(
+                persist_dir=os.environ.get("STORAGE_DIR", "storage")
+            )
+
+            # Return the document ids
+            return [doc.doc_id for doc in documents]
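
A small usage sketch for the base64 handling above. The data URL is made up for illustration, and the import path is an assumption since the new file's path is not shown in this hunk; process_file itself needs a generated project (it imports app.engine.index), so only the preprocessing step is exercised here:

# Illustrative only: decode a made-up data URL the way preprocess_base64_file does.
# Assumed import path (the real file path is not shown in this diff):
# from app.api.services.file import PrivateFileService
import base64

data_url = "data:text/plain;base64," + base64.b64encode(b"hello private docs").decode()

file_data, extension = PrivateFileService.preprocess_base64_file(data_url)
print(extension)  # ".txt" on most platforms (from mimetypes.guess_extension)
print(file_data)  # b"hello private docs"
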
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+import logging
+from typing import List, Optional
+
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.settings import Settings
+from pydantic import BaseModel
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+from app.api.routers.models import Message
+
+logger = logging.getLogger("uvicorn")
+
+
+class NextQuestionSettings(BaseSettings):
+    enable: bool = True
+    prompt_template: str = (
+        "You're a helpful assistant! Your task is to suggest the next question that user might ask. "
+        "\nHere is the conversation history"
+        "\n---------------------\n{conversation}\n---------------------"
+        "Given the conversation history, please give me 3 questions that you might ask next!"
+    )
+
+    model_config = SettingsConfigDict(env_prefix="NEXT_QUESTION_")
+
+    @property
+    def prompt(self) -> PromptTemplate:
+        return PromptTemplate(self.prompt_template)
+
+
+next_question_settings = NextQuestionSettings()
+
+
+class NextQuestions(BaseModel):
+    """A list of questions that user might ask next"""
+
+    questions: List[str]
+
+
+class NextQuestionSuggestion:
+    @staticmethod
+    async def suggest_next_questions(
+        messages: List[Message],
+    ) -> Optional[List[str]]:
+        """
+        Suggest the next questions that user might ask based on the conversation history
+        Return None if suggestion is disabled or there is an error
+        """
+        if not next_question_settings.enable:
+            return None
+
+        try:
+            # Reduce the cost by only using the last two messages
+            last_user_message = None
+            last_assistant_message = None
+            for message in reversed(messages):
+                if message.role == "user":
+                    last_user_message = f"User: {message.content}"
+                elif message.role == "assistant":
+                    last_assistant_message = f"Assistant: {message.content}"
+                if last_user_message and last_assistant_message:
+                    break
+            conversation: str = f"{last_user_message}\n{last_assistant_message}"
+
+            output: NextQuestions = await Settings.llm.astructured_predict(
+                NextQuestions,
+                prompt=next_question_settings.prompt,
+                conversation=conversation,
+            )
+
+            return output.questions
+        except Exception as e:
+            logger.error(f"Error when generating next question: {e}")
+            return None
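
A sketch of calling the suggestion service from application code in a generated project. It assumes Settings.llm has already been configured by the app's init code and uses the app.api.services.suggestion import path that appears in the router changes below; the conversation content is invented for illustration:

# Sketch: requires a configured Settings.llm, so this only runs inside a generated project.
import asyncio

from app.api.routers.models import Message
from app.api.services.suggestion import NextQuestionSuggestion


async def demo() -> None:
    history = [
        Message(role="user", content="What does the report say about Q3 revenue?"),
        Message(role="assistant", content="Q3 revenue grew 12% year over year."),
    ]
    # Returns a list of suggested questions, or None if disabled
    # (NEXT_QUESTION_ENABLE=false) or if the LLM call fails
    questions = await NextQuestionSuggestion.suggest_next_questions(history)
    print(questions)


# asyncio.run(demo())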

templates/types/multiagent/fastapi/app/api/routers/vercel_response.py

Lines changed: 20 additions & 3 deletions
@@ -7,8 +7,9 @@
 from fastapi import Request
 from fastapi.responses import StreamingResponse
 
-from app.api.routers.models import ChatData
+from app.api.routers.models import ChatData, Message
 from app.agents.single import AgentRunEvent, AgentRunResult
+from app.api.services.suggestion import NextQuestionSuggestion, next_question_settings
 
 logger = logging.getLogger("uvicorn")
 
@@ -57,16 +58,32 @@ async def content_generator(
         # Yield the text response
         async def _chat_response_generator():
             result = await task
-
+            final_response = ""
+
             if isinstance(result, AgentRunResult):
                 for token in result.response.message.content:
                     yield VercelStreamResponse.convert_text(token)
 
             if isinstance(result, AsyncGenerator):
                 async for token in result:
+                    final_response += token.delta
                     yield VercelStreamResponse.convert_text(token.delta)
 
-            # TODO: stream NextQuestionSuggestion
+            # Generate questions that user might be interested in
+            if next_question_settings.enable:
+                conversation = chat_data.messages + [
+                    Message(role="assistant", content=final_response)
+                ]
+                questions = await NextQuestionSuggestion.suggest_next_questions(
+                    conversation
+                )
+                if questions:
+                    yield VercelStreamResponse.convert_data(
+                        {
+                            "type": "suggested_questions",
+                            "data": questions,
+                        }
+                    )
             # TODO: stream sources
 
         # Yield the events from the event handler
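
On the wire, the suggestions arrive as a single data part after the streamed text. A hypothetical consumer-side filter is sketched below; it assumes convert_data follows the Vercel AI SDK data-stream convention of prefixing JSON data arrays with 8: (and text with 0:), which is not shown in this diff:

# Hypothetical consumer: collect suggested questions from streamed data parts.
# Assumption: data parts look like '8:[{...}]' and text parts like '0:"..."'.
import json
from typing import Iterable, List


def extract_suggested_questions(stream_lines: Iterable[str]) -> List[str]:
    questions: List[str] = []
    for line in stream_lines:
        if not line.startswith("8:"):
            continue  # skip text tokens and any other part types
        for part in json.loads(line[2:]):
            if part.get("type") == "suggested_questions":
                questions.extend(part.get("data", []))
    return questions


sample = [
    '0:"Hello"\n',
    '8:[{"type": "suggested_questions", "data": ["What else can you do?"]}]\n',
]
print(extract_suggested_questions(sample))  # ['What else can you do?']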

templates/types/multiagent/fastapi/pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ llama-index = "^0.11.4"
 fastapi = "^0.112.2"
 python-dotenv = "^1.0.0"
 uvicorn = { extras = ["standard"], version = "^0.23.2" }
+pydantic-settings = "^2.4.0"
 cachetools = "^5.3.3"
 aiostream = "^0.5.2"

templates/types/streaming/fastapi/app/api/routers/vercel_response.py

Lines changed: 15 additions & 14 deletions
@@ -7,7 +7,7 @@
 
 from app.api.routers.events import EventCallbackHandler
 from app.api.routers.models import ChatData, Message, SourceNodes
-from app.api.services.suggestion import NextQuestionSuggestion
+from app.api.services.suggestion import NextQuestionSuggestion, next_question_settings
 
 
 class VercelStreamResponse(StreamingResponse):
@@ -56,20 +56,21 @@ async def _chat_response_generator():
                 final_response += token
                 yield VercelStreamResponse.convert_text(token)
 
-            # Generate questions that user might interested to
-            conversation = chat_data.messages + [
-                Message(role="assistant", content=final_response)
-            ]
-            questions = await NextQuestionSuggestion.suggest_next_questions(
-                conversation
-            )
-            if len(questions) > 0:
-                yield VercelStreamResponse.convert_data(
-                    {
-                        "type": "suggested_questions",
-                        "data": questions,
-                    }
+            # Generate questions that user might be interested in
+            if next_question_settings.enable:
+                conversation = chat_data.messages + [
+                    Message(role="assistant", content=final_response)
+                ]
+                questions = await NextQuestionSuggestion.suggest_next_questions(
+                    conversation
                 )
+                if questions:
+                    yield VercelStreamResponse.convert_data(
+                        {
+                            "type": "suggested_questions",
+                            "data": questions,
+                        }
+                    )
 
             # the text_generator is the leading stream, once it's finished, also finish the event stream
             event_handler.is_done = True
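
Reduced to its essentials, both response generators now follow the same weaving pattern: stream the text deltas while accumulating them, then emit one trailing suggested_questions data part once the text is exhausted. A self-contained sketch with stand-in helpers (fake_llm_stream and fake_suggest are illustrative, not the repo's convert_text/convert_data):

# Self-contained illustration of the pattern above; all names here are stand-ins.
import asyncio
from typing import AsyncGenerator, List, Union


async def fake_llm_stream() -> AsyncGenerator[str, None]:
    for token in ["The ", "answer ", "is ", "42."]:
        yield token


async def fake_suggest(conversation: str) -> List[str]:
    return [f"Can you explain how you got: {conversation!r}?"]


async def chat_response_generator() -> AsyncGenerator[Union[str, dict], None]:
    final_response = ""
    async for token in fake_llm_stream():
        final_response += token
        yield token  # text part, streamed as it arrives
    questions = await fake_suggest(final_response)
    if questions:
        yield {"type": "suggested_questions", "data": questions}  # one data part at the end


async def main() -> None:
    async for part in chat_response_generator():
        print(part)


asyncio.run(main())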

templates/types/streaming/fastapi/pyproject.toml

Lines changed: 2 additions & 1 deletion
@@ -14,8 +14,9 @@ fastapi = "^0.109.1"
 uvicorn = { extras = ["standard"], version = "^0.23.2" }
 python-dotenv = "^1.0.0"
 aiostream = "^0.5.2"
-llama-index = "0.11.6"
+pydantic-settings = "^2.4.0"
 cachetools = "^5.3.3"
+llama-index = "0.11.6"
 
 [build-system]
 requires = ["poetry-core"]
