feat: Make suggest next questions configurable #275


Merged (10 commits, Sep 9, 2024). Showing changes from 8 commits.
5 changes: 5 additions & 0 deletions .changeset/cyan-buttons-clean.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add env config for next questions feature
49 changes: 23 additions & 26 deletions helpers/env-variables.ts
@@ -487,33 +487,30 @@ It\\'s cute animal.
 };
 
 const getTemplateEnvs = (template?: TemplateType): EnvVar[] => {
-  if (template === "multiagent") {
-    return [
-      {
-        name: "MESSAGE_QUEUE_PORT",
-      },
-      {
-        name: "CONTROL_PLANE_PORT",
-      },
-      {
-        name: "HUMAN_CONSUMER_PORT",
-      },
-      {
-        name: "AGENT_QUERY_ENGINE_PORT",
-        value: "8003",
-      },
-      {
-        name: "AGENT_QUERY_ENGINE_DESCRIPTION",
-        value: "Query information from the provided data",
-      },
-      {
-        name: "AGENT_DUMMY_PORT",
-        value: "8004",
-      },
-    ];
-  } else {
-    return [];
+  const nextQuestionEnvs: EnvVar[] = [
+    {
+      name: "NEXT_QUESTION_PROMPT",
+      description: `Customize prompt to generate the next question suggestions based on the conversation history.
+Disable this prompt to disable the next question suggestions feature.`,
+      value: `"You're a helpful assistant! Your task is to suggest the next question that user might ask.
+Here is the conversation history
+---------------------
+$conversation
+---------------------
+Given the conversation history, please give me 3 questions that you might ask next!
+Your answer should be wrapped in three sticks which follows the following format:
+\`\`\`
+<question 1>
+<question 2>
+<question 3>
+\`\`\`"`,
+    },
+  ];
+
+  if (template === "multiagent" || template === "streaming") {
+    return nextQuestionEnvs;
   }
+  return [];
 };
 
 const getObservabilityEnvs = (
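
The NEXT_QUESTION_PROMPT variable is both the prompt text and the feature flag: the generated .env ships the default prompt above, and removing or emptying the variable turns the suggestions off. Below is a minimal standalone sketch of that gate, mirroring the TypeScript helper changed further down in this diff (the Python service instead wraps the same variable in a PromptTemplate); the build_suggestion_prompt name is an illustration, not part of the PR.

```python
# Sketch only: standalone illustration of the NEXT_QUESTION_PROMPT gate.
# `build_suggestion_prompt` is a hypothetical helper, not part of this PR.
import os
from typing import List, Optional, Tuple


def build_suggestion_prompt(conversation: List[Tuple[str, str]]) -> Optional[str]:
    """Return the filled-in prompt, or None when the feature is disabled."""
    template = os.getenv("NEXT_QUESTION_PROMPT")
    if not template:
        # Unset or empty variable disables next-question suggestions.
        return None
    history = "\n".join(f"{role}: {content}" for role, content in conversation)
    # The generated default uses a $conversation placeholder.
    return template.replace("$conversation", history)


if __name__ == "__main__":
    prompt = build_suggestion_prompt(
        [("user", "What is create-llama?"), ("assistant", "A CLI that scaffolds LlamaIndex apps.")]
    )
    print(prompt or "Next-question suggestions are disabled")
```
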
7 changes: 7 additions & 0 deletions helpers/python.ts
@@ -395,6 +395,13 @@ export const installPythonTemplate = async ({
    cwd: path.join(compPath, "settings", "python"),
  });

  // Copy services
  if (template == "streaming" || template == "multiagent") {
    await copy("**", path.join(root, "app", "api", "services"), {
      cwd: path.join(compPath, "services", "python"),
    });
  }

  if (template === "streaming") {
    // For the streaming template only:
    // Select and copy engine code based on data sources and tools
26 changes: 7 additions & 19 deletions templates/components/llamaindex/typescript/streaming/suggestion.ts
@@ -1,32 +1,20 @@
 import { ChatMessage, Settings } from "llamaindex";
 
-const NEXT_QUESTION_PROMPT_TEMPLATE = `You're a helpful assistant! Your task is to suggest the next question that user might ask.
-Here is the conversation history
----------------------
-$conversation
----------------------
-Given the conversation history, please give me $number_of_questions questions that you might ask next!
-Your answer should be wrapped in three sticks which follows the following format:
-\`\`\`
-<question 1>
-<question 2>\`\`\`
-`;
-const N_QUESTIONS_TO_GENERATE = 3;
-
-export async function generateNextQuestions(
-  conversation: ChatMessage[],
-  numberOfQuestions: number = N_QUESTIONS_TO_GENERATE,
-) {
+export async function generateNextQuestions(conversation: ChatMessage[]) {
   const llm = Settings.llm;
+  const NEXT_QUESTION_PROMPT = process.env.NEXT_QUESTION_PROMPT;
+  if (!NEXT_QUESTION_PROMPT) {
+    return [];
+  }
 
   // Format conversation
   const conversationText = conversation
     .map((message) => `${message.role}: ${message.content}`)
     .join("\n");
-  const message = NEXT_QUESTION_PROMPT_TEMPLATE.replace(
+  const message = NEXT_QUESTION_PROMPT.replace(
     "$conversation",
     conversationText,
-  ).replace("$number_of_questions", numberOfQuestions.toString());
+  );
 
   try {
     const response = await llm.complete({ prompt: message });
File renamed without changes.
66 changes: 66 additions & 0 deletions templates/components/services/python/suggestion.py
@@ -0,0 +1,66 @@
import logging
import os
import re
from typing import List, Optional

from app.api.routers.models import Message
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings

logger = logging.getLogger("uvicorn")


class NextQuestionSuggestion:
    """
    Suggest the next questions that user might ask based on the conversation history
    Disable this feature by removing the NEXT_QUESTION_PROMPT environment variable
    """

    @classmethod
    def get_configured_prompt(cls) -> Optional[str]:
        prompt = os.getenv("NEXT_QUESTION_PROMPT", None)
        if not prompt:
            return None
        return PromptTemplate(prompt)

    @classmethod
    async def suggest_next_questions(
        cls,
        messages: List[Message],
    ) -> Optional[List[str]]:
        """
        Suggest the next questions that user might ask based on the conversation history
        Return None if suggestion is disabled or there is an error
        """
        prompt_template = cls.get_configured_prompt()
        if not prompt_template:
            return None

        try:
            # Reduce the cost by only using the last two messages
            last_user_message = None
            last_assistant_message = None
            for message in reversed(messages):
                if message.role == "user":
                    last_user_message = f"User: {message.content}"
                elif message.role == "assistant":
                    last_assistant_message = f"Assistant: {message.content}"
                if last_user_message and last_assistant_message:
                    break
            conversation: str = f"{last_user_message}\n{last_assistant_message}"

            # Call the LLM and parse questions from the output
            prompt = prompt_template.format(conversation=conversation)
            output = await Settings.llm.acomplete(prompt)
            questions = cls._extract_questions(output.text)

            return questions
        except Exception as e:
            logger.error(f"Error when generating next question: {e}")
            return None

    @classmethod
    def _extract_questions(cls, text: str) -> List[str]:
        content_match = re.search(r"```(.*?)```", text, re.DOTALL)
        content = content_match.group(1) if content_match else ""
        return content.strip().split("\n")
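
A usage sketch for the service above, assuming it runs inside a generated FastAPI backend (so app.api.routers.models.Message is importable and Settings.llm is configured) and that NEXT_QUESTION_PROMPT is set; the demo function is illustrative only.

```python
# Sketch: exercising NextQuestionSuggestion outside the streaming handlers.
import asyncio

from app.api.routers.models import Message
from app.api.services.suggestion import NextQuestionSuggestion


async def demo() -> None:
    history = [
        Message(role="user", content="How do I index my own documents?"),
        Message(role="assistant", content="Put them in the data folder and run the generate script."),
    ]
    questions = await NextQuestionSuggestion.suggest_next_questions(history)
    # None when NEXT_QUESTION_PROMPT is unset (feature disabled) or on error;
    # otherwise a list of question strings parsed from the fenced block.
    print(questions)


if __name__ == "__main__":
    asyncio.run(demo())
```
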
@@ -1,15 +1,15 @@
-from asyncio import Task
 import json
 import logging
+from asyncio import Task
 from typing import AsyncGenerator
 
 from aiostream import stream
+from app.agents.single import AgentRunEvent, AgentRunResult
+from app.api.routers.models import ChatData, Message
+from app.api.services.suggestion import NextQuestionSuggestion
 from fastapi import Request
 from fastapi.responses import StreamingResponse
 
-from app.api.routers.models import ChatData
-from app.agents.single import AgentRunEvent, AgentRunResult
-
 logger = logging.getLogger("uvicorn")
 

@@ -57,16 +57,32 @@ async def content_generator(
     # Yield the text response
     async def _chat_response_generator():
         result = await task
+        final_response = ""
 
         if isinstance(result, AgentRunResult):
             for token in result.response.message.content:
                 yield VercelStreamResponse.convert_text(token)
 
         if isinstance(result, AsyncGenerator):
             async for token in result:
+                final_response += token.delta
                 yield VercelStreamResponse.convert_text(token.delta)
 
-        # TODO: stream NextQuestionSuggestion
+        # Generate next questions if next question prompt is configured
+        if NextQuestionSuggestion.get_configured_prompt() is not None:
+            conversation = chat_data.messages + [
+                Message(role="assistant", content=final_response)
+            ]
+            questions = await NextQuestionSuggestion.suggest_next_questions(
+                conversation
+            )
+            if questions:
+                yield VercelStreamResponse.convert_data(
+                    {
+                        "type": "suggested_questions",
+                        "data": questions,
+                    }
+                )
         # TODO: stream sources
 
         # Yield the events from the event handler
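
The handler above (the multiagent variant, judging by the app.agents.single import) now accumulates streamed deltas into final_response so the full assistant turn can be appended to the conversation, and it only calls the suggestion service when the prompt is configured. The extra stream item is a plain dict; a small sketch of how a consumer might dispatch on it (handle_stream_data is an assumed name, not part of the PR):

```python
# Sketch: dispatching on the "suggested_questions" payload emitted above.
from typing import Any, Dict, List


def handle_stream_data(event: Dict[str, Any]) -> None:
    if event.get("type") == "suggested_questions":
        questions: List[str] = event["data"]
        for question in questions:
            print(f"Suggested follow-up: {question}")


handle_stream_data(
    {
        "type": "suggested_questions",
        "data": ["Which agents are available?", "How do I add a new tool?"],
    }
)
```
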
@@ -56,20 +56,21 @@ async def _chat_response_generator():
             final_response += token
             yield VercelStreamResponse.convert_text(token)
 
-        # Generate questions that user might interested to
-        conversation = chat_data.messages + [
-            Message(role="assistant", content=final_response)
-        ]
-        questions = await NextQuestionSuggestion.suggest_next_questions(
-            conversation
-        )
-        if len(questions) > 0:
-            yield VercelStreamResponse.convert_data(
-                {
-                    "type": "suggested_questions",
-                    "data": questions,
-                }
-            )
+        # Generate next questions if next question prompt is configured
+        if NextQuestionSuggestion.get_configured_prompt() is not None:
+            conversation = chat_data.messages + [
+                Message(role="assistant", content=final_response)
+            ]
+            questions = await NextQuestionSuggestion.suggest_next_questions(
+                conversation
+            )
+            if questions:
+                yield VercelStreamResponse.convert_data(
+                    {
+                        "type": "suggested_questions",
+                        "data": questions,
+                    }
+                )
 
         # the text_generator is the leading stream, once it's finished, also finish the event stream
         event_handler.is_done = True
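
The streaming template gets the same guard: the suggestion step used to run unconditionally and is now skipped entirely when NEXT_QUESTION_PROMPT is unset. A pytest-style sketch of the on/off behaviour and the output parser, assuming the generated backend package (app.api.services) is importable:

```python
# Sketch: pytest-style checks for the NEXT_QUESTION_PROMPT gate and the parser.
from app.api.services.suggestion import NextQuestionSuggestion


def test_disabled_without_prompt(monkeypatch):
    monkeypatch.delenv("NEXT_QUESTION_PROMPT", raising=False)
    assert NextQuestionSuggestion.get_configured_prompt() is None


def test_enabled_with_prompt(monkeypatch):
    monkeypatch.setenv("NEXT_QUESTION_PROMPT", "Suggest questions for: $conversation")
    assert NextQuestionSuggestion.get_configured_prompt() is not None


def test_extract_questions_parses_fenced_block():
    text = "```\nQ1\nQ2\nQ3\n```"
    assert NextQuestionSuggestion._extract_questions(text) == ["Q1", "Q2", "Q3"]
```
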
60 changes: 0 additions & 60 deletions templates/types/streaming/fastapi/app/api/services/suggestion.py

This file was deleted.

2 changes: 1 addition & 1 deletion templates/types/streaming/fastapi/pyproject.toml
@@ -14,8 +14,8 @@ fastapi = "^0.109.1"
 uvicorn = { extras = ["standard"], version = "^0.23.2" }
 python-dotenv = "^1.0.0"
 aiostream = "^0.5.2"
-llama-index = "0.11.6"
 cachetools = "^5.3.3"
+llama-index = "0.11.6"
 
 [build-system]
 requires = ["poetry-core"]