Skip to content

add tutorial files #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions tutorial/build_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import sys

from dotenv import load_dotenv

load_dotenv()

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Index

from promptflow.rag.config import (
    LocalSource,
    AzureAISearchConfig,
    EmbeddingsModelConfig,
    ConnectionConfig,
)
from promptflow.rag import build_index

# Client for the Azure AI project; all identifiers come from the environment
# (.env loaded above). DefaultAzureCredential resolves az-login / managed
# identity / environment credentials automatically.
client = MLClient(
    DefaultAzureCredential(),
    os.getenv("AZURE_SUBSCRIPTION_ID"),
    os.getenv("AZURE_RESOURCE_GROUP"),
    os.getenv("AZUREAI_PROJECT_NAME"),
)

# Resolve the data directory relative to this script so the tutorial works
# regardless of the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
data_directory = os.path.join(script_dir, "data/product-info/")

# Verify the source data exists before attempting to build the index;
# exit with a non-zero status when there is nothing to index.
if os.path.exists(data_directory):
    files = os.listdir(data_directory)  # List all files in the directory
    if files:
        print(
            f"Data directory '{data_directory}' exists and contains {len(files)} files."
        )
    else:
        print(f"Data directory '{data_directory}' exists but is empty.")
        sys.exit(1)
else:
    print(f"Data directory '{data_directory}' does not exist.")
    sys.exit(1)

index_name = "tutorial-index"  # your desired index name
index_path = build_index(
    name=index_name,  # name of your index
    vector_store="azure_ai_search",  # the type of vector store - in this case it is Azure AI Search. Users can also use "azure_cognitive_search"
    embeddings_model_config=EmbeddingsModelConfig(
        model_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
        deployment_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
        connection_config=ConnectionConfig(
            subscription_id=client.subscription_id,
            resource_group_name=client.resource_group_name,
            workspace_name=client.workspace_name,
            connection_name=os.getenv("AZURE_OPENAI_CONNECTION_NAME"),
        ),
    ),
    input_source=LocalSource(input_data=data_directory),  # the location of your files
    index_config=AzureAISearchConfig(
        ai_search_index_name=index_name,  # the name of the index store inside the azure ai search service
        ai_search_connection_config=ConnectionConfig(
            subscription_id=client.subscription_id,
            resource_group_name=client.resource_group_name,
            workspace_name=client.workspace_name,
            connection_name=os.getenv("AZURE_SEARCH_CONNECTION_NAME"),
        ),
    ),
    tokens_per_chunk=800,  # Optional field - Maximum number of tokens per chunk
    token_overlap_across_chunks=0,  # Optional field - Number of tokens to overlap between chunks
)

# Register the index so that it shows up in the cloud project.
client.indexes.create_or_update(Index(name=index_name, path=index_path))
3 changes: 3 additions & 0 deletions tutorial/copilot_flow/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Prompt flow runtime base image (includes the promptflow serving runtime).
FROM mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest
# Install the flow's additional Python dependencies on top of the base image.
COPY ./requirements.txt .
RUN pip install -r requirements.txt
36 changes: 36 additions & 0 deletions tutorial/copilot_flow/chat.prompty
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
name: Chat Prompt
description: A prompty that uses the chat API to respond to queries grounded in relevant documents
model:
api: chat
configuration:
type: azure_openai
inputs:
chat_input:
type: string
chat_history:
type: list
is_chat_history: true
default: []
documents:
type: object

---
system:
You are an AI assistant helping users with queries related to outdoor/camping gear and clothing.
If the question is not related to outdoor/camping gear and clothing, just say 'Sorry, I only can answer queries related to outdoor/camping gear and clothing. So, how can I help?'
Don't try to make up any answers.
If the question is related to outdoor/camping gear and clothing but vague, ask for clarifying questions instead of referencing documents. If the question is general, for example it uses "it" or "they", ask the user to specify what product they are asking about.
Use the following pieces of context to answer the questions about outdoor/camping gear and clothing as completely, correctly, and concisely as possible.
Do not add documentation reference in the response.

# Documents
{{documents}}

{% for item in chat_history %}
{{item.role}}
{{item.content}}
{% endfor %}

user:
{{chat_input}}
110 changes: 110 additions & 0 deletions tutorial/copilot_flow/copilot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import os
from dotenv import load_dotenv

load_dotenv()

from promptflow.core import Prompty, AzureOpenAIModelConfiguration
from promptflow.tracing import trace
from openai import AzureOpenAI

# <get_documents>
@trace
def get_documents(search_query: str, num_docs=3):
    """Return a grounding-context string built from the top *num_docs* hits.

    The query is embedded with the configured Azure OpenAI embedding
    deployment, then used for a vector search against the Azure AI Search
    index named by AZUREAI_SEARCH_INDEX_NAME. Each hit contributes its
    ``id`` and ``content`` to the returned string.
    """
    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
    from azure.search.documents import SearchClient
    from azure.search.documents.models import VectorizedQuery

    # Entra ID token provider for the Azure OpenAI client.
    token_provider = get_bearer_token_provider(
        DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )

    # Client bound to the search index that holds the product documents.
    search_client = SearchClient(
        endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"),
        credential=DefaultAzureCredential(),
        index_name=os.getenv("AZUREAI_SEARCH_INDEX_NAME"),
    )

    aoai_client = AzureOpenAI(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        azure_ad_token_provider=token_provider,
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    )

    # Embed the user's question so it can be compared against the index's
    # "contentVector" field.
    embedding_response = aoai_client.embeddings.create(
        input=search_query, model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
    )
    query_vector = embedding_response.data[0].embedding

    # Pure vector search (empty search_text); the call is wrapped in trace()
    # so the retrieval step shows up in promptflow tracing.
    vector_query = VectorizedQuery(
        vector=query_vector, k_nearest_neighbors=num_docs, fields="contentVector"
    )
    hits = trace(search_client.search)(
        search_text="", vector_queries=[vector_query], select=["id", "content"]
    )

    return "".join(f"\n>>> From: {hit['id']}\n{hit['content']}" for hit in hits)


# <get_documents>

from promptflow.core import Prompty, AzureOpenAIModelConfiguration

from pathlib import Path
from typing import TypedDict


class ChatResponse(TypedDict):
    """Shape of the payload returned by ``get_chat_response``."""

    # Retrieved grounding material for the reply.
    # NOTE(review): get_chat_response currently assigns the context string
    # produced by get_documents here — confirm whether ``dict`` is the
    # intended type or the annotation should be ``str``.
    context: dict
    # The assistant's chat-completion answer text.
    reply: str


def get_chat_response(chat_input: str, chat_history: list = None) -> ChatResponse:
    """Answer *chat_input*, grounded in documents retrieved from the index.

    Parameters
    ----------
    chat_input : str
        The user's current message.
    chat_history : list, optional
        Prior conversation turns as ``{"role", "content"}`` dicts. Defaults
        to no history. (A ``None`` default replaces the original mutable
        ``[]`` default — a classic Python pitfall where the list object is
        shared across calls.)

    Returns
    -------
    ChatResponse
        ``reply`` is the model's answer; ``context`` holds the retrieved
        document text used for grounding.
    """
    chat_history = chat_history if chat_history is not None else []

    model_config = AzureOpenAIModelConfiguration(
        azure_deployment=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    )

    search_query = chat_input

    # Only extract intent if there is chat_history: with no history the raw
    # user message is already the best possible search query.
    if len(chat_history) > 0:
        # Rewrite the current query into a standalone search query using the
        # conversation so far (resolves pronouns such as "it" / "they").
        path_to_prompty = f"{Path(__file__).parent.absolute().as_posix()}/queryIntent.prompty"  # pass absolute file path to prompty
        intent_prompty = Prompty.load(
            path_to_prompty,
            model={
                "configuration": model_config,
                "parameters": {
                    "max_tokens": 256,
                },
            },
        )
        search_query = intent_prompty(query=chat_input, chat_history=chat_history)

    # Retrieve relevant documents and context given chat_history and the
    # current user query (chat_input).
    documents = get_documents(search_query, 3)

    # Send query + document context to chat completion for a grounded response.
    path_to_prompty = f"{Path(__file__).parent.absolute().as_posix()}/chat.prompty"
    chat_prompty = Prompty.load(
        path_to_prompty,
        model={
            "configuration": model_config,
            "parameters": {"max_tokens": 256, "temperature": 0.2},
        },
    )
    result = chat_prompty(
        chat_history=chat_history, chat_input=chat_input, documents=documents
    )

    return dict(reply=result, context=documents)
3 changes: 3 additions & 0 deletions tutorial/copilot_flow/flow.flex.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Flex-flow entry point: function get_chat_response in module copilot.
entry: copilot:get_chat_response
environment:
  # Python dependencies installed into the flow's runtime environment.
  python_requirements_txt: requirements.txt
37 changes: 37 additions & 0 deletions tutorial/copilot_flow/input_with_chat_history.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"chat_input": "how much does it cost?",
"chat_history": [
{
"role": "user",
"content": "are the trailwalker shoes waterproof?"
},
{
"role": "assistant",
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
},
{
"role": "user",
"content": "how much do they cost?"
},
{
"role": "assistant",
"content": "The TrailWalker Hiking Shoes are priced at $110."
},
{
"role": "user",
"content": "do you have waterproof tents?"
},
{
"role": "assistant",
"content": "Yes, we have waterproof tents available. Can you please provide more information about the type or size of tent you are looking for?"
},
{
"role": "user",
"content": "which is your most waterproof tent?"
},
{
"role": "assistant",
"content": "Our most waterproof tent is the Alpine Explorer Tent. It is designed with a waterproof material and has a rainfly with a waterproof rating of 3000mm. This tent provides reliable protection against rain and moisture."
}
]
}
93 changes: 93 additions & 0 deletions tutorial/copilot_flow/queryIntent.prompty
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
name: Chat Prompt
description: A prompty that extracts the user's query intent based on the current query and the chat_history of the conversation
model:
api: chat
configuration:
type: azure_openai
inputs:
query:
type: string
chat_history:
type: list
is_chat_history: true
default: []

---
system:
- You are an AI assistant reading a current user query and chat_history.
- Given the chat_history, and current user's query, infer the user's intent expressed in the current user query.
- Once you infer the intent, respond with a search query that can be used to retrieve relevant documents for the current user's query based on the intent
- Be specific in what the user is asking about, but disregard parts of the chat history that are not relevant to the user's intent.

Example 1:
With a chat_history like below:
\```
chat_history: [ {
"role": "user",
"content": "are the trailwalker shoes waterproof?"
},
{
"role": "assistant",
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
}
]
\```
User query: "how much do they cost?"

Intent: "The user wants to know how much the Trailwalker Hiking Shoes cost."
Search query: "price of Trailwalker Hiking Shoes"


Example 2:
With a chat_history like below:
\```
chat_history: [ {
"role": "user",
"content": "are the trailwalker shoes waterproof?"
},
{
"role": "assistant",
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
},
{
"role": "user",
"content": "how much do they cost?"
},
{
"role": "assistant",
"content": "The TrailWalker Hiking Shoes are priced at $110."
},
{
"role": "user",
"content": "do you have waterproof tents?"
},
{
"role": "assistant",
"content": "Yes, we have waterproof tents available. Can you please provide more information about the type or size of tent you are looking for?"
},
{
"role": "user",
"content": "which is your most waterproof tent?"
},
{
"role": "assistant",
"content": "Our most waterproof tent is the Alpine Explorer Tent. It is designed with a waterproof material and has a rainfly with a waterproof rating of 3000mm. This tent provides reliable protection against rain and moisture."
}
]
\```
User query: "how much does it cost?"

Intent: "the user would like to know how much the Alpine Explorer Tent costs"
Search query: "price of Alpine Explorer Tent"

{% for item in chat_history %}
{{item.role}}
{{item.content}}
{% endfor %}

Current user query:
{{query}}

Search query:

10 changes: 10 additions & 0 deletions tutorial/copilot_flow/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
openai
azure-identity
azure-search-documents==11.4.0
promptflow[azure]==1.11.0
promptflow-tracing==1.11.0
promptflow-tools==1.4.0
promptflow-evals==0.3.0
jinja2
aiohttp
python-dotenv
Binary file added tutorial/data.zip
Binary file not shown.
Loading