Skip to content

Commit a88c2f5

Browse files
authored
Merge pull request Azure-Samples#19 from sdgilley/sdg-tutorial
add tutorial files
2 parents 5b9336f + b6d02d8 commit a88c2f5

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+5055
-0
lines changed

tutorial/build_index.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import os
2+
from dotenv import load_dotenv
3+
4+
load_dotenv()
5+
6+
from azure.ai.ml import MLClient
7+
from azure.identity import DefaultAzureCredential
8+
from azure.ai.ml.entities import Index
9+
10+
from promptflow.rag.config import (
11+
LocalSource,
12+
AzureAISearchConfig,
13+
EmbeddingsModelConfig,
14+
ConnectionConfig,
15+
)
16+
from promptflow.rag import build_index
17+
18+
# Client for the Azure AI project. The subscription, resource group and
# project name are read from environment variables; os.getenv yields None
# for any variable that is not set.
client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=os.getenv("AZURE_SUBSCRIPTION_ID"),
    resource_group_name=os.getenv("AZURE_RESOURCE_GROUP"),
    workspace_name=os.getenv("AZUREAI_PROJECT_NAME"),
)
24+
import os
25+
26+
# Resolve the data directory relative to this script so the result does not
# depend on the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
data_directory = os.path.join(script_dir, "data/product-info/")

# Stop early when there is nothing to index. SystemExit is raised directly
# (message goes to stderr, exit status is 1) because the exit() helper is
# injected by the site module for interactive use and is not guaranteed to
# be available when the script runs.
if not os.path.isdir(data_directory):
    raise SystemExit(f"Data directory '{data_directory}' does not exist.")

files = os.listdir(data_directory)  # every entry in the directory
if not files:
    raise SystemExit(f"Data directory '{data_directory}' exists but is empty.")

print(f"Data directory '{data_directory}' exists and contains {len(files)} files.")
43+
44+
index_name = "tutorial-index"  # your desired index name


def _project_connection(connection_name):
    """Build a ConnectionConfig for a named connection in the client's project."""
    return ConnectionConfig(
        subscription_id=client.subscription_id,
        resource_group_name=client.resource_group_name,
        workspace_name=client.workspace_name,
        connection_name=connection_name,
    )


# The embedding deployment name is used for both the model name and the
# deployment name.
embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
embeddings_config = EmbeddingsModelConfig(
    model_name=embedding_deployment,
    deployment_name=embedding_deployment,
    connection_config=_project_connection(os.getenv("AZURE_OPENAI_CONNECTION_NAME")),
)

# The index inside the Azure AI Search service carries the same name as the
# index registered in the project.
search_config = AzureAISearchConfig(
    ai_search_index_name=index_name,
    ai_search_connection_config=_project_connection(
        os.getenv("AZURE_SEARCH_CONNECTION_NAME")
    ),
)

index_path = build_index(
    name=index_name,
    # Vector store type - here Azure AI Search. The tutorial also mentions
    # "azure_cognitive search" as an alternative; verify the exact spelling
    # against the promptflow-rag documentation before using it.
    vector_store="azure_ai_search",
    embeddings_model_config=embeddings_config,
    input_source=LocalSource(input_data=data_directory),  # the location of your files
    index_config=search_config,
    tokens_per_chunk=800,  # optional: maximum number of tokens per chunk
    token_overlap_across_chunks=0,  # optional: tokens shared between adjacent chunks
)

# register the index so that it shows up in the cloud project
registered_index = Index(name=index_name, path=index_path)
client.indexes.create_or_update(registered_index)

tutorial/copilot_flow/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Start from the prompt flow runtime image and add this flow's Python dependencies.
FROM mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest
COPY ./requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

tutorial/copilot_flow/chat.prompty

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
---
2+
name: Chat Prompt
3+
description: A prompty that uses the chat API to respond to queries grounded in relevant documents
4+
model:
5+
api: chat
6+
configuration:
7+
type: azure_openai
8+
inputs:
9+
chat_input:
10+
type: string
11+
chat_history:
12+
type: list
13+
is_chat_history: true
14+
default: []
15+
documents:
16+
type: object
17+
18+
---
19+
system:
20+
You are an AI assistant helping users with queries related to outdoor/camping gear and clothing.
21+
If the question is not related to outdoor/camping gear and clothing, just say 'Sorry, I can only answer queries related to outdoor/camping gear and clothing. So, how can I help?'
22+
Don't try to make up any answers.
23+
If the question is related to outdoor/camping gear and clothing but vague, ask clarifying questions instead of referencing documents. If the question is general, for example it uses "it" or "they", ask the user to specify what product they are asking about.
24+
Use the following pieces of context to answer the questions about outdoor/camping gear and clothing as completely, correctly, and concisely as possible.
25+
Do not add documentation reference in the response.
26+
27+
# Documents
28+
{{documents}}
29+
30+
{% for item in chat_history %}
31+
{{item.role}}:
32+
{{item.content}}
33+
{% endfor %}
34+
35+
user:
36+
{{chat_input}}

tutorial/copilot_flow/copilot.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import os
2+
from dotenv import load_dotenv
3+
4+
load_dotenv()
5+
6+
from promptflow.core import Prompty, AzureOpenAIModelConfiguration
7+
from promptflow.tracing import trace
8+
from openai import AzureOpenAI
9+
10+
# <get_documents>
11+
@trace
def get_documents(search_query: str, num_docs=3):
    """Retrieve the documents most similar to a query from Azure AI Search.

    The query text is embedded with the Azure OpenAI embedding deployment and
    the resulting vector is used for a vector search on the "contentVector"
    field of the index.

    Configuration is read from the environment: AZUREAI_SEARCH_INDEX_NAME,
    AZURE_SEARCH_ENDPOINT, AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_VERSION
    and AZURE_OPENAI_EMBEDDING_DEPLOYMENT.

    Args:
        search_query: Text to embed and search for.
        num_docs: Number of nearest neighbours requested from the vector
            search.

    Returns:
        One string containing, for every hit, a ">>> From: <id>" header line
        followed by the hit's content; an empty string when there are no hits.
    """
    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
    from azure.search.documents import SearchClient
    from azure.search.documents.models import VectorizedQuery

    # A single credential serves both the search client and the Azure OpenAI
    # token provider instead of constructing two separate ones.
    credential = DefaultAzureCredential()
    token_provider = get_bearer_token_provider(
        credential, "https://cognitiveservices.azure.com/.default"
    )

    # client used to retrieve documents relevant to the user's question
    search_client = SearchClient(
        endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"),
        credential=credential,
        index_name=os.getenv("AZUREAI_SEARCH_INDEX_NAME"),
    )

    aoai_client = AzureOpenAI(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        azure_ad_token_provider=token_provider,
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    )

    # generate a vector embedding of the user's question
    embedding = aoai_client.embeddings.create(
        input=search_query, model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
    )
    embedding_to_query = embedding.data[0].embedding

    # use the vector embedding to do a vector search on the index
    vector_query = VectorizedQuery(
        vector=embedding_to_query, k_nearest_neighbors=num_docs, fields="contentVector"
    )
    # the search call is wrapped in trace() so it is recorded as its own span
    results = trace(search_client.search)(
        search_text="", vector_queries=[vector_query], select=["id", "content"]
    )

    # join once instead of growing a string inside the loop
    return "".join(
        f"\n>>> From: {result['id']}\n{result['content']}" for result in results
    )
55+
56+
57+
# </get_documents>
58+
59+
from promptflow.core import Prompty, AzureOpenAIModelConfiguration
60+
61+
from pathlib import Path
62+
from typing import TypedDict
63+
64+
65+
class ChatResponse(TypedDict):
    """Shape of the value returned by get_chat_response."""

    # Retrieved document text that was passed to the chat prompt.
    # get_documents builds it as one string, so the field is typed str.
    context: str
    # Result of calling the chat prompty.
    reply: str
68+
69+
70+
def get_chat_response(chat_input: str, chat_history: list = None) -> ChatResponse:
    """Answer a chat message using documents retrieved for it.

    When there is prior conversation, the queryIntent prompty first rewrites
    the message into a search query. The query is passed to get_documents and
    the retrieved text is given to the chat prompty together with the message
    and the history.

    Args:
        chat_input: The current user message.
        chat_history: Earlier messages, each with "role" and "content" keys.
            None (the default) is treated as an empty history. The annotation
            stays a plain ``list`` so the declared parameter type is the same
            as before.

    Returns:
        A dict with "reply" (the chat prompty result) and "context" (the
        retrieved document text).
    """
    # None replaces the former mutable ``[]`` default, which was a single
    # list object shared by every call.
    if chat_history is None:
        chat_history = []

    model_config = AzureOpenAIModelConfiguration(
        azure_deployment=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    )

    # prompty files are loaded by absolute path, relative to this module
    flow_dir = Path(__file__).parent.absolute()

    search_query = chat_input

    # Only extract intent if there is chat_history
    if chat_history:
        intent_prompty = Prompty.load(
            (flow_dir / "queryIntent.prompty").as_posix(),
            model={
                "configuration": model_config,
                "parameters": {
                    "max_tokens": 256,
                },
            },
        )
        search_query = intent_prompty(query=chat_input, chat_history=chat_history)

    # retrieve relevant documents and context given chat_history and current user query (chat_input)
    documents = get_documents(search_query, 3)

    # send query + document context to chat completion for a response
    chat_prompty = Prompty.load(
        (flow_dir / "chat.prompty").as_posix(),
        model={
            "configuration": model_config,
            "parameters": {"max_tokens": 256, "temperature": 0.2},
        },
    )
    result = chat_prompty(
        chat_history=chat_history, chat_input=chat_input, documents=documents
    )

    return {"reply": result, "context": documents}

tutorial/copilot_flow/flow.flex.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
entry: copilot:get_chat_response
2+
environment:
3+
python_requirements_txt: requirements.txt
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"chat_input": "how much does it cost?",
3+
"chat_history": [
4+
{
5+
"role": "user",
6+
"content": "are the trailwalker shoes waterproof?"
7+
},
8+
{
9+
"role": "assistant",
10+
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
11+
},
12+
{
13+
"role": "user",
14+
"content": "how much do they cost?"
15+
},
16+
{
17+
"role": "assistant",
18+
"content": "The TrailWalker Hiking Shoes are priced at $110."
19+
},
20+
{
21+
"role": "user",
22+
"content": "do you have waterproof tents?"
23+
},
24+
{
25+
"role": "assistant",
26+
"content": "Yes, we have waterproof tents available. Can you please provide more information about the type or size of tent you are looking for?"
27+
},
28+
{
29+
"role": "user",
30+
"content": "which is your most waterproof tent?"
31+
},
32+
{
33+
"role": "assistant",
34+
"content": "Our most waterproof tent is the Alpine Explorer Tent. It is designed with a waterproof material and has a rainfly with a waterproof rating of 3000mm. This tent provides reliable protection against rain and moisture."
35+
}
36+
]
37+
}
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
---
2+
name: Chat Prompt
3+
description: A prompty that extracts the user's query intent based on the current_query and chat_history of the conversation
4+
model:
5+
api: chat
6+
configuration:
7+
type: azure_openai
8+
inputs:
9+
query:
10+
type: string
11+
chat_history:
12+
type: list
13+
is_chat_history: true
14+
default: []
15+
16+
---
17+
system:
18+
- You are an AI assistant reading a current user query and chat_history.
19+
- Given the chat_history, and current user's query, infer the user's intent expressed in the current user query.
20+
- Once you infer the intent, respond with a search query that can be used to retrieve relevant documents for the current user's query based on the intent
21+
- Be specific in what the user is asking about, but disregard parts of the chat history that are not relevant to the user's intent.
22+
23+
Example 1:
24+
With a chat_history like below:
25+
\```
26+
chat_history: [ {
27+
"role": "user",
28+
"content": "are the trailwalker shoes waterproof?"
29+
},
30+
{
31+
"role": "assistant",
32+
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
33+
}
34+
]
35+
\```
36+
User query: "how much do they cost?"
37+
38+
Intent: "The user wants to know how much the Trailwalker Hiking Shoes cost."
39+
Search query: "price of Trailwalker Hiking Shoes"
40+
41+
42+
Example 2:
43+
With a chat_history like below:
44+
\```
45+
chat_history: [ {
46+
"role": "user",
47+
"content": "are the trailwalker shoes waterproof?"
48+
},
49+
{
50+
"role": "assistant",
51+
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
52+
},
53+
{
54+
"role": "user",
55+
"content": "how much do they cost?"
56+
},
57+
{
58+
"role": "assistant",
59+
"content": "The TrailWalker Hiking Shoes are priced at $110."
60+
},
61+
{
62+
"role": "user",
63+
"content": "do you have waterproof tents?"
64+
},
65+
{
66+
"role": "assistant",
67+
"content": "Yes, we have waterproof tents available. Can you please provide more information about the type or size of tent you are looking for?"
68+
},
69+
{
70+
"role": "user",
71+
"content": "which is your most waterproof tent?"
72+
},
73+
{
74+
"role": "assistant",
75+
"content": "Our most waterproof tent is the Alpine Explorer Tent. It is designed with a waterproof material and has a rainfly with a waterproof rating of 3000mm. This tent provides reliable protection against rain and moisture."
76+
}
77+
]
78+
\```
79+
User query: "how much does it cost?"
80+
81+
Intent: "the user would like to know how much the Alpine Explorer Tent costs"
82+
Search query: "price of Alpine Explorer Tent"
83+
84+
{% for item in chat_history %}
85+
{{item.role}}
86+
{{item.content}}
87+
{% endfor %}
88+
89+
Current user query:
90+
{{query}}
91+
92+
Search query:
93+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
openai
2+
azure-identity
3+
azure-search-documents==11.4.0
4+
promptflow[azure]==1.11.0
5+
promptflow-tracing==1.11.0
6+
promptflow-tools==1.4.0
7+
promptflow-evals==0.3.0
8+
jinja2
9+
aiohttp
10+
python-dotenv

tutorial/data.zip

95.3 KB
Binary file not shown.

0 commit comments

Comments
 (0)