Skip to content

add tutorial files #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions tutorial/build_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import sys

from dotenv import load_dotenv

load_dotenv()

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Index

from promptflow.rag.config import (
    LocalSource,
    AzureAISearchConfig,
    EmbeddingsModelConfig,
    ConnectionConfig,
)
from promptflow.rag import build_index

# Client for the Azure AI project; all identifiers come from the environment
# (.env loaded above). DefaultAzureCredential resolves az-login / managed
# identity / environment credentials automatically.
client = MLClient(
    DefaultAzureCredential(),
    os.getenv("AZURE_SUBSCRIPTION_ID"),
    os.getenv("AZURE_RESOURCE_GROUP"),
    os.getenv("AZUREAI_PROJECT_NAME"),
)

# Resolve the data directory relative to this script so the tutorial works
# regardless of the current working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
data_directory = os.path.join(script_dir, "data/product-info/")

# Verify the source data exists before attempting to build the index;
# exit with a non-zero status when there is nothing to index.
if os.path.exists(data_directory):
    files = os.listdir(data_directory)  # List all files in the directory
    if files:
        print(
            f"Data directory '{data_directory}' exists and contains {len(files)} files."
        )
    else:
        print(f"Data directory '{data_directory}' exists but is empty.")
        sys.exit(1)
else:
    print(f"Data directory '{data_directory}' does not exist.")
    sys.exit(1)

index_name = "tutorial-index"  # your desired index name
index_path = build_index(
    name=index_name,  # name of your index
    vector_store="azure_ai_search",  # the type of vector store - in this case it is Azure AI Search. Users can also use "azure_cognitive_search"
    embeddings_model_config=EmbeddingsModelConfig(
        model_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
        deployment_name=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT"),
        connection_config=ConnectionConfig(
            subscription_id=client.subscription_id,
            resource_group_name=client.resource_group_name,
            workspace_name=client.workspace_name,
            connection_name=os.getenv("AZURE_OPENAI_CONNECTION_NAME"),
        ),
    ),
    input_source=LocalSource(input_data=data_directory),  # the location of your files
    index_config=AzureAISearchConfig(
        ai_search_index_name=index_name,  # the name of the index store inside the azure ai search service
        ai_search_connection_config=ConnectionConfig(
            subscription_id=client.subscription_id,
            resource_group_name=client.resource_group_name,
            workspace_name=client.workspace_name,
            connection_name=os.getenv("AZURE_SEARCH_CONNECTION_NAME"),
        ),
    ),
    tokens_per_chunk=800,  # Optional field - Maximum number of tokens per chunk
    token_overlap_across_chunks=0,  # Optional field - Number of tokens to overlap between chunks
)

# Register the index so that it shows up in the cloud project.
client.indexes.create_or_update(Index(name=index_name, path=index_path))
3 changes: 3 additions & 0 deletions tutorial/copilot_flow/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Prompt flow runtime base image (includes the promptflow serving runtime).
FROM mcr.microsoft.com/azureml/promptflow/promptflow-runtime:latest
# Install the flow's additional Python dependencies on top of the base image.
COPY ./requirements.txt .
RUN pip install -r requirements.txt
36 changes: 36 additions & 0 deletions tutorial/copilot_flow/chat.prompty
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
name: Chat Prompt
description: A prompty that uses the chat API to respond to queries grounded in relevant documents
model:
api: chat
configuration:
type: azure_openai
inputs:
chat_input:
type: string
chat_history:
type: list
is_chat_history: true
default: []
documents:
type: object

---
system:
You are an AI assistant helping users with queries related to outdoor/camping gear and clothing.
If the question is not related to outdoor/camping gear and clothing, just say 'Sorry, I only can answer queries related to outdoor/camping gear and clothing. So, how can I help?'
Don't try to make up any answers.
If the question is related to outdoor/camping gear and clothing but vague, ask for clarifying questions instead of referencing documents. If the question is general, for example it uses "it" or "they", ask the user to specify what product they are asking about.
Use the following pieces of context to answer the questions about outdoor/camping gear and clothing as completely, correctly, and concisely as possible.
Do not add documentation reference in the response.

# Documents
{{documents}}

{% for item in chat_history %}
{{item.role}}
{{item.content}}
{% endfor %}

user:
{{chat_input}}
110 changes: 110 additions & 0 deletions tutorial/copilot_flow/copilot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import os
from dotenv import load_dotenv

load_dotenv()

from promptflow.core import Prompty, AzureOpenAIModelConfiguration
from promptflow.tracing import trace
from openai import AzureOpenAI

# <get_documents>
@trace
def get_documents(search_query: str, num_docs=3):
    """Return a grounding-context string built from the top *num_docs* hits.

    The query is embedded with the configured Azure OpenAI embedding
    deployment, then used for a vector search against the Azure AI Search
    index named by AZUREAI_SEARCH_INDEX_NAME. Each hit contributes its
    ``id`` and ``content`` to the returned string.
    """
    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
    from azure.search.documents import SearchClient
    from azure.search.documents.models import VectorizedQuery

    # Entra ID token provider for the Azure OpenAI client.
    token_provider = get_bearer_token_provider(
        DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )

    # Client bound to the search index that holds the product documents.
    search_client = SearchClient(
        endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"),
        credential=DefaultAzureCredential(),
        index_name=os.getenv("AZUREAI_SEARCH_INDEX_NAME"),
    )

    aoai_client = AzureOpenAI(
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        azure_ad_token_provider=token_provider,
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    )

    # Embed the user's question so it can be compared against the index's
    # "contentVector" field.
    embedding_response = aoai_client.embeddings.create(
        input=search_query, model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
    )
    query_vector = embedding_response.data[0].embedding

    # Pure vector search (empty search_text); the call is wrapped in trace()
    # so the retrieval step shows up in promptflow tracing.
    vector_query = VectorizedQuery(
        vector=query_vector, k_nearest_neighbors=num_docs, fields="contentVector"
    )
    hits = trace(search_client.search)(
        search_text="", vector_queries=[vector_query], select=["id", "content"]
    )

    return "".join(f"\n>>> From: {hit['id']}\n{hit['content']}" for hit in hits)


# <get_documents>

from promptflow.core import Prompty, AzureOpenAIModelConfiguration

from pathlib import Path
from typing import TypedDict


class ChatResponse(TypedDict):
    """Shape of the payload returned by ``get_chat_response``."""

    # Retrieved grounding material for the reply.
    # NOTE(review): get_chat_response currently assigns the context string
    # produced by get_documents here — confirm whether ``dict`` is the
    # intended type or the annotation should be ``str``.
    context: dict
    # The assistant's chat-completion answer text.
    reply: str


def get_chat_response(chat_input: str, chat_history: list = None) -> ChatResponse:
    """Answer *chat_input*, grounded in documents retrieved from the index.

    Parameters
    ----------
    chat_input : str
        The user's current message.
    chat_history : list, optional
        Prior conversation turns as ``{"role", "content"}`` dicts. Defaults
        to no history. (A ``None`` default replaces the original mutable
        ``[]`` default — a classic Python pitfall where the list object is
        shared across calls.)

    Returns
    -------
    ChatResponse
        ``reply`` is the model's answer; ``context`` holds the retrieved
        document text used for grounding.
    """
    chat_history = chat_history if chat_history is not None else []

    model_config = AzureOpenAIModelConfiguration(
        azure_deployment=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"),
        api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    )

    search_query = chat_input

    # Only extract intent if there is chat_history: with no history the raw
    # user message is already the best possible search query.
    if len(chat_history) > 0:
        # Rewrite the current query into a standalone search query using the
        # conversation so far (resolves pronouns such as "it" / "they").
        path_to_prompty = f"{Path(__file__).parent.absolute().as_posix()}/queryIntent.prompty"  # pass absolute file path to prompty
        intent_prompty = Prompty.load(
            path_to_prompty,
            model={
                "configuration": model_config,
                "parameters": {
                    "max_tokens": 256,
                },
            },
        )
        search_query = intent_prompty(query=chat_input, chat_history=chat_history)

    # Retrieve relevant documents and context given chat_history and the
    # current user query (chat_input).
    documents = get_documents(search_query, 3)

    # Send query + document context to chat completion for a grounded response.
    path_to_prompty = f"{Path(__file__).parent.absolute().as_posix()}/chat.prompty"
    chat_prompty = Prompty.load(
        path_to_prompty,
        model={
            "configuration": model_config,
            "parameters": {"max_tokens": 256, "temperature": 0.2},
        },
    )
    result = chat_prompty(
        chat_history=chat_history, chat_input=chat_input, documents=documents
    )

    return dict(reply=result, context=documents)
3 changes: 3 additions & 0 deletions tutorial/copilot_flow/flow.flex.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Flex-flow entry point: function get_chat_response in module copilot.
entry: copilot:get_chat_response
environment:
  # Python dependencies installed into the flow's runtime environment.
  python_requirements_txt: requirements.txt
37 changes: 37 additions & 0 deletions tutorial/copilot_flow/input_with_chat_history.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"chat_input": "how much does it cost?",
"chat_history": [
{
"role": "user",
"content": "are the trailwalker shoes waterproof?"
},
{
"role": "assistant",
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
},
{
"role": "user",
"content": "how much do they cost?"
},
{
"role": "assistant",
"content": "The TrailWalker Hiking Shoes are priced at $110."
},
{
"role": "user",
"content": "do you have waterproof tents?"
},
{
"role": "assistant",
"content": "Yes, we have waterproof tents available. Can you please provide more information about the type or size of tent you are looking for?"
},
{
"role": "user",
"content": "which is your most waterproof tent?"
},
{
"role": "assistant",
"content": "Our most waterproof tent is the Alpine Explorer Tent. It is designed with a waterproof material and has a rainfly with a waterproof rating of 3000mm. This tent provides reliable protection against rain and moisture."
}
]
}
93 changes: 93 additions & 0 deletions tutorial/copilot_flow/queryIntent.prompty
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
---
name: Chat Prompt
description: A prompty that extracts the user's query intent based on the current query and the chat_history of the conversation
model:
api: chat
configuration:
type: azure_openai
inputs:
query:
type: string
chat_history:
type: list
is_chat_history: true
default: []

---
system:
- You are an AI assistant reading a current user query and chat_history.
- Given the chat_history, and current user's query, infer the user's intent expressed in the current user query.
- Once you infer the intent, respond with a search query that can be used to retrieve relevant documents for the current user's query based on the intent
- Be specific in what the user is asking about, but disregard parts of the chat history that are not relevant to the user's intent.

Example 1:
With a chat_history like below:
\```
chat_history: [ {
"role": "user",
"content": "are the trailwalker shoes waterproof?"
},
{
"role": "assistant",
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
}
]
\```
User query: "how much do they cost?"

Intent: "The user wants to know how much the Trailwalker Hiking Shoes cost."
Search query: "price of Trailwalker Hiking Shoes"


Example 2:
With a chat_history like below:
\```
chat_history: [ {
"role": "user",
"content": "are the trailwalker shoes waterproof?"
},
{
"role": "assistant",
"content": "Yes, the TrailWalker Hiking Shoes are waterproof. They are designed with a durable and waterproof construction to withstand various terrains and weather conditions."
},
{
"role": "user",
"content": "how much do they cost?"
},
{
"role": "assistant",
"content": "The TrailWalker Hiking Shoes are priced at $110."
},
{
"role": "user",
"content": "do you have waterproof tents?"
},
{
"role": "assistant",
"content": "Yes, we have waterproof tents available. Can you please provide more information about the type or size of tent you are looking for?"
},
{
"role": "user",
"content": "which is your most waterproof tent?"
},
{
"role": "assistant",
"content": "Our most waterproof tent is the Alpine Explorer Tent. It is designed with a waterproof material and has a rainfly with a waterproof rating of 3000mm. This tent provides reliable protection against rain and moisture."
}
]
\```
User query: "how much does it cost?"

Intent: "the user would like to know how much the Alpine Explorer Tent costs"
Search query: "price of Alpine Explorer Tent"

{% for item in chat_history %}
{{item.role}}
{{item.content}}
{% endfor %}

Current user query:
{{query}}

Search query:

10 changes: 10 additions & 0 deletions tutorial/copilot_flow/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
openai
azure-identity
azure-search-documents==11.4.0
promptflow[azure]==1.11.0
promptflow-tracing==1.11.0
promptflow-tools==1.4.0
promptflow-evals==0.3.0
jinja2
aiohttp
python-dotenv
Binary file added tutorial/data.zip
Binary file not shown.
Loading