diff --git a/.azdo/pipelines/azure-dev.yml b/.azdo/pipelines/azure-dev.yml
index 0e0b4ffef3..b500d40e03 100644
--- a/.azdo/pipelines/azure-dev.yml
+++ b/.azdo/pipelines/azure-dev.yml
@@ -69,6 +69,7 @@ steps:
       AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY)
       AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION)
       AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU: $(AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU)
+      AZURE_OPENAI_REASONING_EFFORT: $(AZURE_OPENAI_REASONING_EFFORT)
       AZURE_OPENAI_EMB_MODEL_NAME: $(AZURE_OPENAI_EMB_MODEL_NAME)
       AZURE_OPENAI_EMB_DEPLOYMENT: $(AZURE_OPENAI_EMB_DEPLOYMENT)
       AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: $(AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY)
diff --git a/.github/workflows/azure-dev.yml b/.github/workflows/azure-dev.yml
index 37cb43b118..d20cc20f90 100644
--- a/.github/workflows/azure-dev.yml
+++ b/.github/workflows/azure-dev.yml
@@ -60,6 +60,7 @@ jobs:
       AZURE_OPENAI_CHATGPT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT }}
       AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_CAPACITY }}
       AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION }}
+      AZURE_OPENAI_REASONING_EFFORT: ${{ vars.AZURE_OPENAI_REASONING_EFFORT }}
       AZURE_OPENAI_EMB_MODEL_NAME: ${{ vars.AZURE_OPENAI_EMB_MODEL_NAME }}
       AZURE_OPENAI_EMB_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT }}
       AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EMB_DEPLOYMENT_CAPACITY }}
diff --git a/README.md b/README.md
index fc1f4e53ac..21c4cf37cc 100644
--- a/README.md
+++ b/README.md
@@ -256,6 +256,7 @@ You can find extensive documentation in the [docs](docs/README.md) folder:
     - [All features](docs/deploy_features.md)
     - [Login and access control](docs/login_and_acl.md)
     - [GPT-4 Turbo with Vision](docs/gpt4v.md)
+    - [Reasoning](docs/reasoning.md)
     - [Private endpoints](docs/deploy_private.md)
   - [Sharing deployment environments](docs/sharing_environments.md)
 - [Local development](docs/localdev.md)
diff --git a/app/backend/app.py b/app/backend/app.py
index b5e4084f76..e9380036e8 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -65,11 +65,13 @@
     CONFIG_CHAT_HISTORY_COSMOS_ENABLED,
     CONFIG_CHAT_VISION_APPROACH,
     CONFIG_CREDENTIAL,
+    CONFIG_DEFAULT_REASONING_EFFORT,
     CONFIG_GPT4V_DEPLOYED,
     CONFIG_INGESTER,
     CONFIG_LANGUAGE_PICKER_ENABLED,
     CONFIG_OPENAI_CLIENT,
     CONFIG_QUERY_REWRITING_ENABLED,
+    CONFIG_REASONING_EFFORT_ENABLED,
     CONFIG_SEARCH_CLIENT,
     CONFIG_SEMANTIC_RANKER_DEPLOYED,
     CONFIG_SPEECH_INPUT_ENABLED,
@@ -79,6 +81,7 @@
     CONFIG_SPEECH_SERVICE_LOCATION,
     CONFIG_SPEECH_SERVICE_TOKEN,
     CONFIG_SPEECH_SERVICE_VOICE,
+    CONFIG_STREAMING_ENABLED,
     CONFIG_USER_BLOB_CONTAINER_CLIENT,
     CONFIG_USER_UPLOAD_ENABLED,
     CONFIG_VECTOR_SEARCH_ENABLED,
@@ -293,6 +296,9 @@ def config():
             "showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED],
             "showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
             "showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED],
+            "showReasoningEffortOption": current_app.config[CONFIG_REASONING_EFFORT_ENABLED],
+            "streamingEnabled": current_app.config[CONFIG_STREAMING_ENABLED],
+            "defaultReasoningEffort": current_app.config[CONFIG_DEFAULT_REASONING_EFFORT],
             "showVectorOption": current_app.config[CONFIG_VECTOR_SEARCH_ENABLED],
             "showUserUpload": current_app.config[CONFIG_USER_UPLOAD_ENABLED],
             "showLanguagePicker": current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED],
@@ -423,6 +429,7 @@ async def setup_clients():
     OPENAI_CHATGPT_MODEL = os.environ["AZURE_OPENAI_CHATGPT_MODEL"]
     OPENAI_EMB_MODEL = os.getenv("AZURE_OPENAI_EMB_MODEL_NAME", "text-embedding-ada-002")
     OPENAI_EMB_DIMENSIONS = int(os.getenv("AZURE_OPENAI_EMB_DIMENSIONS") or 1536)
+    OPENAI_REASONING_EFFORT = os.getenv("AZURE_OPENAI_REASONING_EFFORT")
     # Used with Azure OpenAI deployments
     AZURE_OPENAI_SERVICE = os.getenv("AZURE_OPENAI_SERVICE")
     AZURE_OPENAI_GPT4V_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT4V_DEPLOYMENT")
@@ -640,6 +647,13 @@ async def setup_clients():
     current_app.config[CONFIG_QUERY_REWRITING_ENABLED] = (
         AZURE_SEARCH_QUERY_REWRITING == "true" and AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
     )
+    current_app.config[CONFIG_DEFAULT_REASONING_EFFORT] = OPENAI_REASONING_EFFORT
+    current_app.config[CONFIG_REASONING_EFFORT_ENABLED] = OPENAI_CHATGPT_MODEL in Approach.GPT_REASONING_MODELS
+    current_app.config[CONFIG_STREAMING_ENABLED] = (
+        bool(USE_GPT4V)
+        or OPENAI_CHATGPT_MODEL not in Approach.GPT_REASONING_MODELS
+        or Approach.GPT_REASONING_MODELS[OPENAI_CHATGPT_MODEL].streaming
+    )
     current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
     current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
     current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER
@@ -667,6 +681,7 @@ async def setup_clients():
         query_language=AZURE_SEARCH_QUERY_LANGUAGE,
         query_speller=AZURE_SEARCH_QUERY_SPELLER,
         prompt_manager=prompt_manager,
+        reasoning_effort=OPENAI_REASONING_EFFORT,
     )
 
     # ChatReadRetrieveReadApproach is used by /chat for multi-turn conversation
@@ -684,12 +699,26 @@ async def setup_clients():
         query_language=AZURE_SEARCH_QUERY_LANGUAGE,
         query_speller=AZURE_SEARCH_QUERY_SPELLER,
         prompt_manager=prompt_manager,
+        reasoning_effort=OPENAI_REASONING_EFFORT,
     )
 
     if USE_GPT4V:
         current_app.logger.info("USE_GPT4V is true, setting up GPT4V approach")
         if not AZURE_OPENAI_GPT4V_MODEL:
             raise ValueError("AZURE_OPENAI_GPT4V_MODEL must be set when USE_GPT4V is true")
+        if any(
+            model in Approach.GPT_REASONING_MODELS
+            for model in [
+                OPENAI_CHATGPT_MODEL,
+                AZURE_OPENAI_GPT4V_MODEL,
+                AZURE_OPENAI_CHATGPT_DEPLOYMENT,
+                AZURE_OPENAI_GPT4V_DEPLOYMENT,
+            ]
+        ):
+            raise ValueError(
+                "AZURE_OPENAI_CHATGPT_MODEL and AZURE_OPENAI_GPT4V_MODEL must not be a reasoning model when USE_GPT4V is true"
+            )
+
         token_provider = get_bearer_token_provider(azure_credential, "https://cognitiveservices.azure.com/.default")
 
         current_app.config[CONFIG_ASK_VISION_APPROACH] = RetrieveThenReadVisionApproach(
diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
index 3edf85989a..a315feec4c 100644
--- a/app/backend/approaches/approach.py
+++ b/app/backend/approaches/approach.py
@@ -6,9 +6,11 @@
     AsyncGenerator,
     Awaitable,
     Callable,
+    Dict,
     List,
     Optional,
     TypedDict,
+    Union,
     cast,
 )
 from urllib.parse import urljoin
@@ -21,8 +23,15 @@
     VectorizedQuery,
     VectorQuery,
 )
-from openai import AsyncOpenAI
-from openai.types.chat import ChatCompletionMessageParam
+from openai import AsyncOpenAI, AsyncStream
+from openai.types import CompletionUsage
+from openai.types.chat import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessageParam,
+    ChatCompletionReasoningEffort,
+    ChatCompletionToolParam,
+)
 
 from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
@@ -89,8 +98,59 @@ class ThoughtStep:
     description: Optional[Any]
     props: Optional[dict[str, Any]] = None
 
+    def update_token_usage(self, usage: CompletionUsage) -> None:
+        if self.props:  # a step whose props are None or empty is left unchanged
+            self.props["token_usage"] = TokenUsageProps.from_completion_usage(usage)
+
+
+@dataclass
+class DataPoints:  # sources attached to a response through ExtraInfo.data_points
+    text: Optional[List[str]] = None  # text sources (approaches pass their text_sources here)
+    images: Optional[List] = None  # image sources (passed by the vision chat approach)
+
+
+@dataclass
+class ExtraInfo:  # context an approach builds next to the chat completion (returned under "context")
+    data_points: DataPoints  # sources collected for the answer
+    thoughts: Optional[List[ThoughtStep]] = None  # ordered steps; the last is assumed to be answer generation
+    followup_questions: Optional[List[Any]] = None  # filled in after follow-up questions are extracted
+
+
+@dataclass
+class TokenUsageProps:
+    """Token counts of one chat completion; reasoning_tokens is None when no token details are reported."""
+    prompt_tokens: int
+    completion_tokens: int
+    reasoning_tokens: Optional[int]
+    total_tokens: int
+
+    @classmethod
+    def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageProps":
+        details = usage.completion_tokens_details
+        return cls(
+            usage.prompt_tokens,
+            usage.completion_tokens,
+            details.reasoning_tokens if details else None,
+            usage.total_tokens,
+        )
+
+
+# GPT reasoning models don't support the same set of parameters as other models
+# https://learn.microsoft.com/azure/ai-services/openai/how-to/reasoning
+@dataclass
+class GPTReasoningModelSupport:
+    streaming: bool  # whether chat completions for the model may be streamed
+
 
 class Approach(ABC):
+    # GPT reasoning models and the features each one supports
+    GPT_REASONING_MODELS = {
+        "o1": GPTReasoningModelSupport(streaming=False),
+        "o3-mini": GPTReasoningModelSupport(streaming=True),
+    }
+    # Set a higher token limit for GPT reasoning models
+    RESPONSE_DEFAULT_TOKEN_LIMIT = 1024
+    RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT = 8192
 
     def __init__(
         self,
@@ -106,6 +166,7 @@ def __init__(
         vision_endpoint: str,
         vision_token_provider: Callable[[], Awaitable[str]],
         prompt_manager: PromptManager,
+        reasoning_effort: Optional[str] = None,
     ):
         self.search_client = search_client
         self.openai_client = openai_client
@@ -119,6 +180,8 @@ def __init__(
         self.vision_endpoint = vision_endpoint
         self.vision_token_provider = vision_token_provider
         self.prompt_manager = prompt_manager
+        self.reasoning_effort = reasoning_effort
+        self.include_token_usage = True
 
     def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
         include_category = overrides.get("include_category")
@@ -281,6 +344,81 @@ def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[st
         else:
             return {"override_prompt": override_prompt}
 
+    def get_response_token_limit(self, model: str, default_limit: int) -> int:
+        """Return the reasoning-model token limit for reasoning models, otherwise default_limit."""
+        if model not in self.GPT_REASONING_MODELS:
+            return default_limit
+        return self.RESPONSE_REASONING_DEFAULT_TOKEN_LIMIT
+
+    def create_chat_completion(
+        self,
+        chatgpt_deployment: Optional[str],
+        chatgpt_model: str,
+        messages: list[ChatCompletionMessageParam],
+        overrides: dict[str, Any],
+        response_token_limit: int,
+        should_stream: bool = False,
+        tools: Optional[List[ChatCompletionToolParam]] = None,
+        temperature: Optional[float] = None,
+        n: Optional[int] = None,
+        reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
+    ) -> Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]:
+        """Build the chat completion request, adapting its parameters to what the model supports.
+
+        Reasoning models take max_completion_tokens and reasoning_effort instead of max_tokens and
+        temperature, and are streamed only when the model is listed as supporting streaming.
+        The returned awaitable is not awaited here; callers await it themselves.
+        """
+        if chatgpt_model in self.GPT_REASONING_MODELS:
+            # max_tokens is not supported by reasoning models
+            params: Dict[str, Any] = {"max_completion_tokens": response_token_limit}
+            params["reasoning_effort"] = reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
+            # Never request streaming from a reasoning model that cannot stream
+            should_stream = should_stream and self.GPT_REASONING_MODELS[chatgpt_model].streaming
+        else:
+            # An explicit temperature of 0.0 is valid, so compare against None instead of truthiness
+            if temperature is None:
+                temperature = overrides.get("temperature", 0.3)
+            params = {"max_tokens": response_token_limit, "temperature": temperature}
+
+        if should_stream:
+            params["stream"] = True
+            # Ask for token usage to be included in the streamed response
+            params["stream_options"] = {"include_usage": True}
+
+        params["tools"] = tools
+
+        # Azure OpenAI takes the deployment name as the model name
+        return self.openai_client.chat.completions.create(
+            model=chatgpt_deployment if chatgpt_deployment else chatgpt_model,
+            messages=messages,
+            seed=overrides.get("seed", None),
+            n=n or 1,
+            **params,
+        )
+
+    def format_thought_step_for_chatcompletion(
+        self,
+        title: str,
+        messages: List[ChatCompletionMessageParam],
+        overrides: dict[str, Any],
+        model: str,
+        deployment: Optional[str],
+        usage: Optional[CompletionUsage] = None,
+        reasoning_effort: Optional[ChatCompletionReasoningEffort] = None,
+    ) -> ThoughtStep:
+        """Describe one chat completion call (model, deployment, effort, token usage) as a ThoughtStep."""
+        properties: Dict[str, Any] = {"model": model}
+        if deployment:
+            properties["deployment"] = deployment
+        if model in self.GPT_REASONING_MODELS:
+            # Resolve the effort exactly as create_chat_completion does, so the step reports what was sent
+            effort = reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
+            properties["reasoning_effort"] = effort
+        if usage:
+            properties["token_usage"] = TokenUsageProps.from_completion_usage(usage)
+        return ThoughtStep(title, messages, properties)
+
     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py
index 1dc1b48171..bde515c13d 100644
--- a/app/backend/approaches/chatapproach.py
+++ b/app/backend/approaches/chatapproach.py
@@ -1,11 +1,19 @@
 import json
 import re
 from abc import ABC, abstractmethod
-from typing import Any, AsyncGenerator, Optional
+from typing import Any, AsyncGenerator, Awaitable, Optional, Union, cast
 
-from openai.types.chat import ChatCompletion, ChatCompletionMessageParam
+from openai import AsyncStream
+from openai.types.chat import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ChatCompletionMessageParam,
+)
 
-from approaches.approach import Approach
+from approaches.approach import (
+    Approach,
+    ExtraInfo,
+)
 
 
 class ChatApproach(Approach, ABC):
@@ -13,7 +21,9 @@ class ChatApproach(Approach, ABC):
     NO_RESPONSE = "0"
 
     @abstractmethod
-    async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
+    async def run_until_final_call(
+        self, messages, overrides, auth_claims, should_stream
+    ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]:
         pass
 
     def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
@@ -49,12 +59,15 @@ async def run_without_streaming(
         extra_info, chat_coroutine = await self.run_until_final_call(
             messages, overrides, auth_claims, should_stream=False
         )
-        chat_completion_response: ChatCompletion = await chat_coroutine
+        chat_completion_response: ChatCompletion = await cast(Awaitable[ChatCompletion], chat_coroutine)
         content = chat_completion_response.choices[0].message.content
         role = chat_completion_response.choices[0].message.role
         if overrides.get("suggest_followup_questions"):
             content, followup_questions = self.extract_followup_questions(content)
-            extra_info["followup_questions"] = followup_questions
+            extra_info.followup_questions = followup_questions
+        # Assume last thought is for generating answer
+        if self.include_token_usage and extra_info.thoughts and chat_completion_response.usage:
+            extra_info.thoughts[-1].update_token_usage(chat_completion_response.usage)
         chat_app_response = {
             "message": {"content": content, "role": role},
             "context": extra_info,
@@ -72,6 +85,7 @@ async def run_with_streaming(
         extra_info, chat_coroutine = await self.run_until_final_call(
             messages, overrides, auth_claims, should_stream=True
         )
+        chat_coroutine = cast(Awaitable[AsyncStream[ChatCompletionChunk]], chat_coroutine)
         yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}
 
         followup_questions_started = False
@@ -80,6 +94,7 @@ async def run_with_streaming(
             # "2023-07-01-preview" API version has a bug where first response has empty choices
             event = event_chunk.model_dump()  # Convert pydantic model to dict
             if event["choices"]:
+                # No usage during streaming
                 completion = {
                     "delta": {
                         "content": event["choices"][0]["delta"].get("content"),
@@ -100,9 +115,19 @@ async def run_with_streaming(
                     followup_content += content
                 else:
                     yield completion
+            else:
+                # Final chunk at end of streaming should contain usage
+                # https://cookbook.openai.com/examples/how_to_stream_completions#4-how-to-get-token-usage-data-for-streamed-chat-completion-response
+                if event_chunk.usage and extra_info.thoughts and self.include_token_usage:
+                    extra_info.thoughts[-1].update_token_usage(event_chunk.usage)
+                    yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}
+
         if followup_content:
             _, followup_questions = self.extract_followup_questions(followup_content)
-            yield {"delta": {"role": "assistant"}, "context": {"followup_questions": followup_questions}}
+            yield {
+                "delta": {"role": "assistant"},
+                "context": {"context": extra_info, "followup_questions": followup_questions},
+            }
 
     async def run(
         self,
diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py
index c839b03d30..4299fbfff5 100644
--- a/app/backend/approaches/chatreadretrieveread.py
+++ b/app/backend/approaches/chatreadretrieveread.py
@@ -1,4 +1,4 @@
-from typing import Any, Coroutine, List, Literal, Optional, Union, overload
+from typing import Any, Awaitable, List, Optional, Union, cast
 
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import VectorQuery
@@ -10,7 +10,7 @@
     ChatCompletionToolParam,
 )
 
-from approaches.approach import ThoughtStep
+from approaches.approach import DataPoints, ExtraInfo, ThoughtStep
 from approaches.chatapproach import ChatApproach
 from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
@@ -38,7 +38,8 @@ def __init__(
         content_field: str,
         query_language: str,
         query_speller: str,
-        prompt_manager: PromptManager
+        prompt_manager: PromptManager,
+        reasoning_effort: Optional[str] = None,
     ):
         self.search_client = search_client
         self.openai_client = openai_client
@@ -56,24 +57,8 @@ def __init__(
         self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
         self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
         self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question.prompty")
-
-    @overload
-    async def run_until_final_call(
-        self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: dict[str, Any],
-        auth_claims: dict[str, Any],
-        should_stream: Literal[False],
-    ) -> tuple[dict[str, Any], Coroutine[Any, Any, ChatCompletion]]: ...
-
-    @overload
-    async def run_until_final_call(
-        self,
-        messages: list[ChatCompletionMessageParam],
-        overrides: dict[str, Any],
-        auth_claims: dict[str, Any],
-        should_stream: Literal[True],
-    ) -> tuple[dict[str, Any], Coroutine[Any, Any, AsyncStream[ChatCompletionChunk]]]: ...
+        self.reasoning_effort = reasoning_effort
+        self.include_token_usage = True
 
     async def run_until_final_call(
         self,
@@ -81,8 +66,7 @@ async def run_until_final_call(
         overrides: dict[str, Any],
         auth_claims: dict[str, Any],
         should_stream: bool = False,
-    ) -> tuple[dict[str, Any], Coroutine[Any, Any, Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]]]:
-        seed = overrides.get("seed", None)
+    ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]:
         use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_ranker = True if overrides.get("semantic_ranker") else False
@@ -97,21 +81,33 @@ async def run_until_final_call(
         if not isinstance(original_user_query, str):
             raise ValueError("The most recent message content must be a string.")
 
+        reasoning_model_support = self.GPT_REASONING_MODELS.get(self.chatgpt_model)
+        if reasoning_model_support and (not reasoning_model_support.streaming and should_stream):
+            raise Exception(
+                f"{self.chatgpt_model} does not support streaming. Please use a different model or disable streaming."
+            )
+
         query_messages = self.prompt_manager.render_prompt(
             self.query_rewrite_prompt, {"user_query": original_user_query, "past_messages": messages[:-1]}
         )
         tools: List[ChatCompletionToolParam] = self.query_rewrite_tools
 
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
-        chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
-            messages=query_messages,  # type: ignore
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
-            temperature=0.0,  # Minimize creativity for search query generation
-            max_tokens=100,  # Setting too low risks malformed JSON, setting too high may affect performance
-            n=1,
-            tools=tools,
-            seed=seed,
+
+        chat_completion = cast(
+            ChatCompletion,
+            await self.create_chat_completion(
+                self.chatgpt_deployment,
+                self.chatgpt_model,
+                messages=query_messages,
+                overrides=overrides,
+                response_token_limit=self.get_response_token_limit(
+                    self.chatgpt_model, 100
+                ),  # Setting too low risks malformed JSON, setting too high may affect performance
+                temperature=0.0,  # Minimize creativity for search query generation
+                tools=tools,
+                reasoning_effort="low",  # Minimize reasoning for search query generation
+            ),
         )
 
         query_text = self.get_search_query(chat_completion, original_user_query)
@@ -150,17 +146,17 @@ async def run_until_final_call(
             },
         )
 
-        extra_info = {
-            "data_points": {"text": text_sources},
-            "thoughts": [
-                ThoughtStep(
-                    "Prompt to generate search query",
-                    query_messages,
-                    (
-                        {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
-                        if self.chatgpt_deployment
-                        else {"model": self.chatgpt_model}
-                    ),
+        extra_info = ExtraInfo(
+            DataPoints(text=text_sources),
+            thoughts=[
+                self.format_thought_step_for_chatcompletion(
+                    title="Prompt to generate search query",
+                    messages=query_messages,
+                    overrides=overrides,
+                    model=self.chatgpt_model,
+                    deployment=self.chatgpt_deployment,
+                    usage=chat_completion.usage,
+                    reasoning_effort="low",
                 ),
                 ThoughtStep(
                     "Search using generated search query",
@@ -179,26 +175,26 @@ async def run_until_final_call(
                     "Search results",
                     [result.serialize_for_results() for result in results],
                 ),
-                ThoughtStep(
-                    "Prompt to generate answer",
-                    messages,
-                    (
-                        {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
-                        if self.chatgpt_deployment
-                        else {"model": self.chatgpt_model}
-                    ),
+                self.format_thought_step_for_chatcompletion(
+                    title="Prompt to generate answer",
+                    messages=messages,
+                    overrides=overrides,
+                    model=self.chatgpt_model,
+                    deployment=self.chatgpt_deployment,
+                    usage=None,
                 ),
             ],
-        }
-
-        chat_coroutine = self.openai_client.chat.completions.create(
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
-            messages=messages,
-            temperature=overrides.get("temperature", 0.3),
-            max_tokens=1024,
-            n=1,
-            stream=should_stream,
-            seed=seed,
+        )
+
+        chat_coroutine = cast(
+            Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]],
+            self.create_chat_completion(
+                self.chatgpt_deployment,
+                self.chatgpt_model,
+                messages,
+                overrides,
+                self.get_response_token_limit(self.chatgpt_model, 1024),
+                should_stream,
+            ),
         )
         return (extra_info, chat_coroutine)
diff --git a/app/backend/approaches/chatreadretrievereadvision.py b/app/backend/approaches/chatreadretrievereadvision.py
index 559f15bd1a..5384b2b769 100644
--- a/app/backend/approaches/chatreadretrievereadvision.py
+++ b/app/backend/approaches/chatreadretrievereadvision.py
@@ -1,4 +1,4 @@
-from typing import Any, Awaitable, Callable, Coroutine, List, Optional, Union
+from typing import Any, Awaitable, Callable, List, Optional, Union, cast
 
 from azure.search.documents.aio import SearchClient
 from azure.storage.blob.aio import ContainerClient
@@ -10,7 +10,7 @@
     ChatCompletionToolParam,
 )
 
-from approaches.approach import ThoughtStep
+from approaches.approach import DataPoints, ExtraInfo, ThoughtStep
 from approaches.chatapproach import ChatApproach
 from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
@@ -67,6 +67,8 @@ def __init__(
         self.query_rewrite_prompt = self.prompt_manager.load_prompt("chat_query_rewrite.prompty")
         self.query_rewrite_tools = self.prompt_manager.load_tools("chat_query_rewrite_tools.json")
         self.answer_prompt = self.prompt_manager.load_prompt("chat_answer_question_vision.prompty")
+        # Currently disabled due to issues with rendering token usage in the UI
+        self.include_token_usage = False
 
     async def run_until_final_call(
         self,
@@ -74,7 +76,7 @@ async def run_until_final_call(
         overrides: dict[str, Any],
         auth_claims: dict[str, Any],
         should_stream: bool = False,
-    ) -> tuple[dict[str, Any], Coroutine[Any, Any, Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]]]:
+    ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]:
         seed = overrides.get("seed", None)
         use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
@@ -164,12 +166,9 @@ async def run_until_final_call(
             },
         )
 
-        extra_info = {
-            "data_points": {
-                "text": text_sources,
-                "images": image_sources,
-            },
-            "thoughts": [
+        extra_info = ExtraInfo(
+            DataPoints(text=text_sources, images=image_sources),
+            [
                 ThoughtStep(
                     "Prompt to generate search query",
                     query_messages,
@@ -206,15 +205,18 @@ async def run_until_final_call(
                     ),
                 ),
             ],
-        }
+        )
 
-        chat_coroutine = self.openai_client.chat.completions.create(
-            model=self.gpt4v_deployment if self.gpt4v_deployment else self.gpt4v_model,
-            messages=messages,
-            temperature=overrides.get("temperature", 0.3),
-            max_tokens=1024,
-            n=1,
-            stream=should_stream,
-            seed=seed,
+        chat_coroutine = cast(
+            Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]],
+            self.openai_client.chat.completions.create(
+                model=self.gpt4v_deployment if self.gpt4v_deployment else self.gpt4v_model,
+                messages=messages,
+                temperature=overrides.get("temperature", 0.3),
+                max_tokens=1024,
+                n=1,
+                stream=should_stream,
+                seed=seed,
+            ),
         )
         return (extra_info, chat_coroutine)
diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py
index fc87132fa7..8bdbb9785e 100644
--- a/app/backend/approaches/retrievethenread.py
+++ b/app/backend/approaches/retrievethenread.py
@@ -1,11 +1,11 @@
-from typing import Any, Optional
+from typing import Any, Optional, cast
 
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import VectorQuery
 from openai import AsyncOpenAI
-from openai.types.chat import ChatCompletionMessageParam
+from openai.types.chat import ChatCompletion, ChatCompletionMessageParam
 
-from approaches.approach import Approach, ThoughtStep
+from approaches.approach import Approach, DataPoints, ExtraInfo, ThoughtStep
 from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
 
@@ -33,6 +33,7 @@ def __init__(
         query_language: str,
         query_speller: str,
         prompt_manager: PromptManager,
+        reasoning_effort: Optional[str] = None,
     ):
         self.search_client = search_client
         self.chatgpt_deployment = chatgpt_deployment
@@ -49,6 +50,8 @@ def __init__(
         self.query_speller = query_speller
         self.prompt_manager = prompt_manager
         self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question.prompty")
+        self.reasoning_effort = reasoning_effort
+        self.include_token_usage = True
 
     async def run(
         self,
@@ -60,7 +63,6 @@ async def run(
         if not isinstance(q, str):
             raise ValueError("The most recent message content must be a string.")
         overrides = context.get("overrides", {})
-        seed = overrides.get("seed", None)
         auth_claims = context.get("auth_claims", {})
         use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
@@ -99,19 +101,20 @@ async def run(
             | {"user_query": q, "text_sources": text_sources},
         )
 
-        chat_completion = await self.openai_client.chat.completions.create(
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
-            messages=messages,
-            temperature=overrides.get("temperature", 0.3),
-            max_tokens=1024,
-            n=1,
-            seed=seed,
+        chat_completion = cast(
+            ChatCompletion,
+            await self.create_chat_completion(
+                self.chatgpt_deployment,
+                self.chatgpt_model,
+                messages=messages,
+                overrides=overrides,
+                response_token_limit=self.get_response_token_limit(self.chatgpt_model, 1024),
+            ),
         )
 
-        extra_info = {
-            "data_points": {"text": text_sources},
-            "thoughts": [
+        extra_info = ExtraInfo(
+            DataPoints(text=text_sources),
+            thoughts=[
                 ThoughtStep(
                     "Search using user query",
                     q,
@@ -129,17 +132,16 @@ async def run(
                     "Search results",
                     [result.serialize_for_results() for result in results],
                 ),
-                ThoughtStep(
-                    "Prompt to generate answer",
-                    messages,
-                    (
-                        {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
-                        if self.chatgpt_deployment
-                        else {"model": self.chatgpt_model}
-                    ),
+                self.format_thought_step_for_chatcompletion(
+                    title="Prompt to generate answer",
+                    messages=messages,
+                    overrides=overrides,
+                    model=self.chatgpt_model,
+                    deployment=self.chatgpt_deployment,
+                    usage=chat_completion.usage,
                 ),
             ],
-        }
+        )
 
         return {
             "message": {
diff --git a/app/backend/approaches/retrievethenreadvision.py b/app/backend/approaches/retrievethenreadvision.py
index d532f16c72..8bd9a33cef 100644
--- a/app/backend/approaches/retrievethenreadvision.py
+++ b/app/backend/approaches/retrievethenreadvision.py
@@ -7,7 +7,7 @@
     ChatCompletionMessageParam,
 )
 
-from approaches.approach import Approach, ThoughtStep
+from approaches.approach import Approach, DataPoints, ExtraInfo, ThoughtStep
 from approaches.promptmanager import PromptManager
 from core.authentication import AuthenticationHelper
 from core.imageshelper import fetch_image
@@ -57,6 +57,8 @@ def __init__(
         self.vision_token_provider = vision_token_provider
         self.prompt_manager = prompt_manager
         self.answer_prompt = self.prompt_manager.load_prompt("ask_answer_question_vision.prompty")
+        # Currently disabled due to issues with rendering token usage in the UI
+        self.include_token_usage = False
 
     async def run(
         self,
@@ -136,9 +138,9 @@ async def run(
             seed=seed,
         )
 
-        extra_info = {
-            "data_points": {"text": text_sources, "images": image_sources},
-            "thoughts": [
+        extra_info = ExtraInfo(
+            DataPoints(text=text_sources, images=image_sources),
+            [
                 ThoughtStep(
                     "Search using user query",
                     q,
@@ -167,7 +169,7 @@ async def run(
                     ),
                 ),
             ],
-        }
+        )
 
         return {
             "message": {
diff --git a/app/backend/config.py b/app/backend/config.py
index a4abf5a0e6..5f3354f2da 100644
--- a/app/backend/config.py
+++ b/app/backend/config.py
@@ -11,6 +11,9 @@
 CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
 CONFIG_SEMANTIC_RANKER_DEPLOYED = "semantic_ranker_deployed"
 CONFIG_QUERY_REWRITING_ENABLED = "query_rewriting_enabled"
+CONFIG_REASONING_EFFORT_ENABLED = "reasoning_effort_enabled"
+CONFIG_VISION_REASONING_EFFORT_ENABLED = "vision_reasoning_effort_enabled"
+CONFIG_DEFAULT_REASONING_EFFORT = "default_reasoning_effort"
 CONFIG_VECTOR_SEARCH_ENABLED = "vector_search_enabled"
 CONFIG_SEARCH_CLIENT = "search_client"
 CONFIG_OPENAI_CLIENT = "openai_client"
@@ -23,6 +26,7 @@
 CONFIG_SPEECH_SERVICE_LOCATION = "speech_service_location"
 CONFIG_SPEECH_SERVICE_TOKEN = "speech_service_token"
 CONFIG_SPEECH_SERVICE_VOICE = "speech_service_voice"
+CONFIG_STREAMING_ENABLED = "streaming_enabled"
 CONFIG_CHAT_HISTORY_BROWSER_ENABLED = "chat_history_browser_enabled"
 CONFIG_CHAT_HISTORY_COSMOS_ENABLED = "chat_history_cosmos_enabled"
 CONFIG_COSMOS_HISTORY_CLIENT = "cosmos_history_client"
diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts
index 4acbd2e8da..c915a19ee5 100644
--- a/app/frontend/src/api/models.ts
+++ b/app/frontend/src/api/models.ts
@@ -21,6 +21,7 @@ export type ChatAppRequestOverrides = {
     semantic_ranker?: boolean;
     semantic_captions?: boolean;
     query_rewriting?: boolean;
+    reasoning_effort?: string;
     include_category?: string;
     exclude_category?: string;
     seed?: number;
@@ -48,7 +49,7 @@ export type ResponseMessage = {
 export type Thoughts = {
     title: string;
     description: any; // It can be any output from the api
-    props?: { [key: string]: string };
+    props?: { [key: string]: any };
 };
 
 export type ResponseContext = {
@@ -83,9 +84,12 @@ export type ChatAppRequest = {
 };
 
 export type Config = {
+    defaultReasoningEffort: string;
     showGPT4VOptions: boolean;
     showSemanticRankerOption: boolean;
     showQueryRewritingOption: boolean;
+    showReasoningEffortOption: boolean;
+    streamingEnabled: boolean;
     showVectorOption: boolean;
     showUserUpload: boolean;
     showLanguagePicker: boolean;
diff --git a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css
index 2d22130da4..84b9f110ea 100644
--- a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css
+++ b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.module.css
@@ -62,3 +62,65 @@
     max-width: 100%;
     object-fit: contain;
 }
+
+.header {
+    color: #123bb6;
+    position: relative;
+    font-size: 0.875em;
+    margin-bottom: 0.5em;
+}
+
+.reasoningEffort {
+    font-size: 14px;
+    margin-bottom: 8px;
+}
+
+.tokenUsageGraph {
+    margin: 16px 0;
+}
+
+/* One bar of the token usage graph; the component sets its width inline. */
+.tokenBar {
+    height: 20px;
+    margin-bottom: 4px;
+    padding-left: 4px;
+    font-size: 12px;
+    display: flex;
+    flex-wrap: wrap;
+    align-items: center;
+    background-color: #d7d7d7;
+    white-space: nowrap;
+    overflow: visible;
+    min-width: max-content;
+}
+
+/* Adjust tokenLabel to allow bar-specific text color overrides */
+.tokenLabel {
+    padding-right: 4px;
+}
+
+.primaryBarContainer {
+    width: fit-content;
+    display: flex;
+    gap: 4px;
+}
+
+.promptBar {
+    background-color: #a82424;
+    color: #ffffff; /* White text for contrast */
+}
+
+.reasoningBar {
+    background-color: #265e29;
+    color: #ffffff;
+}
+
+.outputBar {
+    background-color: #12579b;
+    color: #ffffff;
+}
+
+.totalBar {
+    background-color: #424242;
+    color: #ffffff;
+}
diff --git a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx
index 4c3a62617e..f666960da1 100644
--- a/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx
+++ b/app/frontend/src/components/AnalysisPanel/ThoughtProcess.tsx
@@ -6,6 +6,7 @@ import { a11yLight } from "react-syntax-highlighter/dist/esm/styles/hljs";
 import styles from "./AnalysisPanel.module.css";
 
 import { Thoughts } from "../../api";
+import { TokenUsageGraph } from "./TokenUsageGraph";
 
 SyntaxHighlighter.registerLanguage("json", json);
 
@@ -22,12 +23,13 @@ export const ThoughtProcess = ({ thoughts }: Props) => {
                         <div className={styles.tStep}>{t.title}</div>
                         <Stack horizontal tokens={{ childrenGap: 5 }}>
                             {t.props &&
-                                (Object.keys(t.props) || []).map((k: any) => (
+                                Object.keys(t.props).filter(k => k !== "token_usage").map(k => (
                                     <span className={styles.tProp} key={k}>
                                         {k}: {JSON.stringify(t.props?.[k])}
                                     </span>
                                 ))}
                         </Stack>
+                        {t.props?.token_usage && <TokenUsageGraph tokenUsage={t.props.token_usage} reasoningEffort={t.props.reasoning_effort} />}
                         {Array.isArray(t.description) ? (
                             <SyntaxHighlighter language="json" wrapLongLines className={styles.tCodeBlock} style={a11yLight}>
                                 {JSON.stringify(t.description, null, 2)}
diff --git a/app/frontend/src/components/AnalysisPanel/TokenUsageGraph.tsx b/app/frontend/src/components/AnalysisPanel/TokenUsageGraph.tsx
new file mode 100644
index 0000000000..e44db260ad
--- /dev/null
+++ b/app/frontend/src/components/AnalysisPanel/TokenUsageGraph.tsx
@@ -0,0 +1,54 @@
+import React from "react";
+import styles from "./AnalysisPanel.module.css";
+
+/** Token counts shown for one chat completion call. */
+export interface TokenUsage {
+    prompt_tokens: number;
+    completion_tokens: number;
+    reasoning_tokens: number;
+    total_tokens: number;
+}
+
+interface TokenUsageGraphProps {
+    tokenUsage: TokenUsage;
+    /** When set to a non-empty value, a separate reasoning bar is rendered. */
+    reasoningEffort?: string;
+}
+
+/**
+ * Bar chart of prompt, reasoning, output and total token counts.
+ * Each bar's width is its share of total_tokens (0% when total_tokens is 0).
+ */
+export const TokenUsageGraph: React.FC<TokenUsageGraphProps> = ({ tokenUsage, reasoningEffort }) => {
+    const { prompt_tokens, completion_tokens, total_tokens } = tokenUsage;
+    // tokenUsage comes from untyped thought props, so tolerate a missing reasoning count instead of rendering NaN.
+    const reasoning_tokens = tokenUsage.reasoning_tokens ?? 0;
+    // Output is what remains of the completion once reasoning tokens are subtracted.
+    const output_tokens = completion_tokens - reasoning_tokens;
+
+    // Calculate percentage widths relative to total_tokens
+    const calcPercent = (value: number) => (total_tokens ? (value / total_tokens) * 100 : 0) + "%";
+
+    return (
+        <div className={styles.tokenUsageGraph}>
+            <div className={styles.header}>Token Usage</div>
+            <div className={styles.primaryBarContainer} style={{ width: "100%" }}>
+                <div className={`${styles.tokenBar} ${styles.promptBar}`} style={{ width: calcPercent(prompt_tokens) }}>
+                    <span className={styles.tokenLabel}>Prompt: {prompt_tokens}</span>
+                </div>
+                {reasoningEffort != null && reasoningEffort !== "" && (
+                    <div className={`${styles.tokenBar} ${styles.reasoningBar}`} style={{ width: calcPercent(reasoning_tokens) }}>
+                        <span className={styles.tokenLabel}>Reasoning: {reasoning_tokens}</span>
+                    </div>
+                )}
+                <div className={`${styles.tokenBar} ${styles.outputBar}`} style={{ width: calcPercent(output_tokens) }}>
+                    <span className={styles.tokenLabel}>Output: {output_tokens}</span>
+                </div>
+            </div>
+
+            <div className={`${styles.tokenBar} ${styles.totalBar}`} style={{ width: calcPercent(total_tokens) }}>
+                <span className={styles.tokenLabel}>Total: {total_tokens}</span>
+            </div>
+        </div>
+    );
+};
diff --git a/app/frontend/src/components/Settings/Settings.module.css b/app/frontend/src/components/Settings/Settings.module.css
index 1d512e58e2..f33b1f94df 100644
--- a/app/frontend/src/components/Settings/Settings.module.css
+++ b/app/frontend/src/components/Settings/Settings.module.css
@@ -1,3 +1,55 @@
 .settingsSeparator {
     margin-top: 0.75rem;
 }
+
+.option-slider {
+    width: 100%;
+    max-width: 400px;
+    margin: 20px auto;
+    position: relative;
+    user-select: none;
+}
+
+.slider-track {
+    width: 100%;
+    height: 40px;
+    background: #e0e0e0;
+    border-radius: 20px;
+    position: relative;
+}
+
+.slider-thumb {
+    position: absolute;
+    top: 0;
+    width: 33.33%;
+    height: 40px;
+    background: #0078d4;
+    border-radius: 20px;
+    color: #fff;
+    text-align: center;
+    line-height: 40px;
+    transition: left 0.3s ease;
+    cursor: pointer;
+}
+
+.slider-options {
+    display: flex;
+    justify-content: space-between;
+    position: absolute;
+    top: 0;
+    width: 100%;
+    height: 40px;
+}
+
+.slider-option {
+    width: 33.33%;
+    text-align: center;
+    line-height: 40px;
+    color: #333;
+    cursor: pointer;
+}
+
+.slider-option.active {
+    font-weight: bold;
+    color: #fff;
+}
diff --git a/app/frontend/src/components/Settings/Settings.tsx b/app/frontend/src/components/Settings/Settings.tsx
index 49f7aec5c9..b16beb0246 100644
--- a/app/frontend/src/components/Settings/Settings.tsx
+++ b/app/frontend/src/components/Settings/Settings.tsx
@@ -20,6 +20,7 @@ export interface SettingsProps {
     useSemanticRanker: boolean;
     useSemanticCaptions: boolean;
     useQueryRewriting: boolean;
+    reasoningEffort: string;
     excludeCategory: string;
     includeCategory: string;
     retrievalMode: RetrievalMode;
@@ -28,6 +29,7 @@ export interface SettingsProps {
     vectorFieldList: VectorFieldOptions[];
     showSemanticRankerOption: boolean;
     showQueryRewritingOption: boolean;
+    showReasoningEffortOption: boolean;
     showGPT4VOptions: boolean;
     showVectorOption: boolean;
     useOidSecurityFilter: boolean;
@@ -37,6 +39,7 @@ export interface SettingsProps {
     requireAccessControl: boolean;
     className?: string;
     onChange: (field: string, value: any) => void;
+    streamingEnabled?: boolean; // Only used in chat
     shouldStream?: boolean; // Only used in Chat
     useSuggestFollowupQuestions?: boolean; // Only used in Chat
     promptTemplatePrefix?: string;
@@ -54,6 +57,7 @@ export const Settings = ({
     useSemanticRanker,
     useSemanticCaptions,
     useQueryRewriting,
+    reasoningEffort,
     excludeCategory,
     includeCategory,
     retrievalMode,
@@ -62,6 +66,7 @@ export const Settings = ({
     vectorFieldList,
     showSemanticRankerOption,
     showQueryRewritingOption,
+    showReasoningEffortOption,
     showGPT4VOptions,
     showVectorOption,
     useOidSecurityFilter,
@@ -71,6 +76,7 @@ export const Settings = ({
     requireAccessControl,
     className,
     onChange,
+    streamingEnabled,
     shouldStream,
     useSuggestFollowupQuestions,
     promptTemplatePrefix,
@@ -99,6 +105,7 @@ export const Settings = ({
     const semanticRankerId = useId("semanticRanker");
     const semanticRankerFieldId = useId("semanticRankerField");
     const queryRewritingFieldId = useId("queryRewritingField");
+    const reasoningEffortFieldId = useId("reasoningEffortField");
     const semanticCaptionsId = useId("semanticCaptions");
     const semanticCaptionsFieldId = useId("semanticCaptionsField");
     const useOidSecurityFilterId = useId("useOidSecurityFilter");
@@ -259,6 +266,24 @@ export const Settings = ({
                 </>
             )}
 
+            {showReasoningEffortOption && (
+                <Dropdown
+                    id={reasoningEffortFieldId}
+                    selectedKey={reasoningEffort}
+                    label={t("labels.reasoningEffort")}
+                    onChange={(_ev?: React.FormEvent<HTMLElement | HTMLInputElement>, option?: IDropdownOption) =>
+                        onChange("reasoningEffort", option?.key || "")
+                    }
+                    aria-labelledby={reasoningEffortFieldId}
+                    options={[
+                        { key: "low", text: t("labels.reasoningEffortOptions.low") },
+                        { key: "medium", text: t("labels.reasoningEffortOptions.medium") },
+                        { key: "high", text: t("labels.reasoningEffortOptions.high") }
+                    ]}
+                    onRenderLabel={props => renderLabel(props, reasoningEffortFieldId, reasoningEffortFieldId, t("helpTexts.reasoningEffort"))}
+                />
+            )}
+
             {useLogin && (
                 <>
                     <Checkbox
@@ -308,6 +333,7 @@ export const Settings = ({
             {shouldStream !== undefined && (
                 <Checkbox
                     id={shouldStreamFieldId}
+                    disabled={!streamingEnabled}
                     className={styles.settingsSeparator}
                     checked={shouldStream}
                     label={t("labels.shouldStream")}
diff --git a/app/frontend/src/locales/da/translation.json b/app/frontend/src/locales/da/translation.json
index 13480a34e1..b8604e46c0 100644
--- a/app/frontend/src/locales/da/translation.json
+++ b/app/frontend/src/locales/da/translation.json
@@ -87,6 +87,12 @@
         "useSemanticRanker": "Brug semantisk ranking til søgning",
         "useSemanticCaptions": "Brug semantiske billedtekster",
         "useQueryRewriting": "Brug forespørgselsomskrivning til informationsgenfinding",
+        "reasoningEffort": "Ræsonnementsindsats",
+        "reasoningEffortOptions": {
+            "low": "Lav",
+            "medium": "Medium",
+            "high": "Høj"
+        },
         "useSuggestFollowupQuestions": "Foreslå opfølgende spørgsmål",
         "useGPT4V": "Brug GPT vision model",
         "gpt4VInput": {
@@ -128,6 +134,8 @@
         "excludeCategory": "Angiver en kategori, der skal ekskluderes fra søgeresultaterne. Der er ingen kategorier i det standard datasæt.",
         "useSemanticReranker": "Aktiverer Azure AI Search semantisk omrangør, en model der rangerer søgeresultater baseret på semantisk lighed til brugerens forespørgsel.",
         "useSemanticCaptions": "Sender semantiske billedtekster til LLM'en i stedet for hele søgeresultatet. En semantisk billedtekst udtrækkes fra et søgeresultat under processen med semantisk rangering.",
+        "useQueryRewriting": "Aktiverer Azure AI Search forespørgselsomskrivning, en proces der ændrer brugerens forespørgsel for at forbedre søgeresultaterne. Kræver at semantisk ranking er aktiveret.",
+        "reasoningEffort": "Indstiller ræsonnementsindsatsen for sprogmodellen. Højere værdier resulterer i mere ræsonnement, men kan tage længere tid om at generere et svar. Standardværdien er medium.",
         "suggestFollowupQuestions": "Beder LLM'en om at foreslå opfølgende spørgsmål baseret på brugerens forespørgsel.",
         "useGPT4Vision": "Bruger GPT-4-Turbo med Vision til at generere svar baseret på billeder og tekst fra indekset.",
         "vectorFields": "Angiver hvilke indlejringsfelter i Azure AI Search Index, der vil blive søgt, enten både 'Billeder og tekst' indlejringer, 'Billeder' kun eller 'Tekst' kun.",
diff --git a/app/frontend/src/locales/en/translation.json b/app/frontend/src/locales/en/translation.json
index 05c762875e..03ee719042 100644
--- a/app/frontend/src/locales/en/translation.json
+++ b/app/frontend/src/locales/en/translation.json
@@ -91,6 +91,12 @@
         "useSemanticRanker": "Use semantic ranker for retrieval",
         "useSemanticCaptions": "Use semantic captions",
         "useQueryRewriting": "Use query rewriting for retrieval",
+        "reasoningEffort": "Reasoning effort",
+        "reasoningEffortOptions": {
+            "low": "Low",
+            "medium": "Medium",
+            "high": "High"
+        },
         "useSuggestFollowupQuestions": "Suggest follow-up questions",
         "useGPT4V": "Use GPT vision model",
         "gpt4VInput": {
@@ -142,6 +148,8 @@
             "Enables the Azure AI Search semantic ranker, a model that re-ranks search results based on semantic similarity to the user's query.",
         "useQueryRewriting":
             "Enables Azure AI Search query rewriting, a process that modifies the user's query to improve search results. Requires semantic ranker to be enabled.",
+        "reasoningEffort":
+            "Sets the reasoning effort for the LLM. Higher values result in more reasoning, but may take longer to generate a response. The default is medium.",
         "useSemanticCaptions":
              "Sends semantic captions to the LLM instead of the full search result. A semantic caption is extracted from a search result during the process of semantic ranking.",
         "suggestFollowupQuestions": "Asks the LLM to suggest follow-up questions based on the user's query.",
diff --git a/app/frontend/src/locales/es/translation.json b/app/frontend/src/locales/es/translation.json
index 0675b69f94..206057ca4e 100644
--- a/app/frontend/src/locales/es/translation.json
+++ b/app/frontend/src/locales/es/translation.json
@@ -91,6 +91,12 @@
         "useSemanticRanker": "Usar clasificador semántico para la recuperación",
         "useSemanticCaptions": "Usar subtítulos semánticos",
         "useQueryRewriting": "Utiliza la reescritura de consultas para la recuperación",
+        "reasoningEffort": "Esfuerzo de razonamiento",
+        "reasoningEffortOptions": {
+            "low": "Bajo",
+            "medium": "Medio",
+            "high": "Alto"
+        },
         "useSuggestFollowupQuestions": "Sugerir preguntas de seguimiento",
         "useGPT4V": "Usar modelo de visión GPT",
         "gpt4VInput": {
@@ -143,6 +149,10 @@
             "Habilita el re-clasificador semántico de Azure AI Search, un modelo que re-clasifica los resultados de búsqueda basándose en la similitud semántica con la consulta del usuario.",
         "useSemanticCaptions":
             "Envía subtítulos semánticos al LLM en lugar del resultado de búsqueda completo. Un subtítulo semántico se extrae de un resultado de búsqueda durante el proceso de clasificación semántica.",
+        "useQueryRewriting":
+            "Habilita la reescritura de consultas de Azure AI Search, un proceso que modifica la consulta del usuario para mejorar los resultados de búsqueda. Requiere que el clasificador semántico esté habilitado.",
+        "reasoningEffort":
+            "Establece el esfuerzo de razonamiento para el LLM. Los valores más altos resultan en más razonamiento, pero pueden tardar más en generar una respuesta. El valor predeterminado es medio.",
         "suggestFollowupQuestions": "Pide al LLM que sugiera preguntas de seguimiento basándose en la consulta del usuario.",
         "useGPT4Vision": "Utiliza GPT-4-Turbo con Visión para generar respuestas basándose en imágenes y texto del índice.",
         "vectorFields":
diff --git a/app/frontend/src/locales/fr/translation.json b/app/frontend/src/locales/fr/translation.json
index 90aec6da3a..bd867c6796 100644
--- a/app/frontend/src/locales/fr/translation.json
+++ b/app/frontend/src/locales/fr/translation.json
@@ -92,6 +92,12 @@
         "useSemanticCaptions": "Utiliser les titres sémantiques",
         "useSuggestFollowupQuestions": "Suggérer des questions de suivi",
         "useQueryRewriting": "Utilisez la réécriture des requêtes pour la récupération",
+        "reasoningEffort": "Effort de raisonnement",
+        "reasoningEffortOptions": {
+            "low": "Faible",
+            "medium": "Moyen",
+            "high": "Élevé"
+        },
         "useGPT4V": "Utiliser le modèle GPT Vision",
         "gpt4VInput": {
             "label": "Entrées du modèle GPT Vision",
@@ -144,6 +150,10 @@
         "useSemanticCaptions":
             "Envoie des légendes sémantiques à l'LLM au lieu du résultat de recherche complet. Une légende sémantique est extraite d'un résultat de recherche lors du processus de classement sémantique.",
         "suggestFollowupQuestions": "Demande à l'LLM de suggérer des questions de suivi en fonction de la requête de l'utilisateur.",
+        "useQueryRewriting":
+            "Active la réécriture de requêtes d'Azure AI Search, un processus qui modifie la requête de l'utilisateur pour améliorer les résultats de recherche. Nécessite que le reclasseur sémantique soit activé.",
+        "reasoningEffort":
+            "Définit l'effort de raisonnement pour le LLM. Des valeurs plus élevées entraînent plus de raisonnement, mais peuvent prendre plus de temps pour générer une réponse. La valeur par défaut est moyenne.",
         "useGPT4Vision": "Utilise GPT-4-Turbo avec Vision pour générer des réponses basées sur des images et du texte de l'index.",
         "vectorFields":
             "Spécifie quels champs d'incorporation dans l'index de recherche Azure AI seront recherchés, à la fois les incorporations 'Images et texte', 'Images' seulement, ou 'Texte' seulement.",
diff --git a/app/frontend/src/locales/it/translation.json b/app/frontend/src/locales/it/translation.json
index ee1c2c25ba..e1146a3fbd 100644
--- a/app/frontend/src/locales/it/translation.json
+++ b/app/frontend/src/locales/it/translation.json
@@ -91,6 +91,12 @@
         "useSemanticRanker": "Usa il reranker semantico",
         "useSemanticCaptions": "Usa didascalie semantiche",
         "useQueryRewriting": "Usa la riscrittura delle query per il recupero",
+        "reasoningEffort": "Sforzo di ragionamento",
+        "reasoningEffortOptions": {
+            "low": "Basso",
+            "medium": "Medio",
+            "high": "Alto"
+        },
         "useSuggestFollowupQuestions": "Suggerisci domande di follow-up",
         "useGPT4V": "Usa il modello GPT Vision",
         "gpt4VInput": {
@@ -143,6 +149,10 @@
             "Abilita il ranking semantico di Azure AI Search, un modello che riordina i risultati di ricerca in base alla somiglianza semantica con la query dell'utente.",
         "useSemanticCaptions":
             "Invia didascalie semantiche all'LLM invece del risultato di ricerca completo. Una didascalia semantica è estratta da un risultato di ricerca durante il processo di ranking semantico.",
+        "useQueryRewriting":
+            "Abilita la riscrittura delle query di Azure AI Search, un processo che modifica la query dell'utente per migliorare i risultati di ricerca. Richiede che il reranker semantico sia abilitato.",
+        "reasoningEffort":
+            "Imposta lo sforzo di ragionamento per l'LLM. Valori più alti comportano un maggiore ragionamento, ma potrebbero richiedere più tempo per generare una risposta. Il valore predefinito è medio.",
         "suggestFollowupQuestions": "Chiede all'LLM di suggerire domande di follow-up in base alla query dell'utente.",
         "useGPT4Vision": "Utilizza GPT-4-Turbo con Vision per generare risposte basate su immagini e testo dell'indice.",
         "vectorFields":
diff --git a/app/frontend/src/locales/ja/translation.json b/app/frontend/src/locales/ja/translation.json
index f59593e3fb..17f317e29e 100644
--- a/app/frontend/src/locales/ja/translation.json
+++ b/app/frontend/src/locales/ja/translation.json
@@ -91,6 +91,12 @@
         "useSemanticRanker": "取得にセマンティック・ランカーを使用",
         "useSemanticCaptions": "セマンティック・キャプションを使用",
         "useQueryRewriting": "検索のためにクエリの書き換えを使用する",
+        "reasoningEffort": "推論の労力",
+        "reasoningEffortOptions": {
+            "low": "低",
+            "medium": "中",
+            "high": "高"
+        },
         "useSuggestFollowupQuestions": "フォローアップの質問を提案",
         "useGPT4V": "GPT Visionモデルを使用",
         "gpt4VInput": {
@@ -140,6 +146,10 @@
             "Azure AI Searchのセマンティック・ランカーを有効にします(ユーザーのクエリに対するセマンティック類似性に基づいて検索結果をリランク付けするモデル)。",
         "useSemanticCaptions":
             "完全な検索結果ではなく、LLMにセマンティック・キャプションを送信します。セマンティック・キャプションは、セマンティック・ランキングの処理中に検索結果から抽出されます。",
+        "useQueryRewriting":
+            "Azure AI Searchのクエリの書き換えを有効にします。これは、ユーザーのクエリを変更して検索結果を改善するプロセスです。セマンティック・ランカーが有効になっている必要があります。",
+        "reasoningEffort":
+            "LLMの推論労力を設定します。値が高いほど推論が増加しますが、応答の生成に時間がかかる場合があります。デフォルトは中です。",
         "suggestFollowupQuestions": "ユーザーのクエリに基づいて、LLMにフォローアップの質問を提案するように問い合わせます。",
         "useGPT4Vision": "インデックスから画像とテキストを利用して回答を生成するためGPT-4-Turbo with Visionを使用します。",
         "vectorFields":
diff --git a/app/frontend/src/locales/nl/translation.json b/app/frontend/src/locales/nl/translation.json
index b277096b28..c75b871353 100644
--- a/app/frontend/src/locales/nl/translation.json
+++ b/app/frontend/src/locales/nl/translation.json
@@ -91,6 +91,12 @@
         "useSemanticRanker": "Semantische rangschikking gebruiken",
         "useSemanticCaptions": "Semantische bijschriften gebruiken",
         "useQueryRewriting": "Gebruik de herformulering van zoekopdrachten om informatie op te halen",
+        "reasoningEffort": "Redeneerinspanning",
+        "reasoningEffortOptions": {
+            "low": "Laag",
+            "medium": "Gemiddeld",
+            "high": "Hoog"
+        },
         "useSuggestFollowupQuestions": "Vervolgvragen voorstellen",
         "useGPT4V": "GPT-visiemodel gebruiken",
         "gpt4VInput": {
@@ -142,6 +148,10 @@
             "Activeert de semantische ranker van Azure AI Search, een model dat zoekresultaten rangschikt op basis van semantische overeenkomst met de zoekopdracht.",
         "useSemanticCaptions":
             "Stuurt semantische bijschriften naar het taalmodel in plaats van het volledige zoekresultaat. Een semantisch bijschrift wordt geëxtraheerd voor ieder zoekresultaat tijdens het toepassen van de semantische ranker.",
+        "useQueryRewriting":
+            "Schakelt Azure AI Search query herformulering in, een proces dat de query van de gebruiker wijzigt om zoekresultaten te verbeteren. Vereist dat semantische rangschikking is ingeschakeld.",
+        "reasoningEffort":
+            "Stelt de redeneerinspanning voor het taalmodel in. Hogere waarden resulteren in meer redeneren, maar kunnen langer duren om een reactie te genereren. De standaard is gemiddeld.",
         "suggestFollowupQuestions": "Vraagt het taalmodel om vervolgvragen voor te stellen op basis van de vraag.",
         "useGPT4Vision": "Gebruikt GPT-4-Turbo met Vision om antwoorden te genereren op basis van afbeeldingen en tekst uit de index.",
         "vectorFields":
diff --git a/app/frontend/src/locales/ptBR/translation.json b/app/frontend/src/locales/ptBR/translation.json
index 326017e72e..9de54cf6e7 100644
--- a/app/frontend/src/locales/ptBR/translation.json
+++ b/app/frontend/src/locales/ptBR/translation.json
@@ -92,6 +92,12 @@
         "useSemanticCaptions": "Usar legendas semânticas",
         "useSuggestFollowupQuestions": "Sugerir perguntas complementares",
         "useQueryRewriting": "Utilize a reescrita de consultas para a recuperação",
+        "reasoningEffort": "Esforço de raciocínio",
+        "reasoningEffortOptions": {
+            "low": "Baixo",
+            "medium": "Médio",
+            "high": "Alto"
+        },
         "useGPT4V": "Usar modelo de visão GPT",
         "gpt4VInput": {
             "label": "Entradas do modelo de visão GPT",
@@ -142,8 +148,12 @@
             "Ativa o rankeador semântico da pesquisa do Azure AI, um modelo que reordena os resultados da pesquisa com base na similaridade semântica à consulta do usuário.",
         "useSemanticCaptions":
              "Envia legendas semânticas para o LLM em vez do resultado completo da pesquisa. Uma legenda semântica é extraída durante o processo de ranqueamento semântico.",
+        "useQueryRewriting":
+             "Habilita a reescrita de consultas do Azure AI Search, um processo que modifica a consulta do usuário para melhorar os resultados da pesquisa. Requer que o rankeador semântico esteja habilitado.",
+        "reasoningEffort":
+             "Define o esforço de raciocínio para o LLM. Valores mais altos resultam em mais raciocínio, mas podem levar mais tempo para gerar uma resposta. O padrão é médio.",
         "suggestFollowupQuestions": "Solicita ao LLM que sugira perguntas de acompanhamento com base na consulta do usuário.",
-         "useGPT4Vision": "Usa GPT-4-Turbo com Visão para gerar respostas com base em imagens e texto do índice.",
+        "useGPT4Vision": "Usa GPT-4-Turbo com Visão para gerar respostas com base em imagens e texto do índice.",
         "vectorFields":
             "Especifica quais campos de incorporação (embeddings) no Índice de Pesquisa do Azure AI serão pesquisados: 'Imagens e texto', 'Imagens' ou 'Texto'.",
         "gpt4VisionInputs":
diff --git a/app/frontend/src/locales/tr/translation.json b/app/frontend/src/locales/tr/translation.json
index 1fc0f95f14..5686c0fa1e 100644
--- a/app/frontend/src/locales/tr/translation.json
+++ b/app/frontend/src/locales/tr/translation.json
@@ -91,6 +91,12 @@
         "useSemanticRanker": "Anlamsal sıralayıcı kullan",
         "useSemanticCaptions": "Anlamsal altyazılar kullan",
         "useQueryRewriting": "Bilgi erişimi için sorgu yeniden yazımını kullanın",
+        "reasoningEffort": "Akıl yürütme çabası",
+        "reasoningEffortOptions": {
+            "low": "Düşük",
+            "medium": "Orta",
+            "high": "Yüksek"
+        },
         "useSuggestFollowupQuestions": "Takip soruları öner",
         "useGPT4V": "GPT vizyon modelini kullan",
         "gpt4VInput": {
@@ -142,6 +148,10 @@
             "Azure AI Arama anlamsal sıralayıcısını etkinleştirir, bu model arama sonuçlarını kullanıcının sorgusuna anlamsal benzerliğe göre sıralar.",
         "useSemanticCaptions":
             "Tam arama sonucu yerine anlamsal altyazıları dil modeline gönderir. Anlamsal sıralama sırasında her arama sonucu için bir anlamsal altyazı çıkarılır.",
+        "useQueryRewriting":
+            "Azure AI Arama sorgu yeniden yazımını etkinleştirir; bu işlem, arama sonuçlarını iyileştirmek için kullanıcının sorgusunu değiştirir. Anlamsal sıralayıcının etkin olmasını gerektirir.",
+        "reasoningEffort":
+            "Dil modeli için akıl yürütme çabasını ayarlar. Daha yüksek değerler daha fazla akıl yürütme ile sonuçlanır, ancak yanıt oluşturmak daha uzun sürebilir. Varsayılan orta seviyedir.",
         "suggestFollowupQuestions": "Kullanıcının sorusuna dayalı olarak dil modelinden takip soruları önermesini ister.",
         "useGPT4Vision": "Görseller ve metinlerden oluşan indekslere dayalı olarak yanıtlar oluşturmak için GPT-4-Turbo ile Vision kullanır.",
         "vectorFields":
@@ -154,4 +164,4 @@
         "useOidSecurityFilter": "Kimliği doğrulanmış kullanıcının OID'sine göre arama sonuçlarını filtreler.",
         "useGroupsSecurityFilter": "Kimliği doğrulanmış kullanıcının gruplarına göre arama sonuçlarını filtreler."
     }
-}
\ No newline at end of file
+}
diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx
index 660409283f..8e38076adb 100644
--- a/app/frontend/src/pages/ask/Ask.tsx
+++ b/app/frontend/src/pages/ask/Ask.tsx
@@ -33,6 +33,7 @@ export function Component(): JSX.Element {
     const [useSemanticRanker, setUseSemanticRanker] = useState<boolean>(true);
     const [useSemanticCaptions, setUseSemanticCaptions] = useState<boolean>(false);
     const [useQueryRewriting, setUseQueryRewriting] = useState<boolean>(false);
+    const [reasoningEffort, setReasoningEffort] = useState<string>("");
     const [useGPT4V, setUseGPT4V] = useState<boolean>(false);
     const [gpt4vInput, setGPT4VInput] = useState<GPT4VInput>(GPT4VInput.TextAndImages);
     const [includeCategory, setIncludeCategory] = useState<string>("");
@@ -44,6 +45,7 @@ export function Component(): JSX.Element {
     const [showGPT4VOptions, setShowGPT4VOptions] = useState<boolean>(false);
     const [showSemanticRankerOption, setShowSemanticRankerOption] = useState<boolean>(false);
     const [showQueryRewritingOption, setShowQueryRewritingOption] = useState<boolean>(false);
+    const [showReasoningEffortOption, setShowReasoningEffortOption] = useState<boolean>(false);
     const [showVectorOption, setShowVectorOption] = useState<boolean>(false);
     const [showUserUpload, setShowUserUpload] = useState<boolean>(false);
     const [showLanguagePicker, setshowLanguagePicker] = useState<boolean>(false);
@@ -82,6 +84,10 @@ export function Component(): JSX.Element {
             setShowSemanticRankerOption(config.showSemanticRankerOption);
             setUseQueryRewriting(config.showQueryRewritingOption);
             setShowQueryRewritingOption(config.showQueryRewritingOption);
+            setShowReasoningEffortOption(config.showReasoningEffortOption);
+            if (config.showReasoningEffortOption) {
+                setReasoningEffort(config.defaultReasoningEffort);
+            }
             setShowVectorOption(config.showVectorOption);
             if (!config.showVectorOption) {
                 setRetrievalMode(RetrievalMode.Text);
@@ -131,6 +137,7 @@ export function Component(): JSX.Element {
                         semantic_ranker: useSemanticRanker,
                         semantic_captions: useSemanticCaptions,
                         query_rewriting: useQueryRewriting,
+                        reasoning_effort: reasoningEffort,
                         use_oid_security_filter: useOidSecurityFilter,
                         use_groups_security_filter: useGroupsSecurityFilter,
                         vector_fields: vectorFieldList,
@@ -188,6 +195,9 @@ export function Component(): JSX.Element {
             case "useQueryRewriting":
                 setUseQueryRewriting(value);
                 break;
+            case "reasoningEffort":
+                setReasoningEffort(value);
+                break;
             case "excludeCategory":
                 setExcludeCategory(value);
                 break;
@@ -330,6 +340,7 @@ export function Component(): JSX.Element {
                     useSemanticRanker={useSemanticRanker}
                     useSemanticCaptions={useSemanticCaptions}
                     useQueryRewriting={useQueryRewriting}
+                    reasoningEffort={reasoningEffort}
                     excludeCategory={excludeCategory}
                     includeCategory={includeCategory}
                     retrievalMode={retrievalMode}
@@ -338,6 +349,7 @@ export function Component(): JSX.Element {
                     vectorFieldList={vectorFieldList}
                     showSemanticRankerOption={showSemanticRankerOption}
                     showQueryRewritingOption={showQueryRewritingOption}
+                    showReasoningEffortOption={showReasoningEffortOption}
                     showGPT4VOptions={showGPT4VOptions}
                     showVectorOption={showVectorOption}
                     useOidSecurityFilter={useOidSecurityFilter}
diff --git a/app/frontend/src/pages/chat/Chat.tsx b/app/frontend/src/pages/chat/Chat.tsx
index 97995df926..5d00c2c914 100644
--- a/app/frontend/src/pages/chat/Chat.tsx
+++ b/app/frontend/src/pages/chat/Chat.tsx
@@ -49,6 +49,8 @@ const Chat = () => {
     const [retrievalMode, setRetrievalMode] = useState<RetrievalMode>(RetrievalMode.Hybrid);
     const [useSemanticRanker, setUseSemanticRanker] = useState<boolean>(true);
     const [useQueryRewriting, setUseQueryRewriting] = useState<boolean>(false);
+    const [reasoningEffort, setReasoningEffort] = useState<string>("");
+    const [streamingEnabled, setStreamingEnabled] = useState<boolean>(true);
     const [shouldStream, setShouldStream] = useState<boolean>(true);
     const [useSemanticCaptions, setUseSemanticCaptions] = useState<boolean>(false);
     const [includeCategory, setIncludeCategory] = useState<string>("");
@@ -78,6 +80,7 @@ const Chat = () => {
     const [showGPT4VOptions, setShowGPT4VOptions] = useState<boolean>(false);
     const [showSemanticRankerOption, setShowSemanticRankerOption] = useState<boolean>(false);
     const [showQueryRewritingOption, setShowQueryRewritingOption] = useState<boolean>(false);
+    const [showReasoningEffortOption, setShowReasoningEffortOption] = useState<boolean>(false);
     const [showVectorOption, setShowVectorOption] = useState<boolean>(false);
     const [showUserUpload, setShowUserUpload] = useState<boolean>(false);
     const [showLanguagePicker, setshowLanguagePicker] = useState<boolean>(false);
@@ -104,6 +107,14 @@ const Chat = () => {
             setShowSemanticRankerOption(config.showSemanticRankerOption);
             setUseQueryRewriting(config.showQueryRewritingOption);
             setShowQueryRewritingOption(config.showQueryRewritingOption);
+            setShowReasoningEffortOption(config.showReasoningEffortOption);
+            setStreamingEnabled(config.streamingEnabled);
+            if (!config.streamingEnabled) {
+                setShouldStream(false);
+            }
+            if (config.showReasoningEffortOption) {
+                setReasoningEffort(config.defaultReasoningEffort);
+            }
             setShowVectorOption(config.showVectorOption);
             if (!config.showVectorOption) {
                 setRetrievalMode(RetrievalMode.Text);
@@ -202,6 +213,7 @@ const Chat = () => {
                         semantic_ranker: useSemanticRanker,
                         semantic_captions: useSemanticCaptions,
                         query_rewriting: useQueryRewriting,
+                        reasoning_effort: reasoningEffort,
                         suggest_followup_questions: useSuggestFollowupQuestions,
                         use_oid_security_filter: useOidSecurityFilter,
                         use_groups_security_filter: useGroupsSecurityFilter,
@@ -293,6 +305,9 @@ const Chat = () => {
             case "useQueryRewriting":
                 setUseQueryRewriting(value);
                 break;
+            case "reasoningEffort":
+                setReasoningEffort(value);
+                break;
             case "useSemanticCaptions":
                 setUseSemanticCaptions(value);
                 break;
@@ -509,6 +524,7 @@ const Chat = () => {
                         useSemanticRanker={useSemanticRanker}
                         useSemanticCaptions={useSemanticCaptions}
                         useQueryRewriting={useQueryRewriting}
+                        reasoningEffort={reasoningEffort}
                         excludeCategory={excludeCategory}
                         includeCategory={includeCategory}
                         retrievalMode={retrievalMode}
@@ -517,6 +533,7 @@ const Chat = () => {
                         vectorFieldList={vectorFieldList}
                         showSemanticRankerOption={showSemanticRankerOption}
                         showQueryRewritingOption={showQueryRewritingOption}
+                        showReasoningEffortOption={showReasoningEffortOption}
                         showGPT4VOptions={showGPT4VOptions}
                         showVectorOption={showVectorOption}
                         useOidSecurityFilter={useOidSecurityFilter}
@@ -525,6 +542,7 @@ const Chat = () => {
                         loggedIn={loggedIn}
                         requireAccessControl={requireAccessControl}
                         shouldStream={shouldStream}
+                        streamingEnabled={streamingEnabled}
                         useSuggestFollowupQuestions={useSuggestFollowupQuestions}
                         showSuggestFollowupQuestions={true}
                         onChange={handleSettingsChange}
diff --git a/docs/deploy_features.md b/docs/deploy_features.md
index c1390b05ec..5da19ddc37 100644
--- a/docs/deploy_features.md
+++ b/docs/deploy_features.md
@@ -1,10 +1,10 @@
-
 # RAG chat: Enabling optional features
 
 This document covers optional features that can be enabled in the deployed Azure resources.
 You should typically enable these features before running `azd up`. Once you've set them, return to the [deployment steps](../README.md#deploying).
 
 * [Using different chat completion models](#using-different-chat-completion-models)
+* [Using reasoning models](#using-reasoning-models)
 * [Using text-embedding-3 models](#using-text-embedding-3-models)
 * [Enabling GPT-4 Turbo with Vision](#enabling-gpt-4-turbo-with-vision)
 * [Enabling media description with Azure Content Understanding](#enabling-media-description-with-azure-content-understanding)
@@ -121,6 +121,13 @@ This process does *not* delete your previous model deployment. If you want to de
 > [!NOTE]
 > To revert back to a previous model, run the same commands with the previous model name and version.
 
+## Using reasoning models
+
+⚠️ This feature is not currently compatible with [vision integration](./gpt4v.md).
+
+This feature allows you to use reasoning models to generate responses based on retrieved content. These models spend more time processing and understanding the user's request.
+To enable reasoning models, follow the steps in [the reasoning models guide](./reasoning.md).
+
 ## Using text-embedding-3 models
 
 By default, the deployed Azure web app uses the `text-embedding-ada-002` embedding model. If you want to use one of the text-embedding-3 models, you can do so by following these steps:
diff --git a/docs/images/reasoning.png b/docs/images/reasoning.png
new file mode 100644
index 0000000000..8a3ea205c5
Binary files /dev/null and b/docs/images/reasoning.png differ
diff --git a/docs/images/token-usage.png b/docs/images/token-usage.png
new file mode 100644
index 0000000000..4541674f43
Binary files /dev/null and b/docs/images/token-usage.png differ
diff --git a/docs/reasoning.md b/docs/reasoning.md
new file mode 100644
index 0000000000..51e8ac023d
--- /dev/null
+++ b/docs/reasoning.md
@@ -0,0 +1,69 @@
+# RAG chat: Using reasoning models
+
+This repository includes an optional feature that uses reasoning models to generate responses based on retrieved content. These models spend more time processing and understanding the user's request.
+
+## Using the feature
+
+### Supported Models
+
+* o3-mini
+* o1
+
+### Prerequisites
+
+* The ability to deploy a reasoning model in the [supported regions](https://learn.microsoft.com/azure/ai-services/openai/concepts/models#standard-deployment-model-availability). If you're not sure, try to create an o3-mini deployment from your Azure OpenAI deployments page.
+
+### Deployment
+
+1. **Enable reasoning:**
+
+   Set the environment variables for your Azure OpenAI GPT deployment so that it uses your reasoning model.
+
+   For o3-mini:
+
+   ```shell
+   azd env set AZURE_OPENAI_CHATGPT_MODEL o3-mini
+   azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT o3-mini
+   azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2025-01-31
+   azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard
+   ```
+
+   For o1:
+
+   ```shell
+   azd env set AZURE_OPENAI_CHATGPT_MODEL o1
+   azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT o1
+   azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_VERSION 2024-12-17
+   azd env set AZURE_OPENAI_CHATGPT_DEPLOYMENT_SKU GlobalStandard
+   ```
+
+2. **(Optional) Set default reasoning effort**
+
+   You can configure how much effort the reasoning model spends on processing and understanding the user's request. Valid options are `low`, `medium`, and `high`. Reasoning effort defaults to `medium` if not set.
+
+   Set the environment variable for reasoning effort:
+
+   ```shell
+   azd env set AZURE_OPENAI_REASONING_EFFORT medium
+   ```
+
+3. **Update the infrastructure and application:**
+
+   Execute `azd up` to provision the infrastructure changes (only the new model, if you ran `up` previously) and deploy the application code with the updated environment variables.
+
+4. **Try out the feature:**
+
+   Open the web app and start a new chat. The reasoning model will be used for all chat completion requests, including the query rewriting step.
+
+5. **Experiment with reasoning effort:**
+
+   Select the developer options in the web app and change "Reasoning Effort" to `low`, `medium`, or `high`. This overrides the default reasoning effort for your requests (`medium`, unless you set `AZURE_OPENAI_REASONING_EFFORT`).
+
+   ![Reasoning configuration screenshot](./images/reasoning.png)
+
+6. **Understand token usage:**
+
+   The reasoning models use additional billed tokens behind the scenes for the thinking process.
+   To see the token usage, select the lightbulb icon on a chat answer. This will open the "Thought process" tab, which shows the reasoning model's thought process and the token usage for each chat completion.
+
+   ![Thought process token usage](./images/token-usage.png)
diff --git a/infra/main.bicep b/infra/main.bicep
index fa7a87c1b5..a83db625b8 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -37,6 +37,8 @@ param storageResourceGroupLocation string = location
 param storageContainerName string = 'content'
 param storageSkuName string // Set in main.parameters.json
 
+param defaultReasoningEffort string // Set in main.parameters.json
+
 param userStorageAccountName string = ''
 param userStorageContainerName string = 'user-content'
 
@@ -400,6 +402,7 @@ var appEnvVariables = {
   AZURE_OPENAI_EMB_DIMENSIONS: embedding.dimensions
   AZURE_OPENAI_CHATGPT_MODEL: chatGpt.modelName
   AZURE_OPENAI_GPT4V_MODEL: gpt4v.modelName
+  AZURE_OPENAI_REASONING_EFFORT: defaultReasoningEffort
   // Specific to Azure OpenAI
   AZURE_OPENAI_SERVICE: isAzureOpenAiHost && deployAzureOpenAi ? openAi.outputs.name : ''
   AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName
diff --git a/infra/main.parameters.json b/infra/main.parameters.json
index 6f825b99fd..b75b9c4210 100644
--- a/infra/main.parameters.json
+++ b/infra/main.parameters.json
@@ -83,6 +83,9 @@
     "searchServiceQueryRewriting": {
       "value": "${AZURE_SEARCH_QUERY_REWRITING=false}"
     },
+    "defaultReasoningEffort": {
+      "value": "${AZURE_OPENAI_REASONING_EFFORT=medium}"
+    },
     "storageAccountName": {
       "value": "${AZURE_STORAGE_ACCOUNT}"
     },
diff --git a/tests/conftest.py b/tests/conftest.py
index fd2fa139a0..67d1b60eba 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import IO
+from typing import IO, Any, Dict
 from unittest import mock
 
 import aiohttp
@@ -14,7 +14,7 @@
 from azure.search.documents.indexes.aio import SearchIndexClient
 from azure.search.documents.indexes.models import SearchField, SearchIndex
 from azure.storage.blob.aio import ContainerClient
-from openai.types import CreateEmbeddingResponse, Embedding
+from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai.types.chat.chat_completion import (
     ChatCompletionMessage,
@@ -108,9 +108,9 @@ def patch(openai_client):
 @pytest.fixture
 def mock_openai_chatcompletion(monkeypatch):
     class AsyncChatCompletionIterator:
-        def __init__(self, answer: str):
+        def __init__(self, answer: str, reasoning: bool, usage: Dict[str, Any]):
             chunk_id = "test-id"
-            model = "gpt-4o-mini"
+            model = "gpt-4o-mini" if not reasoning else "o3-mini"
             self.responses = [
                 {"object": "chat.completion.chunk", "choices": [], "id": chunk_id, "model": model, "created": 1},
                 {
@@ -170,6 +170,17 @@ def __init__(self, answer: str):
                     }
                 )
 
+            self.responses.append(
+                {
+                    "object": "chat.completion.chunk",
+                    "choices": [],
+                    "id": chunk_id,
+                    "model": model,
+                    "created": 1,
+                    "usage": usage,
+                }
+            )
+
         def __aiter__(self):
             return self
 
@@ -184,6 +195,19 @@ async def mock_acreate(*args, **kwargs):
         assert kwargs.get("seed") is None or kwargs.get("seed") == 42
 
         messages = kwargs["messages"]
+        model = kwargs["model"]
+        reasoning = model == "o3-mini"
+        completion_usage: Dict[str, Any] = {  # mocked usage payload; typing.Any, not the builtin any()
+            "completion_tokens": 896,
+            "prompt_tokens": 23,
+            "total_tokens": 919,
+            "completion_tokens_details": {
+                "accepted_prediction_tokens": 0,
+                "audio_tokens": 0,
+                "reasoning_tokens": 384 if reasoning else 0,  # non-zero only when the requested model is "o3-mini"
+                "rejected_prediction_tokens": 0,
+            },
+        }
         last_question = messages[-1]["content"]
         if last_question == "Generate search query for: What is the capital of France?":
             answer = "capital of France"
@@ -196,7 +220,7 @@ async def mock_acreate(*args, **kwargs):
             if messages[0]["content"].find("Generate 3 very brief follow-up questions") > -1:
                 answer = "The capital of France is Paris. [Benefit_Options-2.pdf]. <<What is the capital of Spain?>>"
         if "stream" in kwargs and kwargs["stream"] is True:
-            return AsyncChatCompletionIterator(answer)
+            return AsyncChatCompletionIterator(answer, reasoning, completion_usage)
         else:
             return ChatCompletion(
                 object="chat.completion",
@@ -208,6 +232,7 @@ async def mock_acreate(*args, **kwargs):
                 id="test-123",
                 created=0,
                 model="test-model",
+                usage=CompletionUsage.model_validate(completion_usage),
             )
 
     def patch(openai_client):
@@ -292,6 +317,24 @@ def mock_blob_container_client(monkeypatch):
     },
 ]
 
+reasoning_envs = [  # parameter sets consumed by the mock_reasoning_env fixture
+    {  # o3-mini deployment with AZURE_OPENAI_REASONING_EFFORT left unset
+        "OPENAI_HOST": "azure",
+        "AZURE_OPENAI_SERVICE": "test-openai-service",
+        "AZURE_OPENAI_CHATGPT_MODEL": "o3-mini",
+        "AZURE_OPENAI_CHATGPT_DEPLOYMENT": "o3-mini",
+        "AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada",
+    },
+    {  # same deployment, plus an explicit reasoning effort of "low"
+        "OPENAI_HOST": "azure",
+        "AZURE_OPENAI_SERVICE": "test-openai-service",
+        "AZURE_OPENAI_CHATGPT_MODEL": "o3-mini",
+        "AZURE_OPENAI_CHATGPT_DEPLOYMENT": "o3-mini",
+        "AZURE_OPENAI_EMB_DEPLOYMENT": "test-ada",
+        "AZURE_OPENAI_REASONING_EFFORT": "low",
+    },
+]
+
 
 @pytest.fixture(params=envs, ids=["client0", "client1"])
 def mock_env(monkeypatch, request):
@@ -319,6 +362,30 @@ def mock_env(monkeypatch, request):
             yield
 
 
+@pytest.fixture(params=reasoning_envs, ids=["reasoning_client0", "reasoning_client1"])  # runs once per entry in reasoning_envs
+def mock_reasoning_env(monkeypatch, request):
+    with mock.patch.dict(os.environ, clear=True):  # start from an emptied os.environ; restored when the block exits
+        monkeypatch.setenv("AZURE_STORAGE_ACCOUNT", "test-storage-account")
+        monkeypatch.setenv("AZURE_STORAGE_CONTAINER", "test-storage-container")
+        monkeypatch.setenv("AZURE_STORAGE_RESOURCE_GROUP", "test-storage-rg")
+        monkeypatch.setenv("AZURE_SUBSCRIPTION_ID", "test-storage-subid")
+        monkeypatch.setenv("ENABLE_LANGUAGE_PICKER", "true")
+        monkeypatch.setenv("USE_SPEECH_INPUT_BROWSER", "true")
+        monkeypatch.setenv("USE_SPEECH_OUTPUT_AZURE", "true")
+        monkeypatch.setenv("AZURE_SEARCH_INDEX", "test-search-index")
+        monkeypatch.setenv("AZURE_SEARCH_SERVICE", "test-search-service")
+        monkeypatch.setenv("AZURE_SPEECH_SERVICE_ID", "test-id")
+        monkeypatch.setenv("AZURE_SPEECH_SERVICE_LOCATION", "eastus")
+        monkeypatch.setenv("ALLOWED_ORIGIN", "https://frontend.com")
+        monkeypatch.setenv("TEST_ENABLE_REASONING", "true")
+        for key, value in request.param.items():  # apply this parameter set's variables after the shared ones
+            monkeypatch.setenv(key, value)
+
+        with mock.patch("app.AzureDeveloperCliCredential") as mock_default_azure_credential:  # replace the credential class referenced in app
+            mock_default_azure_credential.return_value = MockAzureCredential()
+            yield  # the dependent test runs here, while both patches are active
+
+
 @pytest_asyncio.fixture(scope="function")
 async def client(
     monkeypatch,
@@ -338,6 +405,25 @@ async def client(
         yield test_app.test_client()
 
 
+@pytest_asyncio.fixture(scope="function")  # a fresh app and test client for every test function
+async def reasoning_client(
+    monkeypatch,
+    mock_reasoning_env,
+    mock_openai_chatcompletion,
+    mock_openai_embedding,
+    mock_acs_search,
+    mock_blob_container_client,
+    mock_azurehttp_calls,
+):
+    quart_app = app.create_app()  # created after mock_reasoning_env has set the o3-mini environment
+
+    async with quart_app.test_app() as test_app:
+        test_app.app.config.update({"TESTING": True})
+        mock_openai_chatcompletion(test_app.app.config[app.CONFIG_OPENAI_CLIENT])  # patch chat completions on the app's OpenAI client
+        mock_openai_embedding(test_app.app.config[app.CONFIG_OPENAI_CLIENT])  # presumably patches embeddings on the same client; helper not shown here
+        yield test_app.test_client()
+
+
 @pytest_asyncio.fixture(scope="function")
 async def client_with_expiring_token(
     monkeypatch,
diff --git a/tests/e2e.py b/tests/e2e.py
index 509890e5b1..81db25dd3c 100644
--- a/tests/e2e.py
+++ b/tests/e2e.py
@@ -232,6 +232,7 @@ def handle_config(route: Route):
                     "showSemanticRankerOption": True,
                     "showUserUpload": False,
                     "showVectorOption": True,
+                    "streamingEnabled": True,
                 }
             ),
             status=200,
diff --git a/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json b/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json
index d7605f3af2..29bf5c4b32 100644
--- a/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_prompt_template/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json b/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json
index e11f5fb671..cdb33e9ff1 100644
--- a/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_prompt_template/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json b/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json
index 62e395e3ac..9dac59bde7 100644
--- a/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_prompt_template_concat/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json b/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json
index 3bd997ecee..9976288c77 100644
--- a/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_prompt_template_concat/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json
index b74c4f4172..30ddcddf68 100644
--- a/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_hybrid/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json
index 9025a25ecf..dbf67f961a 100644
--- a/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_hybrid/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json
index a76e0a8593..fd52926eca 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json
index 825fb9dc3f..751b64760d 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json
index 69aa3315b6..c4617a9651 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text_filter/auth_client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json
index a543cb8cfd..cb68ea0377 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text_filter_public_documents/auth_public_documents_client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json
index bf4e0718aa..ece30dbcc8 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: Caption: A whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json
index dfef2fe537..950a125e97 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticcaptions/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: Caption: A whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json
index fb6ada98b8..4b76175c16 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json
index dee62c6f17..0afea366fa 100644
--- a/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_rtr_text_semanticranker/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "What is the capital of France?",
@@ -66,7 +68,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_vision/client0/result.json b/tests/snapshots/test_app/test_ask_vision/client0/result.json
index 50effbc9d1..7d11ee12eb 100644
--- a/tests/snapshots/test_app/test_ask_vision/client0/result.json
+++ b/tests/snapshots/test_app/test_ask_vision/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "Are interest rates high?",
@@ -65,7 +67,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_ask_vision/client1/result.json b/tests/snapshots/test_app/test_ask_vision/client1/result.json
index 7cc4da207b..38fe9c5b82 100644
--- a/tests/snapshots/test_app/test_ask_vision/client1/result.json
+++ b/tests/snapshots/test_app/test_ask_vision/client1/result.json
@@ -8,6 +8,7 @@
                 "Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": "Are interest rates high?",
diff --git a/tests/snapshots/test_app/test_chat_followup/client0/result.json b/tests/snapshots/test_app/test_chat_followup/client0/result.json
index b32fdae0f7..7f4fa26166 100644
--- a/tests/snapshots/test_app/test_chat_followup/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_followup/client0/result.json
@@ -1,6 +1,7 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
@@ -37,7 +38,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -92,7 +99,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_followup/client1/result.json b/tests/snapshots/test_app/test_chat_followup/client1/result.json
index 40ba2b4b9b..cf31b9483e 100644
--- a/tests/snapshots/test_app/test_chat_followup/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_followup/client1/result.json
@@ -1,6 +1,7 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
@@ -38,7 +39,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -94,7 +101,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_hybrid/client0/result.json b/tests/snapshots/test_app/test_chat_hybrid/client0/result.json
index 830844085c..0fdac0a03d 100644
--- a/tests/snapshots/test_app/test_chat_hybrid/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_hybrid/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_hybrid/client1/result.json b/tests/snapshots/test_app/test_chat_hybrid/client1/result.json
index c9cf58fc11..c6db9307c9 100644
--- a/tests/snapshots/test_app/test_chat_hybrid/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_hybrid/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json
index 0ba4f8b056..6d47c7cad0 100644
--- a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: Caption: A whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json
index b381a62b1e..f89b711cda 100644
--- a/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_captions/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: Caption: A whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json
index 51cb153345..1fa0ba1dd2 100644
--- a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json
index b2bcd57a6a..0ff86af91b 100644
--- a/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_hybrid_semantic_ranker/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json b/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json
index 5c44d59fe0..7c526c680b 100644
--- a/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_prompt_template/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json b/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json
index bdbe6f4bda..5bcd584284 100644
--- a/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_prompt_template/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json b/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json
index 66114f869b..16d9bc346a 100644
--- a/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_prompt_template_concat/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json b/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json
index 01f7c27f28..43a36decc0 100644
--- a/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_prompt_template_concat/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_seed/client0/result.json b/tests/snapshots/test_app/test_chat_seed/client0/result.json
index 830844085c..0fdac0a03d 100644
--- a/tests/snapshots/test_app/test_chat_seed/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_seed/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_seed/client1/result.json b/tests/snapshots/test_app/test_chat_seed/client1/result.json
index c9cf58fc11..c6db9307c9 100644
--- a/tests/snapshots/test_app/test_chat_seed/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_seed/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json b/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json
index e0392cc324..92a71a0331 100644
--- a/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_session_state_persists/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json b/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json
index 628200117b..c32515de71 100644
--- a/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_session_state_persists/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines
index 1c44746909..53eee9c9f2 100644
--- a/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_followup/client0/result.jsonlines
@@ -1,4 +1,5 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A 
whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", 
"sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf]. ", "role": "assistant"}}
-{"delta": {"role": "assistant"}, "context": {"followup_questions": ["What is the capital of Spain?"]}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", 
"sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "followup_questions": ["What is the capital of Spain?"]}}
diff --git a/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines
index 9174de77ad..422c3bcf83 100644
--- a/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_followup/client1/result.jsonlines
@@ -1,4 +1,5 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": 
[{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf]. ", "role": "assistant"}}
-{"delta": {"role": "assistant"}, "context": {"followup_questions": ["What is the capital of Spain?"]}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, 
"sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].\n\n\n\n\nGenerate 3 very brief follow-up questions that the user would likely ask next.\nEnclose the follow-up questions in double angle brackets. Example:\n<<Are there exclusions for prescriptions?>>\n<<Which pharmacies can be ordered from?>>\n<<What is the limit for over-the-counter medication?>>\nDo not repeat questions that have already been asked.\nMake sure the last question ends with \">>\"."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "followup_questions": ["What is the capital of Spain?"]}}
diff --git a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines
index eb640e5693..f458d12065 100644
--- a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client0/result.jsonlines
@@ -1,3 +1,4 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A 
whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}]}, "session_state": {"conversation_id": 1234}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", 
"sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", 
"sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}}
diff --git a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines
index 3aac83b926..5ae6b512b3 100644
--- a/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_session_state_persists/client1/result.jsonlines
@@ -1,3 +1,4 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": 
[{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}]}, "session_state": {"conversation_id": 1234}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": {"conversation_id": 1234}}
diff --git a/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines
index 73f5507752..9e578d110a 100644
--- a/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines
@@ -1,3 +1,4 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A 
whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", 
"sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", 
"sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
diff --git a/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines
index 262c07139b..c1ea935fa6 100644
--- a/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_text/client1/result.jsonlines
@@ -1,3 +1,4 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": 
[{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
diff --git a/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines
index 22f2de85cc..d8beed42bc 100644
--- a/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_text_filter/auth_client0/result.jsonlines
@@ -1,3 +1,4 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": 
"Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": 
"There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": "category ne 'excluded' and (oids/any(g:search.in(g, 'OID_X')) or groups/any(g:search.in(g, 'GROUP_Y, GROUP_Z')))", "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": 
"There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
diff --git a/tests/snapshots/test_app/test_chat_stream_text_filter/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_filter/client0/result.jsonlines
deleted file mode 100644
index 3cb1cceef5..0000000000
--- a/tests/snapshots/test_app/test_chat_stream_text_filter/client0/result.jsonlines
+++ /dev/null
@@ -1,2 +0,0 @@
-{"data_points": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "thoughts": "Searched for:<br>capital of France<br><br>Conversations:<br>{'role': 'system', 'content': \"Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\\nFor tabular information return it as an html table. Do not return markdown format. If the question is not in English, answer in the language used in the question.\\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\\n\\n\\n\"}<br><br>{'role': 'user', 'content': 'What is the capital of France?\\n\\nSources:\\nBenefit_Options-2.pdf: There is a whistleblower policy.'}"}
-{"choices": [{"delta": {"content": "The capital of France is Paris."}}]}
diff --git a/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines
new file mode 100644
index 0000000000..06a45f2747
--- /dev/null
+++ b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client0/result.jsonlines
@@ -0,0 +1,4 @@
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": 
null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": null}}], "followup_questions": null}, "session_state": null}
+{"delta": {"content": null, "role": "assistant"}}
+{"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": 
null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": null, "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
diff --git a/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines
new file mode 100644
index 0000000000..d7adb3e45a
--- /dev/null
+++ b/tests/snapshots/test_app/test_chat_stream_text_reasoning/reasoning_client1/result.jsonlines
@@ -0,0 +1,4 @@
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": 
null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low"}}], "followup_questions": null}, "session_state": null}
+{"delta": {"content": null, "role": "assistant"}}
+{"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Benefit_Options-2.pdf: There is a whistleblower policy."], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: What is the capital of France?"}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "capital of France", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": false, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2", "content": "There is a whistleblower policy.", "embedding": null, "imageEmbedding": null, "category": 
null, "sourcepage": "Benefit_Options-2.pdf", "sourcefile": "Benefit_Options.pdf", "oids": null, "groups": null, "captions": [{"additional_properties": {}, "text": "Caption: A whistleblower policy.", "highlights": []}], "score": 0.03279569745063782, "reranker_score": 3.4577205181121826}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy."}], "props": {"model": "o3-mini", "deployment": "o3-mini", "reasoning_effort": "low", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 384, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
diff --git a/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines
index cb1d487ae0..d2b7cd3347 100644
--- a/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_vision/client0/result.jsonlines
@@ -1,3 +1,4 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini"}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "3</td><td>1</td></tr></table>\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. 
This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "3</td><td>1</td></tr></table>\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. 
This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "The capital of France is Paris. [Benefit_Options-2.pdf].", "role": null}}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": null}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "use_vector_search": true, "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "3</td><td>1</td></tr></table>\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf]."}, {"role": "user", "content": "Are interest rates high?\n\nSources:\n\nFinancial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. 
This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}], "props": {"model": "gpt-4o-mini", "token_usage": {"prompt_tokens": 23, "completion_tokens": 896, "reasoning_tokens": 0, "total_tokens": 919}}}], "followup_questions": null}, "session_state": null}
diff --git a/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines
index 3cbba8cfb6..5f639076f1 100644
--- a/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_vision/client1/result.jsonlines
@@ -1,3 +1,3 @@
-{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "vector_fields": ["embedding", "imageEmbedding"], "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "3</td><td>1</td></tr></table>\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\nEach image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:<file_name>\nEach text source starts in a new line and has the file name followed by colon and the actual information\nAlways include the source name from the image or text for each fact you use in the response in the format: [filename]\nAnswer the following question using only the data provided in the sources below.\nIf asking a clarifying question to the user would help, ask the question.\nBe brief in your answers.\nThe text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\nIf you cannot answer using the sources below, say you don't know. 
Return just the answer without any input texts."}, {"role": "user", "content": [{"type": "text", "text": "Are interest rates high?"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="}}, {"type": "text", "text": "Sources:\n\nFinancial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}]}], "props": {"model": "gpt-4"}}]}, "session_state": null}
+{"delta": {"role": "assistant"}, "context": {"data_points": {"text": ["Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "], "images": ["data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="]}, "thoughts": [{"title": "Prompt to generate search query", "description": [{"role": "system", "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. 
info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0."}, {"role": "user", "content": "How did crypto do last year?"}, {"role": "assistant", "content": "Summarize Cryptocurrency Market Dynamics from last year"}, {"role": "user", "content": "What are my health plans?"}, {"role": "assistant", "content": "Show available health plans"}, {"role": "user", "content": "Generate search query for: Are interest rates high?"}], "props": {"model": "gpt-4o-mini", "deployment": "test-chatgpt"}}, {"title": "Search using generated search query", "description": "interest rates", "props": {"use_semantic_captions": false, "use_semantic_ranker": false, "use_query_rewriting": false, "top": 3, "filter": null, "vector_fields": ["embedding", "imageEmbedding"], "use_text_search": true}}, {"title": "Search results", "description": [{"id": "file-Financial_Market_Analysis_Report_2023_pdf-46696E616E6369616C204D61726B657420416E616C79736973205265706F727420323032332E706466-page-14", "content": "3</td><td>1</td></tr></table>\nFinancial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors\nImpact of Interest Rates, Inflation, and GDP Growth on Financial Markets\n5\n4\n3\n2\n1\n0\n-1 2018 2019\n-2\n-3\n-4\n-5\n2020\n2021 2022 2023\nMacroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. 
This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance.\n-Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends\nRelative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100)\n2028\nBased on historical data, current trends, and economic indicators, this section presents predictions ", "embedding": "[-0.012668486, -0.02251158 ...+8 more]", "imageEmbedding": null, "category": null, "sourcepage": "Financial Market Analysis Report 2023-6.png", "sourcefile": "Financial Market Analysis Report 2023.pdf", "oids": null, "groups": null, "captions": [], "score": 0.04972677677869797, "reranker_score": 3.1704962253570557}], "props": null}, {"title": "Prompt to generate answer", "description": [{"role": "system", "content": "You are an intelligent assistant helping analyze the Annual Financial Report of Contoso Ltd., The documents contain text, graphs, tables and images.\nEach image source has the file name in the top left corner of the image with coordinates (10,10) pixels and is in the format SourceFileName:<file_name>\nEach text source starts in a new line and has the file name followed by colon and the actual information\nAlways include the source name from the image or text for each fact you use in the response in the format: [filename]\nAnswer the following question using only the data provided in the sources below.\nIf asking a clarifying question to the user would help, ask the question.\nBe brief in your answers.\nThe text and image source can be the same file name, don't use the image title when citing the image source, only use the file name as mentioned\nIf you cannot answer using the sources below, say you don't know. 
Return just the answer without any input texts."}, {"role": "user", "content": [{"type": "text", "text": "Are interest rates high?"}, {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z/C/HgAGgwJ/lK3Q6wAAAABJRU5ErkJggg=="}}, {"type": "text", "text": "Sources:\n\nFinancial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions"}]}], "props": {"model": "gpt-4"}}], "followup_questions": null}, "session_state": null}
 {"delta": {"content": null, "role": "assistant"}}
 {"delta": {"content": "From the provided sources, the impact of interest rates and GDP growth on financial markets can be observed through the line graph. [Financial Market Analysis Report 2023-7.png]", "role": null}}
diff --git a/tests/snapshots/test_app/test_chat_text/client0/result.json b/tests/snapshots/test_app/test_chat_text/client0/result.json
index 9bf26a6b06..1451d011ba 100644
--- a/tests/snapshots/test_app/test_chat_text/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_text/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text/client1/result.json b/tests/snapshots/test_app/test_chat_text/client1/result.json
index d06c5bf501..55c42ce819 100644
--- a/tests/snapshots/test_app/test_chat_text/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_text/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json b/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json
index fd0b26751b..8de34fcda6 100644
--- a/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json
+++ b/tests/snapshots/test_app/test_chat_text_filter/auth_client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json b/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json
index d730c316ea..23d76162ed 100644
--- a/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json
+++ b/tests/snapshots/test_app/test_chat_text_filter_public_documents/auth_public_documents_client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json
new file mode 100644
index 0000000000..4b6538d397
--- /dev/null
+++ b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client0/result.json
@@ -0,0 +1,121 @@
+{
+    "context": {
+        "data_points": {
+            "images": null,
+            "text": [
+                "Benefit_Options-2.pdf: There is a whistleblower policy."
+            ]
+        },
+        "followup_questions": null,
+        "thoughts": [
+            {
+                "description": [
+                    {
+                        "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0.",
+                        "role": "system"
+                    },
+                    {
+                        "content": "How did crypto do last year?",
+                        "role": "user"
+                    },
+                    {
+                        "content": "Summarize Cryptocurrency Market Dynamics from last year",
+                        "role": "assistant"
+                    },
+                    {
+                        "content": "What are my health plans?",
+                        "role": "user"
+                    },
+                    {
+                        "content": "Show available health plans",
+                        "role": "assistant"
+                    },
+                    {
+                        "content": "Generate search query for: What is the capital of France?",
+                        "role": "user"
+                    }
+                ],
+                "props": {
+                    "deployment": "o3-mini",
+                    "model": "o3-mini",
+                    "reasoning_effort": "low",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 384,
+                        "total_tokens": 919
+                    }
+                },
+                "title": "Prompt to generate search query"
+            },
+            {
+                "description": "capital of France",
+                "props": {
+                    "filter": null,
+                    "top": 3,
+                    "use_query_rewriting": false,
+                    "use_semantic_captions": false,
+                    "use_semantic_ranker": false,
+                    "use_text_search": true,
+                    "use_vector_search": false
+                },
+                "title": "Search using generated search query"
+            },
+            {
+                "description": [
+                    {
+                        "captions": [
+                            {
+                                "additional_properties": {},
+                                "highlights": [],
+                                "text": "Caption: A whistleblower policy."
+                            }
+                        ],
+                        "category": null,
+                        "content": "There is a whistleblower policy.",
+                        "embedding": null,
+                        "groups": null,
+                        "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2",
+                        "imageEmbedding": null,
+                        "oids": null,
+                        "reranker_score": 3.4577205181121826,
+                        "score": 0.03279569745063782,
+                        "sourcefile": "Benefit_Options.pdf",
+                        "sourcepage": "Benefit_Options-2.pdf"
+                    }
+                ],
+                "props": null,
+                "title": "Search results"
+            },
+            {
+                "description": [
+                    {
+                        "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].",
+                        "role": "system"
+                    },
+                    {
+                        "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy.",
+                        "role": "user"
+                    }
+                ],
+                "props": {
+                    "deployment": "o3-mini",
+                    "model": "o3-mini",
+                    "reasoning_effort": null,
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 384,
+                        "total_tokens": 919
+                    }
+                },
+                "title": "Prompt to generate answer"
+            }
+        ]
+    },
+    "message": {
+        "content": "The capital of France is Paris. [Benefit_Options-2.pdf].",
+        "role": "assistant"
+    },
+    "session_state": null
+}
\ No newline at end of file
diff --git a/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json
new file mode 100644
index 0000000000..205768aaa9
--- /dev/null
+++ b/tests/snapshots/test_app/test_chat_text_reasoning/reasoning_client1/result.json
@@ -0,0 +1,121 @@
+{
+    "context": {
+        "data_points": {
+            "images": null,
+            "text": [
+                "Benefit_Options-2.pdf: There is a whistleblower policy."
+            ]
+        },
+        "followup_questions": null,
+        "thoughts": [
+            {
+                "description": [
+                    {
+                        "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base.\nYou have access to Azure AI Search index with 100's of documents.\nGenerate a search query based on the conversation and the new question.\nDo not include cited source filenames and document names e.g. info.txt or doc.pdf in the search query terms.\nDo not include any text inside [] or <<>> in the search query terms.\nDo not include any special characters like '+'.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return just the number 0.",
+                        "role": "system"
+                    },
+                    {
+                        "content": "How did crypto do last year?",
+                        "role": "user"
+                    },
+                    {
+                        "content": "Summarize Cryptocurrency Market Dynamics from last year",
+                        "role": "assistant"
+                    },
+                    {
+                        "content": "What are my health plans?",
+                        "role": "user"
+                    },
+                    {
+                        "content": "Show available health plans",
+                        "role": "assistant"
+                    },
+                    {
+                        "content": "Generate search query for: What is the capital of France?",
+                        "role": "user"
+                    }
+                ],
+                "props": {
+                    "deployment": "o3-mini",
+                    "model": "o3-mini",
+                    "reasoning_effort": "low",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 384,
+                        "total_tokens": 919
+                    }
+                },
+                "title": "Prompt to generate search query"
+            },
+            {
+                "description": "capital of France",
+                "props": {
+                    "filter": null,
+                    "top": 3,
+                    "use_query_rewriting": false,
+                    "use_semantic_captions": false,
+                    "use_semantic_ranker": false,
+                    "use_text_search": true,
+                    "use_vector_search": false
+                },
+                "title": "Search using generated search query"
+            },
+            {
+                "description": [
+                    {
+                        "captions": [
+                            {
+                                "additional_properties": {},
+                                "highlights": [],
+                                "text": "Caption: A whistleblower policy."
+                            }
+                        ],
+                        "category": null,
+                        "content": "There is a whistleblower policy.",
+                        "embedding": null,
+                        "groups": null,
+                        "id": "file-Benefit_Options_pdf-42656E656669745F4F7074696F6E732E706466-page-2",
+                        "imageEmbedding": null,
+                        "oids": null,
+                        "reranker_score": 3.4577205181121826,
+                        "score": 0.03279569745063782,
+                        "sourcefile": "Benefit_Options.pdf",
+                        "sourcepage": "Benefit_Options-2.pdf"
+                    }
+                ],
+                "props": null,
+                "title": "Search results"
+            },
+            {
+                "description": [
+                    {
+                        "content": "Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\nIf the question is not in English, answer in the language used in the question.\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].",
+                        "role": "system"
+                    },
+                    {
+                        "content": "What is the capital of France?\n\nSources:\n\nBenefit_Options-2.pdf: There is a whistleblower policy.",
+                        "role": "user"
+                    }
+                ],
+                "props": {
+                    "deployment": "o3-mini",
+                    "model": "o3-mini",
+                    "reasoning_effort": "low",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 384,
+                        "total_tokens": 919
+                    }
+                },
+                "title": "Prompt to generate answer"
+            }
+        ]
+    },
+    "message": {
+        "content": "The capital of France is Paris. [Benefit_Options-2.pdf].",
+        "role": "assistant"
+    },
+    "session_state": null
+}
\ No newline at end of file
diff --git a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json
index 9f0e47f443..80bc43104d 100644
--- a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json
index 3ed4ac1600..969d01f46c 100644
--- a/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_text_semantic_ranker/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json
index 0f72f7cf21..317f577d61 100644
--- a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: Caption: A whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json
index 7186d891c1..2e3999cf6a 100644
--- a/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_text_semanticcaptions/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: Caption: A whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json b/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json
index 9f0e47f443..80bc43104d 100644
--- a/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_text_semanticranker/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json b/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json
index 3ed4ac1600..969d01f46c 100644
--- a/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_text_semanticranker/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vector/client0/result.json b/tests/snapshots/test_app/test_chat_vector/client0/result.json
index 097f7a0ac1..e7b84204c1 100644
--- a/tests/snapshots/test_app/test_chat_vector/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_vector/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vector/client1/result.json b/tests/snapshots/test_app/test_chat_vector/client1/result.json
index b9e3def516..c568dbe297 100644
--- a/tests/snapshots/test_app/test_chat_vector/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_vector/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json
index 451c3f37ed..43834ab3b5 100644
--- a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json
index 902bc4f8b8..866a2ac2b8 100644
--- a/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_vector_semantic_ranker/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -35,7 +37,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -91,7 +99,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vision/client0/result.json b/tests/snapshots/test_app/test_chat_vision/client0/result.json
index c71ac89f90..5592597666 100644
--- a/tests/snapshots/test_app/test_chat_vision/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_vision/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Financial Market Analysis Report 2023.pdf#page=6: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -83,7 +91,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vision/client1/result.json b/tests/snapshots/test_app/test_chat_vision/client1/result.json
index d1e69099ae..9a5514bf24 100644
--- a/tests/snapshots/test_app/test_chat_vision/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_vision/client1/result.json
@@ -8,6 +8,7 @@
                 "Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
diff --git a/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json b/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json
index 418debb6f1..7adcb574bb 100644
--- a/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_vision_vectors/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -34,7 +36,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -89,7 +97,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json b/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json
index d39e6d10fb..5527109145 100644
--- a/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_vision_vectors/client1/result.json
@@ -8,6 +8,7 @@
                 "Financial Market Analysis Report 2023-6.png: 3</td><td>1</td></tr></table> Financial markets are interconnected, with movements in one segment often influencing others. This section examines the correlations between stock indices, cryptocurrency prices, and commodity prices, revealing how changes in one market can have ripple effects across the financial ecosystem.Impact of Macroeconomic Factors Impact of Interest Rates, Inflation, and GDP Growth on Financial Markets 5 4 3 2 1 0 -1 2018 2019 -2 -3 -4 -5 2020 2021 2022 2023 Macroeconomic factors such as interest rates, inflation, and GDP growth play a pivotal role in shaping financial markets. This section analyzes how these factors have influenced stock, cryptocurrency, and commodity markets over recent years, providing insights into the complex relationship between the economy and financial market performance. -Interest Rates % -Inflation Data % GDP Growth % :unselected: :unselected:Future Predictions and Trends Relative Growth Trends for S&P 500, Bitcoin, and Oil Prices (2024 Indexed to 100) 2028 Based on historical data, current trends, and economic indicators, this section presents predictions "
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
diff --git a/tests/snapshots/test_app/test_chat_with_history/client0/result.json b/tests/snapshots/test_app/test_chat_with_history/client0/result.json
index 2601897f30..071e3aa95b 100644
--- a/tests/snapshots/test_app/test_chat_with_history/client0/result.json
+++ b/tests/snapshots/test_app/test_chat_with_history/client0/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -42,7 +44,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -105,7 +113,13 @@
                     }
                 ],
                 "props": {
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/snapshots/test_app/test_chat_with_history/client1/result.json b/tests/snapshots/test_app/test_chat_with_history/client1/result.json
index 9e237875fa..195dcb1587 100644
--- a/tests/snapshots/test_app/test_chat_with_history/client1/result.json
+++ b/tests/snapshots/test_app/test_chat_with_history/client1/result.json
@@ -1,10 +1,12 @@
 {
     "context": {
         "data_points": {
+            "images": null,
             "text": [
                 "Benefit_Options-2.pdf: There is a whistleblower policy."
             ]
         },
+        "followup_questions": null,
         "thoughts": [
             {
                 "description": [
@@ -43,7 +45,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate search query"
             },
@@ -107,7 +115,13 @@
                 ],
                 "props": {
                     "deployment": "test-chatgpt",
-                    "model": "gpt-4o-mini"
+                    "model": "gpt-4o-mini",
+                    "token_usage": {
+                        "completion_tokens": 896,
+                        "prompt_tokens": 23,
+                        "reasoning_tokens": 0,
+                        "total_tokens": 919
+                    }
                 },
                 "title": "Prompt to generate answer"
             }
diff --git a/tests/test_app.py b/tests/test_app.py
index 5c34f1a5af..f3bbeaccd1 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -769,6 +769,44 @@ async def test_chat_stream_text(client, snapshot):
     snapshot.assert_match(result, "result.jsonlines")
 
 
+@pytest.mark.asyncio
+async def test_chat_text_reasoning(reasoning_client, snapshot):
+    response = await reasoning_client.post(
+        "/chat",
+        json={
+            "messages": [{"content": "What is the capital of France?", "role": "user"}],
+            "context": {
+                "overrides": {"retrieval_mode": "text"},
+            },
+        },
+    )
+    assert response.status_code == 200
+    result = await response.get_json()
+    assert result["context"]["thoughts"][0]["props"]["token_usage"] is not None
+    assert result["context"]["thoughts"][0]["props"]["reasoning_effort"] is not None
+    assert result["context"]["thoughts"][3]["props"]["token_usage"] is not None
+    assert result["context"]["thoughts"][3]["props"]["token_usage"]["reasoning_tokens"] > 0
+    assert result["context"]["thoughts"][3]["props"]["reasoning_effort"] == os.getenv("AZURE_OPENAI_REASONING_EFFORT")
+
+    snapshot.assert_match(json.dumps(result, indent=4), "result.json")
+
+
+@pytest.mark.asyncio
+async def test_chat_stream_text_reasoning(reasoning_client, snapshot):
+    response = await reasoning_client.post(
+        "/chat/stream",
+        json={
+            "messages": [{"content": "What is the capital of France?", "role": "user"}],
+            "context": {
+                "overrides": {"retrieval_mode": "text"},
+            },
+        },
+    )
+    assert response.status_code == 200
+    result = await response.get_data()
+    snapshot.assert_match(result, "result.jsonlines")
+
+
 @pytest.mark.asyncio
 async def test_chat_stream_text_filter(auth_client, snapshot):
     response = await auth_client.post(
diff --git a/tests/test_app_config.py b/tests/test_app_config.py
index f5fa64c5ae..bb5a595217 100644
--- a/tests/test_app_config.py
+++ b/tests/test_app_config.py
@@ -259,3 +259,49 @@ async def test_app_config_for_client(client):
     assert result["showGPT4VOptions"] == (os.getenv("USE_GPT4V") == "true")
     assert result["showSemanticRankerOption"] is True
     assert result["showVectorOption"] is True
+    assert result["streamingEnabled"] is True
+    assert result["showReasoningEffortOption"] is False
+
+
+@pytest.mark.asyncio
+async def test_app_config_for_reasoning(monkeypatch, minimal_env):
+    monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "o3-mini")
+    monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "o3-mini")
+    quart_app = app.create_app()
+    async with quart_app.test_app() as test_app:
+        client = test_app.test_client()
+        response = await client.get("/config")
+        assert response.status_code == 200
+        result = await response.get_json()
+        assert result["streamingEnabled"] is True
+        assert result["showReasoningEffortOption"] is True
+
+
+@pytest.mark.asyncio
+async def test_app_config_for_reasoning_without_streaming(monkeypatch, minimal_env):
+    monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "o1")
+    monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "o1")
+    quart_app = app.create_app()
+    async with quart_app.test_app() as test_app:
+        client = test_app.test_client()
+        response = await client.get("/config")
+        assert response.status_code == 200
+        result = await response.get_json()
+        assert result["streamingEnabled"] is False
+        assert result["showReasoningEffortOption"] is True
+
+
+@pytest.mark.asyncio
+async def test_app_config_for_reasoning_override_effort(monkeypatch, minimal_env):
+    monkeypatch.setenv("AZURE_OPENAI_REASONING_EFFORT", "low")
+    monkeypatch.setenv("AZURE_OPENAI_CHATGPT_MODEL", "o3-mini")
+    monkeypatch.setenv("AZURE_OPENAI_CHATGPT_DEPLOYMENT", "o3-mini")
+    quart_app = app.create_app()
+    async with quart_app.test_app() as test_app:
+        client = test_app.test_client()
+        response = await client.get("/config")
+        assert response.status_code == 200
+        result = await response.get_json()
+        assert result["streamingEnabled"] is True
+        assert result["showReasoningEffortOption"] is True
+        assert result["defaultReasoningEffort"] == "low"