Skip to content

Commit 62f8b58

Browse files
Add query rewriting option (#2437)
Upgrade the Search SDK and add support for query rewriting, enabled by setting AZURE_SEARCH_QUERY_REWRITING. Co-authored-by: Pamela Fox <[email protected]>
1 parent b6f9b76 commit 62f8b58

File tree

92 files changed

+197
-12
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

92 files changed

+197
-12
lines changed

Diff for: .azdo/pipelines/azure-dev.yml

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ steps:
5959
AZURE_SEARCH_QUERY_LANGUAGE: $(AZURE_SEARCH_QUERY_LANGUAGE)
6060
AZURE_SEARCH_QUERY_SPELLER: $(AZURE_SEARCH_QUERY_SPELLER)
6161
AZURE_SEARCH_SEMANTIC_RANKER: $(AZURE_SEARCH_SEMANTIC_RANKER)
62+
AZURE_SEARCH_QUERY_REWRITING: $(AZURE_SEARCH_QUERY_REWRITING)
6263
AZURE_STORAGE_ACCOUNT: $(AZURE_STORAGE_ACCOUNT)
6364
AZURE_STORAGE_RESOURCE_GROUP: $(AZURE_STORAGE_RESOURCE_GROUP)
6465
AZURE_STORAGE_SKU: $(AZURE_STORAGE_SKU)

Diff for: .github/workflows/azure-dev.yml

+1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ jobs:
4949
AZURE_SEARCH_QUERY_LANGUAGE: ${{ vars.AZURE_SEARCH_QUERY_LANGUAGE }}
5050
AZURE_SEARCH_QUERY_SPELLER: ${{ vars.AZURE_SEARCH_QUERY_SPELLER }}
5151
AZURE_SEARCH_SEMANTIC_RANKER: ${{ vars.AZURE_SEARCH_SEMANTIC_RANKER }}
52+
AZURE_SEARCH_QUERY_REWRITING: ${{ vars.AZURE_SEARCH_QUERY_REWRITING }}
5253
AZURE_STORAGE_ACCOUNT: ${{ vars.AZURE_STORAGE_ACCOUNT }}
5354
AZURE_STORAGE_RESOURCE_GROUP: ${{ vars.AZURE_STORAGE_RESOURCE_GROUP }}
5455
AZURE_STORAGE_SKU: ${{ vars.AZURE_STORAGE_SKU }}

Diff for: app/backend/app.py

+6
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
CONFIG_INGESTER,
7070
CONFIG_LANGUAGE_PICKER_ENABLED,
7171
CONFIG_OPENAI_CLIENT,
72+
CONFIG_QUERY_REWRITING_ENABLED,
7273
CONFIG_SEARCH_CLIENT,
7374
CONFIG_SEMANTIC_RANKER_DEPLOYED,
7475
CONFIG_SPEECH_INPUT_ENABLED,
@@ -291,6 +292,7 @@ def config():
291292
{
292293
"showGPT4VOptions": current_app.config[CONFIG_GPT4V_DEPLOYED],
293294
"showSemanticRankerOption": current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED],
295+
"showQueryRewritingOption": current_app.config[CONFIG_QUERY_REWRITING_ENABLED],
294296
"showVectorOption": current_app.config[CONFIG_VECTOR_SEARCH_ENABLED],
295297
"showUserUpload": current_app.config[CONFIG_USER_UPLOAD_ENABLED],
296298
"showLanguagePicker": current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED],
@@ -453,6 +455,7 @@ async def setup_clients():
453455
AZURE_SEARCH_QUERY_LANGUAGE = os.getenv("AZURE_SEARCH_QUERY_LANGUAGE") or "en-us"
454456
AZURE_SEARCH_QUERY_SPELLER = os.getenv("AZURE_SEARCH_QUERY_SPELLER") or "lexicon"
455457
AZURE_SEARCH_SEMANTIC_RANKER = os.getenv("AZURE_SEARCH_SEMANTIC_RANKER", "free").lower()
458+
AZURE_SEARCH_QUERY_REWRITING = os.getenv("AZURE_SEARCH_QUERY_REWRITING", "false").lower()
456459

457460
AZURE_SPEECH_SERVICE_ID = os.getenv("AZURE_SPEECH_SERVICE_ID")
458461
AZURE_SPEECH_SERVICE_LOCATION = os.getenv("AZURE_SPEECH_SERVICE_LOCATION")
@@ -634,6 +637,9 @@ async def setup_clients():
634637

635638
current_app.config[CONFIG_GPT4V_DEPLOYED] = bool(USE_GPT4V)
636639
current_app.config[CONFIG_SEMANTIC_RANKER_DEPLOYED] = AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
640+
current_app.config[CONFIG_QUERY_REWRITING_ENABLED] = (
641+
AZURE_SEARCH_QUERY_REWRITING == "true" and AZURE_SEARCH_SEMANTIC_RANKER != "disabled"
642+
)
637643
current_app.config[CONFIG_VECTOR_SEARCH_ENABLED] = os.getenv("USE_VECTORS", "").lower() != "false"
638644
current_app.config[CONFIG_USER_UPLOAD_ENABLED] = bool(USE_USER_UPLOAD)
639645
current_app.config[CONFIG_LANGUAGE_PICKER_ENABLED] = ENABLE_LANGUAGE_PICKER

Diff for: app/backend/approaches/approach.py

+2
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ async def search(
149149
use_semantic_captions: bool,
150150
minimum_search_score: Optional[float],
151151
minimum_reranker_score: Optional[float],
152+
use_query_rewriting: Optional[bool] = None,
152153
) -> List[Document]:
153154
search_text = query_text if use_text_search else ""
154155
search_vectors = vectors if use_vector_search else []
@@ -158,6 +159,7 @@ async def search(
158159
filter=filter,
159160
top=top,
160161
query_caption="extractive|highlight-false" if use_semantic_captions else None,
162+
query_rewrites="generative" if use_query_rewriting else None,
161163
vector_queries=search_vectors,
162164
query_type=QueryType.SEMANTIC,
163165
query_language=self.query_language,

Diff for: app/backend/approaches/chatreadretrieveread.py

+3
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ async def run_until_final_call(
8989
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
9090
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
9191
use_semantic_captions = True if overrides.get("semantic_captions") else False
92+
use_query_rewriting = True if overrides.get("query_rewriting") else False
9293
top = overrides.get("top", 3)
9394
minimum_search_score = overrides.get("minimum_search_score", 0.0)
9495
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
@@ -147,6 +148,7 @@ async def run_until_final_call(
147148
use_semantic_captions,
148149
minimum_search_score,
149150
minimum_reranker_score,
151+
use_query_rewriting,
150152
)
151153

152154
# STEP 3: Generate a contextual and content specific answer using the search results and chat history
@@ -190,6 +192,7 @@ async def run_until_final_call(
190192
{
191193
"use_semantic_captions": use_semantic_captions,
192194
"use_semantic_ranker": use_semantic_ranker,
195+
"use_query_rewriting": use_query_rewriting,
193196
"top": top,
194197
"filter": filter,
195198
"use_vector_search": use_vector_search,

Diff for: app/backend/approaches/chatreadretrievereadvision.py

+3
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ async def run_until_final_call(
8181
use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
8282
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
8383
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
84+
use_query_rewriting = True if overrides.get("query_rewriting") else False
8485
use_semantic_captions = True if overrides.get("semantic_captions") else False
8586
top = overrides.get("top", 3)
8687
minimum_search_score = overrides.get("minimum_search_score", 0.0)
@@ -151,6 +152,7 @@ async def run_until_final_call(
151152
use_semantic_captions,
152153
minimum_search_score,
153154
minimum_reranker_score,
155+
use_query_rewriting,
154156
)
155157

156158
# STEP 3: Generate a contextual and content specific answer using the search results and chat history
@@ -207,6 +209,7 @@ async def run_until_final_call(
207209
{
208210
"use_semantic_captions": use_semantic_captions,
209211
"use_semantic_ranker": use_semantic_ranker,
212+
"use_query_rewriting": use_query_rewriting,
210213
"top": top,
211214
"filter": filter,
212215
"vector_fields": vector_fields,

Diff for: app/backend/approaches/retrievethenread.py

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ async def run(
6767
use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
6868
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
6969
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
70+
use_query_rewriting = True if overrides.get("query_rewriting") else False
7071
use_semantic_captions = True if overrides.get("semantic_captions") else False
7172
top = overrides.get("top", 3)
7273
minimum_search_score = overrides.get("minimum_search_score", 0.0)
@@ -89,6 +90,7 @@ async def run(
8990
use_semantic_captions,
9091
minimum_search_score,
9192
minimum_reranker_score,
93+
use_query_rewriting,
9294
)
9395

9496
# Process results
@@ -118,6 +120,7 @@ async def run(
118120
{
119121
"use_semantic_captions": use_semantic_captions,
120122
"use_semantic_ranker": use_semantic_ranker,
123+
"use_query_rewriting": use_query_rewriting,
121124
"top": top,
122125
"filter": filter,
123126
"use_vector_search": use_vector_search,

Diff for: app/backend/approaches/retrievethenreadvision.py

+3
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ async def run(
7676
use_text_search = overrides.get("retrieval_mode") in ["text", "hybrid", None]
7777
use_vector_search = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
7878
use_semantic_ranker = True if overrides.get("semantic_ranker") else False
79+
use_query_rewriting = True if overrides.get("query_rewriting") else False
7980
use_semantic_captions = True if overrides.get("semantic_captions") else False
8081
top = overrides.get("top", 3)
8182
minimum_search_score = overrides.get("minimum_search_score", 0.0)
@@ -108,6 +109,7 @@ async def run(
108109
use_semantic_captions,
109110
minimum_search_score,
110111
minimum_reranker_score,
112+
use_query_rewriting,
111113
)
112114

113115
# Process results
@@ -145,6 +147,7 @@ async def run(
145147
{
146148
"use_semantic_captions": use_semantic_captions,
147149
"use_semantic_ranker": use_semantic_ranker,
150+
"use_query_rewriting": use_query_rewriting,
148151
"top": top,
149152
"filter": filter,
150153
"vector_fields": vector_fields,

Diff for: app/backend/config.py

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
CONFIG_AUTH_CLIENT = "auth_client"
1111
CONFIG_GPT4V_DEPLOYED = "gpt4v_deployed"
1212
CONFIG_SEMANTIC_RANKER_DEPLOYED = "semantic_ranker_deployed"
13+
CONFIG_QUERY_REWRITING_ENABLED = "query_rewriting_enabled"
1314
CONFIG_VECTOR_SEARCH_ENABLED = "vector_search_enabled"
1415
CONFIG_SEARCH_CLIENT = "search_client"
1516
CONFIG_OPENAI_CLIENT = "openai_client"

Diff for: app/backend/requirements.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ tenacity
77
azure-ai-documentintelligence==1.0.0b4
88
azure-cognitiveservices-speech
99
azure-cosmos
10-
azure-search-documents==11.6.0b6
10+
azure-search-documents==11.6.0b9
1111
azure-storage-blob
1212
azure-storage-file-datalake
1313
uvicorn

Diff for: app/backend/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ azure-monitor-opentelemetry==1.6.1
5757
# via -r requirements.in
5858
azure-monitor-opentelemetry-exporter==1.0.0b32
5959
# via azure-monitor-opentelemetry
60-
azure-search-documents==11.6.0b6
60+
azure-search-documents==11.6.0b9
6161
# via -r requirements.in
6262
azure-storage-blob==12.22.0
6363
# via

Diff for: app/frontend/src/api/models.ts

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export type ChatAppRequestOverrides = {
2020
retrieval_mode?: RetrievalMode;
2121
semantic_ranker?: boolean;
2222
semantic_captions?: boolean;
23+
query_rewriting?: boolean;
2324
include_category?: string;
2425
exclude_category?: string;
2526
seed?: number;
@@ -84,6 +85,7 @@ export type ChatAppRequest = {
8485
export type Config = {
8586
showGPT4VOptions: boolean;
8687
showSemanticRankerOption: boolean;
88+
showQueryRewritingOption: boolean;
8789
showVectorOption: boolean;
8890
showUserUpload: boolean;
8991
showLanguagePicker: boolean;

Diff for: app/frontend/src/components/Settings/Settings.tsx

+20
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,15 @@ export interface SettingsProps {
1919
minimumRerankerScore: number;
2020
useSemanticRanker: boolean;
2121
useSemanticCaptions: boolean;
22+
useQueryRewriting: boolean;
2223
excludeCategory: string;
2324
includeCategory: string;
2425
retrievalMode: RetrievalMode;
2526
useGPT4V: boolean;
2627
gpt4vInput: GPT4VInput;
2728
vectorFieldList: VectorFieldOptions[];
2829
showSemanticRankerOption: boolean;
30+
showQueryRewritingOption: boolean;
2931
showGPT4VOptions: boolean;
3032
showVectorOption: boolean;
3133
useOidSecurityFilter: boolean;
@@ -51,13 +53,15 @@ export const Settings = ({
5153
minimumRerankerScore,
5254
useSemanticRanker,
5355
useSemanticCaptions,
56+
useQueryRewriting,
5457
excludeCategory,
5558
includeCategory,
5659
retrievalMode,
5760
useGPT4V,
5861
gpt4vInput,
5962
vectorFieldList,
6063
showSemanticRankerOption,
64+
showQueryRewritingOption,
6165
showGPT4VOptions,
6266
showVectorOption,
6367
useOidSecurityFilter,
@@ -94,6 +98,7 @@ export const Settings = ({
9498
const excludeCategoryFieldId = useId("excludeCategoryField");
9599
const semanticRankerId = useId("semanticRanker");
96100
const semanticRankerFieldId = useId("semanticRankerField");
101+
const queryRewritingFieldId = useId("queryRewritingField");
97102
const semanticCaptionsId = useId("semanticCaptions");
98103
const semanticCaptionsFieldId = useId("semanticCaptionsField");
99104
const useOidSecurityFilterId = useId("useOidSecurityFilter");
@@ -239,6 +244,21 @@ export const Settings = ({
239244
</>
240245
)}
241246

247+
{showQueryRewritingOption && (
248+
<>
249+
<Checkbox
250+
id={queryRewritingFieldId}
251+
className={styles.settingsSeparator}
252+
checked={useQueryRewriting}
253+
disabled={!useSemanticRanker}
254+
label={t("labels.useQueryRewriting")}
255+
onChange={(_ev, checked) => onChange("useQueryRewriting", !!checked)}
256+
aria-labelledby={queryRewritingFieldId}
257+
onRenderLabel={props => renderLabel(props, queryRewritingFieldId, queryRewritingFieldId, t("helpTexts.useQueryRewriting"))}
258+
/>
259+
</>
260+
)}
261+
242262
{useLogin && (
243263
<>
244264
<Checkbox

Diff for: app/frontend/src/locales/da/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
"excludeCategory": "Ekskludér kategori",
8787
"useSemanticRanker": "Brug semantisk ranking til søgning",
8888
"useSemanticCaptions": "Brug semantiske billedtekster",
89+
"useQueryRewriting": "Brug forespørgselsomskrivning til informationsgenfinding",
8990
"useSuggestFollowupQuestions": "Foreslå opfølgende spørgsmål",
9091
"useGPT4V": "Brug GPT vision model",
9192
"gpt4VInput": {

Diff for: app/frontend/src/locales/en/translation.json

+3
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"excludeCategory": "Exclude category",
9191
"useSemanticRanker": "Use semantic ranker for retrieval",
9292
"useSemanticCaptions": "Use semantic captions",
93+
"useQueryRewriting": "Use query rewriting for retrieval",
9394
"useSuggestFollowupQuestions": "Suggest follow-up questions",
9495
"useGPT4V": "Use GPT vision model",
9596
"gpt4VInput": {
@@ -139,6 +140,8 @@
139140
"Specifies a category to exclude from the search results. There are no categories used in the default data set.",
140141
"useSemanticReranker":
141142
"Enables the Azure AI Search semantic ranker, a model that re-ranks search results based on semantic similarity to the user's query.",
143+
"useQueryRewriting":
144+
"Enables Azure AI Search query rewriting, a process that modifies the user's query to improve search results. Requires semantic ranker to be enabled.",
142145
"useSemanticCaptions":
143146
"Sends semantic captions to the LLM instead of the full search result. A semantic caption is extracted from a search result during the process of semantic ranking.",
144147
"suggestFollowupQuestions": "Asks the LLM to suggest follow-up questions based on the user's query.",

Diff for: app/frontend/src/locales/es/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"excludeCategory": "Excluir categoría",
9191
"useSemanticRanker": "Usar clasificador semántico para la recuperación",
9292
"useSemanticCaptions": "Usar subtítulos semánticos",
93+
"useQueryRewriting": "Usar reescritura de consultas para la recuperación",
9394
"useSuggestFollowupQuestions": "Sugerir preguntas de seguimiento",
9495
"useGPT4V": "Usar modelo de visión GPT",
9596
"gpt4VInput": {

Diff for: app/frontend/src/locales/fr/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
"useSemanticRanker": "Utiliser le reclasseur sémantique",
9292
"useSemanticCaptions": "Utiliser les titres sémantiques",
9393
"useSuggestFollowupQuestions": "Suggérer des questions de suivi",
94+
"useQueryRewriting": "Utiliser la réécriture des requêtes pour la récupération",
9495
"useGPT4V": "Utiliser le modèle GPT Vision",
9596
"gpt4VInput": {
9697
"label": "Entrées du modèle GPT Vision",

Diff for: app/frontend/src/locales/it/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"excludeCategory": "Escludi categoria",
9191
"useSemanticRanker": "Usa il reranker semantico",
9292
"useSemanticCaptions": "Usa didascalie semantiche",
93+
"useQueryRewriting": "Usa la riscrittura delle query per il recupero",
9394
"useSuggestFollowupQuestions": "Suggerisci domande di follow-up",
9495
"useGPT4V": "Usa il modello GPT Vision",
9596
"gpt4VInput": {

Diff for: app/frontend/src/locales/ja/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"excludeCategory": "カテゴリを除外",
9191
"useSemanticRanker": "取得にセマンティック・ランカーを使用",
9292
"useSemanticCaptions": "セマンティック・キャプションを使用",
93+
"useQueryRewriting": "検索のためにクエリの書き換えを使用する",
9394
"useSuggestFollowupQuestions": "フォローアップの質問を提案",
9495
"useGPT4V": "GPT Visionモデルを使用",
9596
"gpt4VInput": {

Diff for: app/frontend/src/locales/nl/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"excludeCategory": "Categorie uitsluiten",
9191
"useSemanticRanker": "Semantische rangschikking gebruiken",
9292
"useSemanticCaptions": "Semantische bijschriften gebruiken",
93+
"useQueryRewriting": "Gebruik de herformulering van zoekopdrachten om informatie op te halen",
9394
"useSuggestFollowupQuestions": "Vervolgvragen voorstellen",
9495
"useGPT4V": "GPT-visiemodel gebruiken",
9596
"gpt4VInput": {

Diff for: app/frontend/src/locales/ptBR/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@
9191
"useSemanticRanker": "Usar rankeador semântico para recuperação",
9292
"useSemanticCaptions": "Usar legendas semânticas",
9393
"useSuggestFollowupQuestions": "Sugerir perguntas complementares",
94+
"useQueryRewriting": "Usar reescrita de consultas para recuperação",
9495
"useGPT4V": "Usar modelo de visão GPT",
9596
"gpt4VInput": {
9697
"label": "Entradas do modelo de visão GPT",

Diff for: app/frontend/src/locales/tr/translation.json

+1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"excludeCategory": "Kategori hariç tut",
9191
"useSemanticRanker": "Anlamsal sıralayıcı kullan",
9292
"useSemanticCaptions": "Anlamsal altyazılar kullan",
93+
"useQueryRewriting": "Bilgi erişimi için sorgu yeniden yazımını kullanın",
9394
"useSuggestFollowupQuestions": "Takip soruları öner",
9495
"useGPT4V": "GPT vizyon modelini kullan",
9596
"gpt4VInput": {

0 commit comments

Comments
 (0)