Merge pull request #2402 from tisnik/reranker-as-customized-part

openshift-merge-bot[bot] · web-flow · commit 1f735e8de28e · 2025-04-03T17:57:56.000Z
OLS-1615: Reranker as customized part
diff --git a/ols/customize/__init__.py b/ols/customize/__init__.py
@@ -6,3 +6,4 @@
 project = os.getenv("PROJECT", "ols")
 prompts = importlib.import_module(f"ols.customize.{project}.prompts")
 keywords = importlib.import_module(f"ols.customize.{project}.keywords")
+reranker = importlib.import_module(f"ols.customize.{project}.reranker")
diff --git a/ols/customize/ols/reranker.py b/ols/customize/ols/reranker.py
@@ -0,0 +1,14 @@
+"""Reranker for post-processing the Vector DB search results."""
+
+import logging
+
+from llama_index.core.schema import NodeWithScore
+
+logger = logging.getLogger(__name__)
+
+
+def rerank(retrieved_nodes: list[NodeWithScore]) -> list[NodeWithScore]:
+    """Rerank Vector DB search results (default: nodes are returned unchanged)."""
+    message = f"reranker.rerank() is called with {len(retrieved_nodes)} result(s)."
+    logger.debug(message)
+    return retrieved_nodes
diff --git a/ols/src/query_helpers/docs_summarizer.py b/ols/src/query_helpers/docs_summarizer.py
@@ -13,6 +13,7 @@
 from ols.app.metrics import TokenMetricUpdater
 from ols.app.models.models import RagChunk, SummarizerResponse, TokenCounter, ToolCall
 from ols.constants import MAX_ITERATIONS, RAG_CONTENT_LIMIT, GenericLLMParameters
+from ols.customize import reranker
 from ols.src.prompts.prompt_generator import GeneratePrompt
 from ols.src.query_helpers.query_helper import QueryHelper
 from ols.src.tools.oc_cli import token_works_for_oc
@@ -97,8 +98,10 @@ def _prepare_prompt(
         # Retrieve RAG content
         if vector_index:
             retriever = vector_index.as_retriever(similarity_top_k=RAG_CONTENT_LIMIT)
+            retrieved_nodes = retriever.retrieve(query)
+            retrieved_nodes = reranker.rerank(retrieved_nodes)
             rag_chunks, available_tokens = token_handler.truncate_rag_context(
-                retriever.retrieve(query), available_tokens
+                retrieved_nodes, available_tokens
             )
         else:
             logger.warning("Proceeding without RAG content. Check start up messages.")
diff --git a/tests/unit/query_helpers/test_docs_summarizer.py b/tests/unit/query_helpers/test_docs_summarizer.py
@@ -1,5 +1,6 @@
 """Unit tests for DocsSummarizer class."""
 
+import logging
 from unittest.mock import ANY, patch
 
 import pytest
@@ -13,11 +14,13 @@
 config.ols_config.authentication_config.module = "k8s"
 
 
+from ols.app.models.config import LoggingConfig  # noqa:E402
 from ols.src.query_helpers.docs_summarizer import (  # noqa:E402
     DocsSummarizer,
     QueryHelper,
 )
 from ols.utils import suid  # noqa:E402
+from ols.utils.logging_configurator import configure_logging  # noqa:E402
 from tests import constants  # noqa:E402
 from tests.mock_classes.mock_langchain_interface import (  # noqa:E402
     mock_langchain_interface,
@@ -145,6 +148,29 @@ def test_summarize_no_reference_content():
     assert not summary.history_truncated
 
 
+def test_summarize_reranker(caplog):
+    """Basic test to make sure the reranker is called as expected."""
+    logging_config = LoggingConfig(app_log_level="debug")
+
+    configure_logging(logging_config)
+    logger = logging.getLogger("ols")
+    logger.handlers = [caplog.handler]  # replace existing handlers with caplog's
+
+    with (
+        patch("ols.utils.token_handler.RAG_SIMILARITY_CUTOFF", 0.4),
+        patch("ols.utils.token_handler.MINIMUM_CONTEXT_TOKEN_LIMIT", 3),
+    ):
+        summarizer = DocsSummarizer(llm_loader=mock_llm_loader(None))
+        question = "What's the ultimate question with answer 42?"
+        rag_index = MockLlamaIndex()
+        # no history is passed into create_response() method
+        summary = summarizer.create_response(question, rag_index)
+        check_summary_result(summary, question)
+
+        # Check captured log text to see if reranker was called.
+        assert "reranker.rerank() is called with 1 result(s)." in caplog.text
+
+
 @pytest.mark.asyncio
 async def test_response_generator():
     """Test response generator method."""