Merge pull request #12 from qdrant/feature/embedding-providers

kacperlukawski · web-flow · commit f252489c3db0 · 2025-03-05T23:10:05.000+01:00
Abstract the embedding providers
diff --git a/.gitignore b/.gitignore
@@ -159,4 +159,7 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
+
+# Project-specific settings
+.aider*
diff --git a/README.md b/README.md
@@ -38,7 +38,7 @@ uv run mcp-server-qdrant \
   --qdrant-url "http://localhost:6333" \
   --qdrant-api-key "your_api_key" \
   --collection-name "my_collection" \
-  --fastembed-model-name "sentence-transformers/all-MiniLM-L6-v2"
+  --embedding-model "sentence-transformers/all-MiniLM-L6-v2"
 ```
 
 ### Installing via Smithery
@@ -78,7 +78,7 @@ This MCP server will automatically create a collection with the specified name i
 
 By default, the server will use the `sentence-transformers/all-MiniLM-L6-v2` embedding model to encode memories.
 For the time being, only [FastEmbed](https://qdrant.github.io/fastembed/) models are supported, and you can change it
-by passing the `--fastembed-model-name` argument to the server.
+by passing the `--embedding-model` argument to the server.
 
 ### Using the local mode of Qdrant
 
@@ -108,11 +108,31 @@ The configuration of the server can be also done using environment variables:
 - `QDRANT_URL`: URL of the Qdrant server, e.g. `http://localhost:6333`
 - `QDRANT_API_KEY`: API key for the Qdrant server
 - `COLLECTION_NAME`: Name of the collection to use
-- `FASTEMBED_MODEL_NAME`: Name of the FastEmbed model to use
+- `EMBEDDING_MODEL`: Name of the embedding model to use
+- `EMBEDDING_PROVIDER`: Embedding provider to use (currently only "fastembed" is supported)
 - `QDRANT_LOCAL_PATH`: Path to the local Qdrant database
 
 You cannot provide `QDRANT_URL` and `QDRANT_LOCAL_PATH` at the same time.
 
+## Contributing
+
+If you have suggestions for how mcp-server-qdrant could be improved, or want to report a bug, please open an issue!
+We welcome any and all contributions.
+
+### Testing `mcp-server-qdrant` locally
+
+The [MCP inspector](https://github.com/modelcontextprotocol/inspector) is a developer tool for testing and debugging MCP
+servers. It runs both a client UI (default port 5173) and an MCP proxy server (default port 3000). Open the client UI in
+your browser to use the inspector.
+
+```shell
+npx @modelcontextprotocol/inspector uv run mcp-server-qdrant \
+  --collection-name test \
+  --qdrant-local-path /tmp/qdrant-local-test
+```
+
+Once the inspector has started, open http://localhost:5173 in your browser to access the inspector interface.
+
 ## License
 
 This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software,
diff --git a/pyproject.toml b/pyproject.toml
@@ -18,8 +18,15 @@ dev-dependencies = [
     "pre-commit>=4.1.0",
     "pyright>=1.1.389",
     "pytest>=8.3.3",
-    "ruff>=0.8.0",
+    "pytest-asyncio>=0.23.0",
+    "ruff>=0.8.0"
 ]
 
 [project.scripts]
 mcp-server-qdrant = "mcp_server_qdrant:main"
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = "test_*.py"
+python_functions = "test_*"
+asyncio_mode = "auto"
diff --git a/src/mcp_server_qdrant/embeddings/__init__.py b/src/mcp_server_qdrant/embeddings/__init__.py
@@ -0,0 +1,5 @@
+from .base import EmbeddingProvider
+from .factory import create_embedding_provider
+from .fastembed import FastEmbedProvider
+
+__all__ = ["EmbeddingProvider", "FastEmbedProvider", "create_embedding_provider"]
diff --git a/src/mcp_server_qdrant/embeddings/base.py b/src/mcp_server_qdrant/embeddings/base.py
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import List
+
+
+class EmbeddingProvider(ABC):
+    """Abstract base class for embedding providers."""
+
+    @abstractmethod
+    async def embed_documents(self, documents: List[str]) -> List[List[float]]:
+        """Embed a list of documents into vectors."""
+        pass
+
+    @abstractmethod
+    async def embed_query(self, query: str) -> List[float]:
+        """Embed a query into a vector."""
+        pass
diff --git a/src/mcp_server_qdrant/embeddings/factory.py b/src/mcp_server_qdrant/embeddings/factory.py
@@ -0,0 +1,18 @@
+from mcp_server_qdrant.embeddings import EmbeddingProvider
+
+
+def create_embedding_provider(provider_type: str, **kwargs) -> EmbeddingProvider:
+    """
+    Create an embedding provider based on the specified type.
+
+    :param provider_type: The type of embedding provider to create.
+    :param kwargs: Additional arguments to pass to the provider constructor.
+    :return: An instance of the specified embedding provider.
+    """
+    if provider_type.lower() == "fastembed":
+        from .fastembed import FastEmbedProvider
+
+        model_name = kwargs.get("model_name", "sentence-transformers/all-MiniLM-L6-v2")
+        return FastEmbedProvider(model_name)
+    else:
+        raise ValueError(f"Unsupported embedding provider: {provider_type}")
diff --git a/src/mcp_server_qdrant/embeddings/fastembed.py b/src/mcp_server_qdrant/embeddings/fastembed.py
@@ -0,0 +1,37 @@
+import asyncio
+from typing import List
+
+from fastembed import TextEmbedding
+
+from .base import EmbeddingProvider
+
+
+class FastEmbedProvider(EmbeddingProvider):
+    """FastEmbed implementation of the embedding provider."""
+
+    def __init__(self, model_name: str):
+        """
+        Initialize the FastEmbed provider.
+
+        :param model_name: The name of the FastEmbed model to use.
+        """
+        self.model_name = model_name
+        self.embedding_model = TextEmbedding(model_name)
+
+    async def embed_documents(self, documents: List[str]) -> List[List[float]]:
+        """Embed a list of documents into vectors."""
+        # Run in a thread pool since FastEmbed is synchronous
+        loop = asyncio.get_event_loop()
+        embeddings = await loop.run_in_executor(
+            None, lambda: list(self.embedding_model.passage_embed(documents))
+        )
+        return [embedding.tolist() for embedding in embeddings]
+
+    async def embed_query(self, query: str) -> List[float]:
+        """Embed a query into a vector."""
+        # Run in a thread pool since FastEmbed is synchronous
+        loop = asyncio.get_event_loop()
+        embeddings = await loop.run_in_executor(
+            None, lambda: list(self.embedding_model.query_embed([query]))
+        )
+        return embeddings[0].tolist()
diff --git a/src/mcp_server_qdrant/qdrant.py b/src/mcp_server_qdrant/qdrant.py
@@ -1,6 +1,8 @@
 from typing import Optional
 
-from qdrant_client import AsyncQdrantClient
+from qdrant_client import AsyncQdrantClient, models
+
+from .embeddings.base import EmbeddingProvider
 
 
 class QdrantConnector:
@@ -9,7 +11,7 @@ class QdrantConnector:
     :param qdrant_url: The URL of the Qdrant server.
     :param qdrant_api_key: The API key to use for the Qdrant server.
     :param collection_name: The name of the collection to use.
-    :param fastembed_model_name: The name of the FastEmbed model to use.
+    :param embedding_provider: The embedding provider to use.
     :param qdrant_local_path: The path to the storage directory for the Qdrant client, if local mode is used.
     """
 
@@ -18,29 +20,54 @@ def __init__(
         qdrant_url: Optional[str],
         qdrant_api_key: Optional[str],
         collection_name: str,
-        fastembed_model_name: str,
+        embedding_provider: EmbeddingProvider,
         qdrant_local_path: Optional[str] = None,
     ):
         self._qdrant_url = qdrant_url.rstrip("/") if qdrant_url else None
         self._qdrant_api_key = qdrant_api_key
         self._collection_name = collection_name
-        self._fastembed_model_name = fastembed_model_name
-        # For the time being, FastEmbed models are the only supported ones.
-        # A list of all available models can be found here:
-        # https://qdrant.github.io/fastembed/examples/Supported_Models/
+        self._embedding_provider = embedding_provider
         self._client = AsyncQdrantClient(
             location=qdrant_url, api_key=qdrant_api_key, path=qdrant_local_path
         )
-        self._client.set_model(fastembed_model_name)
+
+    async def _ensure_collection_exists(self):
+        """Ensure that the collection exists, creating it if necessary."""
+        collection_exists = await self._client.collection_exists(self._collection_name)
+        if not collection_exists:
+            # Create the collection with the appropriate vector size
+            # We'll get the vector size by embedding a sample text
+            sample_vector = await self._embedding_provider.embed_query("sample text")
+            vector_size = len(sample_vector)
+
+            await self._client.create_collection(
+                collection_name=self._collection_name,
+                vectors_config=models.VectorParams(
+                    size=vector_size,
+                    distance=models.Distance.COSINE,
+                ),
+            )
 
     async def store_memory(self, information: str):
         """
         Store a memory in the Qdrant collection.
         :param information: The information to store.
         """
-        await self._client.add(
-            self._collection_name,
-            documents=[information],
+        await self._ensure_collection_exists()
+
+        # Embed the document
+        embeddings = await self._embedding_provider.embed_documents([information])
+
+        # Add to Qdrant
+        await self._client.upsert(
+            collection_name=self._collection_name,
+            points=[
+                models.PointStruct(
+                    id=hash(information),  # Simple hash as ID
+                    vector=embeddings[0],
+                    payload={"document": information},
+                )
+            ],
         )
 
     async def find_memories(self, query: str) -> list[str]:
@@ -53,9 +80,14 @@ async def find_memories(self, query: str) -> list[str]:
         if not collection_exists:
             return []
 
-        search_results = await self._client.query(
-            self._collection_name,
-            query_text=query,
+        # Embed the query
+        query_vector = await self._embedding_provider.embed_query(query)
+
+        # Search in Qdrant
+        search_results = await self._client.search(
+            collection_name=self._collection_name,
+            query_vector=query_vector,
             limit=10,
         )
-        return [result.document for result in search_results]
+
+        return [result.payload["document"] for result in search_results]
diff --git a/src/mcp_server_qdrant/server.py b/src/mcp_server_qdrant/server.py
@@ -7,31 +7,39 @@
 from mcp.server import NotificationOptions, Server
 from mcp.server.models import InitializationOptions
 
+from .embeddings.factory import create_embedding_provider
 from .qdrant import QdrantConnector
 
 
 def serve(
     qdrant_url: Optional[str],
     qdrant_api_key: Optional[str],
     collection_name: str,
-    fastembed_model_name: str,
+    embedding_provider_type: str,
+    embedding_model_name: str,
     qdrant_local_path: Optional[str] = None,
 ) -> Server:
     """
     Instantiate the server and configure tools to store and find memories in Qdrant.
     :param qdrant_url: The URL of the Qdrant server.
     :param qdrant_api_key: The API key to use for the Qdrant server.
     :param collection_name: The name of the collection to use.
-    :param fastembed_model_name: The name of the FastEmbed model to use.
+    :param embedding_provider_type: The type of embedding provider to use.
+    :param embedding_model_name: The name of the embedding model to use.
     :param qdrant_local_path: The path to the storage directory for the Qdrant client, if local mode is used.
     """
     server = Server("qdrant")
 
+    # Create the embedding provider
+    embedding_provider = create_embedding_provider(
+        embedding_provider_type, model_name=embedding_model_name
+    )
+
     qdrant = QdrantConnector(
         qdrant_url,
         qdrant_api_key,
         collection_name,
-        fastembed_model_name,
+        embedding_provider,
         qdrant_local_path,
     )
 
@@ -133,10 +141,18 @@ async def handle_tool_call(
     help="Collection name",
 )
 @click.option(
-    "--fastembed-model-name",
-    envvar="FASTEMBED_MODEL_NAME",
-    required=True,
-    help="FastEmbed model name",
+    "--embedding-provider",
+    envvar="EMBEDDING_PROVIDER",
+    required=False,
+    help="Embedding provider to use",
+    default="fastembed",
+    type=click.Choice(["fastembed"], case_sensitive=False),
+)
+@click.option(
+    "--embedding-model",
+    envvar="EMBEDDING_MODEL",
+    required=False,
+    help="Embedding model name",
     default="sentence-transformers/all-MiniLM-L6-v2",
 )
 @click.option(
@@ -149,7 +165,8 @@ def main(
     qdrant_url: Optional[str],
     qdrant_api_key: str,
     collection_name: Optional[str],
-    fastembed_model_name: str,
+    embedding_provider: str,
+    embedding_model: str,
     qdrant_local_path: Optional[str],
 ):
     # XOR of url and local path, since we accept only one of them
@@ -164,7 +181,8 @@ async def _run():
                 qdrant_url,
                 qdrant_api_key,
                 collection_name,
-                fastembed_model_name,
+                embedding_provider,
+                embedding_model,
                 qdrant_local_path,
             )
             await server.run(
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1 @@
+# This file can be empty; it only marks the directory as a Python package.
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+# This file can be empty, it just marks the directory as a Python package`