vectorize-io
diff --git a/‎.gitignore
+175 b/‎.gitignore
+175
diff --git a/‎langchain/README.md
+2 b/‎langchain/README.md
+2
diff --git a/‎langchain/langchain_vectorize/__init__.py
+1 b/‎langchain/langchain_vectorize/__init__.py
+1
diff --git a/‎langchain/langchain_vectorize/retrievers.py
+145 b/‎langchain/langchain_vectorize/retrievers.py
+145
@@ -0,0 +1,175 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
@@ -0,0 +1,2 @@
+# langchain-vectorize
+
@@ -0,0 +1 @@
+"""Vectorize integrations with LangChain."""
@@ -0,0 +1,145 @@
+"""Vectorize LangChain retrievers."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Optional
+
+import vectorize_client
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
+from typing_extensions import override
+from vectorize_client import (
+    ApiClient,
+    Configuration,
+    PipelinesApi,
+    RetrieveDocumentsRequest,
+)
+
+if TYPE_CHECKING:
+    from langchain_core.callbacks import CallbackManagerForRetrieverRun
+    from langchain_core.runnables import RunnableConfig
+
+# Attribute names read from each Vectorize document and copied, under the same
+# key, into the ``metadata`` of the LangChain ``Document`` built by
+# ``VectorizeRetriever._convert_document``.
+_METADATA_FIELDS = {
+    "relevancy",
+    "chunk_id",
+    "total_chunks",
+    "origin",
+    "origin_id",
+    "similarity",
+    "source",
+    "unique_source",
+    "source_display_name",
+    "pipeline_id",
+    "org_id",
+}
+# Sentinel used as the default of the optional keyword arguments of
+# ``VectorizeRetriever.invoke``; an argument is forwarded only when it
+# ``is not _NOT_SET``, i.e. when the caller actually supplied it.
+_NOT_SET = object()
+
+
+class VectorizeRetriever(BaseRetriever):
+    """Vectorize retriever."""
+
+    api_token: str
+    """The Vectorize API token."""
+    organization: Optional[str] = None  # noqa: UP007
+    """The Vectorize organization ID."""
+    pipeline_id: Optional[str] = None  # noqa: UP007
+    """The Vectorize pipeline ID."""
+    num_results: int = 5
+    """The number of documents to return."""
+    rerank: bool = False
+    """Whether to rerank the results."""
+    metadata_filters: list[dict[str, Any]] = []
+    """The metadata filters to apply when retrieving the documents."""
+
+    _pipelines: PipelinesApi | None = None
+
+    @override
+    def model_post_init(self, /, context: Any) -> None:
+        api = ApiClient(Configuration(access_token=self.api_token))
+        self._pipelines = PipelinesApi(api)
+
+    @staticmethod
+    def _convert_document(document: vectorize_client.models.Document) -> Document:
+        metadata = {field: getattr(document, field) for field in _METADATA_FIELDS}
+        return Document(id=document.id, page_content=document.text, metadata=metadata)
+
+    @override
+    def _get_relevant_documents(
+        self,
+        query: str,
+        *,
+        run_manager: CallbackManagerForRetrieverRun,
+        organization: str | None = None,
+        pipeline_id: str | None = None,
+        num_results: int | None = None,
+        rerank: bool | None = None,
+        metadata_filters: list[dict[str, Any]] | None = None,
+    ) -> list[Document]:
+        request = RetrieveDocumentsRequest(
+            question=query,
+            num_results=num_results or self.num_results,
+            rerank=rerank or self.rerank,
+            metadata_filters=metadata_filters or self.metadata_filters,
+        )
+        response = self._pipelines.retrieve_documents(
+            organization or self.organization, pipeline_id or self.pipeline_id, request
+        )
+        return [self._convert_document(doc) for doc in response.documents]
+
+    @override
+    def invoke(
+        self,
+        input: str,
+        config: RunnableConfig | None = None,
+        *,
+        organization: str = "",
+        pipeline_id: str = "",
+        num_results: int = _NOT_SET,
+        rerank: bool = _NOT_SET,
+        metadata_filters: list[dict[str, Any]] = _NOT_SET,
+    ) -> list[Document]:
+        """Invoke the retriever to get relevant documents.
+
+        Main entry point for retriever invocations.
+
+        Args:
+            input: The query string.
+            config: Configuration for the retriever. Defaults to None.
+            organization: The organization to retrieve documents from.
+                If set, overrides the organization set at the initialization of the
+                retriever.
+            pipeline_id: The pipeline ID to retrieve documents from.
+                If set, overrides the pipeline ID set at the initialization of the
+                retriever.
+            num_results: The number of results to retrieve.
+                If set, overrides the number of results set at the initialization of
+                the retriever.
+            rerank: Whether to rerank the retrieved documents.
+                If set, overrides the reranking set at the initialization of the
+                retriever.
+            metadata_filters: The metadata filters to apply when retrieving documents.
+                If set, overrides the metadata filters set at the initialization of the
+                retriever.
+
+        Returns:
+            List of relevant documents.
+
+        Examples:
+
+            .. code-block:: python
+
+                retriever.invoke("query")
+        """
+        kwargs = {}
+        if organization:
+            kwargs["organization"] = organization
+        if pipeline_id:
+            kwargs["pipeline_id"] = pipeline_id
+        if num_results is not _NOT_SET:
+            kwargs["num_results"] = num_results
+        if rerank is not _NOT_SET:
+            kwargs["rerank"] = rerank
+        if metadata_filters is not _NOT_SET:
+            kwargs["metadata_filters"] = metadata_filters
+
+        return super().invoke(input, config, **kwargs)
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Vectorize integrations with LangChain."""`