diff --git a/ai-integrations/langchain-parent-document-retrieval.ipynb b/ai-integrations/langchain-parent-document-retrieval.ipynb
index 0ba6392..dd54ceb 100644
--- a/ai-integrations/langchain-parent-document-retrieval.ipynb
+++ b/ai-integrations/langchain-parent-document-retrieval.ipynb
@@ -11,7 +11,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "This notebook is a companion to the [LangChain Parent Document Retrieval](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/parent-document-retrieval/) page. Refer to the page for set-up instructions and detailed explanations.\n",
+ "This notebook is a companion to the [Parent Document Retrieval](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/parent-document-retrieval/) page. Refer to the page for set-up instructions and detailed explanations.\n",
"\n",
"\n",
"
\n",
diff --git a/ai-integrations/langchain-self-query-retrieval.ipynb b/ai-integrations/langchain-self-query-retrieval.ipynb
new file mode 100644
index 0000000..4bd0ed2
--- /dev/null
+++ b/ai-integrations/langchain-self-query-retrieval.ipynb
@@ -0,0 +1,407 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "57956324",
+ "metadata": {},
+ "source": [
+ "# LangChain MongoDB Integration - Self-Querying Retrieval\n",
+ "\n",
+ "This notebook is a companion to the [Self-Querying Retrieval](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/parent-document-retrieval/) page. Refer to the page for set-up instructions and detailed explanations.\n",
+ "\n",
+ "\n",
+ "
\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a9924d95",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "To complete this tutorial, you must have the following:\n",
+ "- A MongoDB Atlas cluster\n",
+ "- A Voyage AI API key\n",
+ "- An OpenAI API key"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fca4d65e",
+ "metadata": {},
+ "source": [
+ "## Set up the environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6283b411",
+ "metadata": {
+ "vscode": {
+ "languageId": "shellscript"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pip install --quiet --upgrade langchain-mongodb langchain-voyageai langchain-openai langchain langchain-community langchain-core lark"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c7b5f46",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
+ "os.environ[\"VOYAGE_API_KEY\"] = \"\"\n",
+ "MONGODB_URI = \"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "33ce1710",
+ "metadata": {},
+ "source": [
+ "## Instantiate the vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "155f5870",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_mongodb import MongoDBAtlasVectorSearch\n",
+ "from langchain_voyageai import VoyageAIEmbeddings\n",
+ "\n",
+ "# Use the voyage-3-large embedding model\n",
+ "embedding_model = VoyageAIEmbeddings(model=\"voyage-3-large\")\n",
+ "\n",
+ "# Create the vector store\n",
+ "vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
+ " connection_string = MONGODB_URI,\n",
+ " embedding = embedding_model,\n",
+ " namespace = \"langchain_db.self_query\",\n",
+ " text_key = \"page_content\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "652be7fb",
+ "metadata": {},
+ "source": [
+ "## Add data to the vector store"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "29191663",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.documents import Document\n",
+ "\n",
+ "docs = [\n",
+ " Document(\n",
+ " page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
+ " metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"action\"},\n",
+ " ),\n",
+ " Document(\n",
+ " page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
+ " metadata={\"year\": 2010, \"genre\": \"thriller\", \"rating\": 8.2},\n",
+ " ),\n",
+ " Document(\n",
+ " page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
+ " metadata={\"year\": 2019, \"rating\": 8.3, \"genre\": \"drama\"},\n",
+ " ),\n",
+ " Document(\n",
+ " page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
+ " metadata={\"year\": 1979, \"rating\": 9.9, \"genre\": \"science fiction\"},\n",
+ " ),\n",
+ " Document(\n",
+ " page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
+ " metadata={\"year\": 2006, \"genre\": \"thriller\", \"rating\": 9.0},\n",
+ " ),\n",
+ " Document(\n",
+ " page_content=\"Toys come alive and have a blast doing so\",\n",
+ " metadata={\"year\": 1995, \"genre\": \"animated\", \"rating\": 9.3},\n",
+ " ),\n",
+ "]\n",
+ "\n",
+ "# Add data to the vector store, which automaticaly embeds the documents\n",
+ "vector_store.add_documents(docs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a5762ee0",
+ "metadata": {},
+ "source": [
+ "## Create the Atlas Vector Search index with filters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "39a44b99",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Use LangChain helper method to create the vector search index\n",
+ "vector_store.create_vector_search_index(\n",
+ " dimensions = 1024, # The dimensions of the vector embeddings to be indexed\n",
+ " filters = [ \"genre\", \"rating\", \"year\" ], # The metadata fields to be indexed for filtering\n",
+ " wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "28f5ae2c",
+ "metadata": {},
+ "source": [
+ "## Create the Self-Querying Retriever"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "53bfaa67",
+ "metadata": {},
+ "source": [
+ "### Define metadata field and document information"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e393ed2e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.chains.query_constructor.schema import AttributeInfo\n",
+ "\n",
+ "# Define the document content description \n",
+ "document_content_description = \"Brief summary of a movie\"\n",
+ "\n",
+ "# Define the metadata fields to filter on\n",
+ "metadata_field_info = [\n",
+ " AttributeInfo(\n",
+ " name=\"genre\",\n",
+ " description=\"The genre of the movie\",\n",
+ " type=\"string\",\n",
+ " ),\n",
+ " AttributeInfo(\n",
+ " name=\"year\",\n",
+ " description=\"The year the movie was released\",\n",
+ " type=\"integer\",\n",
+ " ),\n",
+ " AttributeInfo(\n",
+ " name=\"rating\", \n",
+ " description=\"A 1-10 rating for the movie\", \n",
+ " type=\"float\"\n",
+ " ),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4bdd30c4",
+ "metadata": {},
+ "source": [
+ "### Initialize the self-querying retriever"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "977730d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_mongodb.retrievers import MongoDBAtlasSelfQueryRetriever\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "llm = ChatOpenAI(model=\"gpt-4o\")\n",
+ "retriever = MongoDBAtlasSelfQueryRetriever.from_llm(\n",
+ " llm=llm,\n",
+ " vectorstore=vector_store,\n",
+ " metadata_field_info=metadata_field_info,\n",
+ " document_contents=document_content_description\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b188c52d",
+ "metadata": {},
+ "source": [
+ "## Run Queries with the Self-Querying Retriever"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "833d90d9",
+ "metadata": {},
+ "source": [
+ "### Queries with filters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9bbc3e6a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This example specifies a filter (rating > 9)\n",
+ "retriever.invoke(\"What are some highly rated movies (above 9)?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7a5bd474",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This example specifies a semantic search and a filter (rating > 9)\n",
+ "retriever.invoke(\"I want to watch a movie about toys rated higher than 9\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c062276",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This example specifies a composite filter (rating >= 9 and genre = thriller)\n",
+ "retriever.invoke(\"What's a highly rated (above or equal 9) thriller film?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "eb67054b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This example specifies a query and composite filter (year > 1990 and year < 2005 and genre = action)\n",
+ "retriever.invoke(\n",
+ " \"What's a movie after 1990 but before 2005 that's all about dinosaurs, \" +\n",
+ " \"and preferably has a lot of action\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "052ee6e4",
+ "metadata": {},
+ "source": [
+ "### Query with no filters"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3902ca22",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This example only specifies a semantic search query\n",
+ "retriever.invoke(\"What are some movies about dinosaurs\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "937966cf",
+ "metadata": {},
+ "source": [
+ "## Use the Retriever in Your RAG Pipeline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dfa7b140",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pprint\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.prompts import PromptTemplate\n",
+ "from langchain_core.runnables import RunnablePassthrough\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "llm = ChatOpenAI(model=\"gpt-4o\")\n",
+ "\n",
+ "# Configure self-query retriever with a document limit\n",
+ "retriever = MongoDBAtlasSelfQueryRetriever.from_llm(\n",
+ " llm=llm,\n",
+ " vectorstore=vector_store,\n",
+ " metadata_field_info=metadata_field_info,\n",
+ " document_contents=document_content_description,\n",
+ " enable_limit=True\n",
+ ")\n",
+ "\n",
+ "# Define a prompt template\n",
+ "template = \"\"\"\n",
+ " Use the following pieces of context to answer the question at the end.\n",
+ " {context}\n",
+ " Question: {question}\n",
+ "\"\"\"\n",
+ "prompt = PromptTemplate.from_template(template)\n",
+ "\n",
+ "# Construct a chain to answer questions on your data\n",
+ "chain = (\n",
+ " { \"context\": retriever, \"question\": RunnablePassthrough()}\n",
+ " | prompt \n",
+ " | llm\n",
+ " | StrOutputParser()\n",
+ ")\n",
+ "\n",
+ "# Prompt the chain\n",
+ "question = \"What are two movies about dinosaurs after 1990?\" # year > 1990 and document limit of 2\n",
+ "answer = chain.invoke(question)\n",
+ "\n",
+ "print(\"Question: \" + question)\n",
+ "print(\"Answer: \" + answer)\n",
+ "\n",
+ "# Return source documents\n",
+ "documents = retriever.invoke(question)\n",
+ "print(\"\\nSource documents:\")\n",
+ "pprint.pprint(documents)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}