From bd8f2f57b623f142d86c3ffa9efaf8e5f57df5aa Mon Sep 17 00:00:00 2001
From: davidhou17 <55004296+davidhou17@users.noreply.github.com>
Date: Wed, 28 May 2025 15:56:19 -0500
Subject: [PATCH] create new notebook

---
 .../langchain-parent-document-retrieval.ipynb |   2 +-
 .../langchain-self-query-retrieval.ipynb      | 407 ++++++++++++++++++
 2 files changed, 408 insertions(+), 1 deletion(-)
 create mode 100644 ai-integrations/langchain-self-query-retrieval.ipynb
diff --git a/ai-integrations/langchain-parent-document-retrieval.ipynb b/ai-integrations/langchain-parent-document-retrieval.ipynb
index 0ba6392..dd54ceb 100644
--- a/ai-integrations/langchain-parent-document-retrieval.ipynb
+++ b/ai-integrations/langchain-parent-document-retrieval.ipynb
@@ -11,7 +11,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This notebook is a companion to the [LangChain Parent Document Retrieval](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/parent-document-retrieval/) page. Refer to the page for set-up instructions and detailed explanations.\n",
+    "This notebook is a companion to the [Parent Document Retrieval](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/parent-document-retrieval/) page. Refer to the page for set-up instructions and detailed explanations.\n",
     "\n",
     "<a target=\"_blank\" href=\"https://colab.research.google.com/github/mongodb/docs-notebooks/blob/main/ai-integrations/langchain-hybrid-search.ipynb\">\n",
     "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
diff --git a/ai-integrations/langchain-self-query-retrieval.ipynb b/ai-integrations/langchain-self-query-retrieval.ipynb
new file mode 100644
index 0000000..4bd0ed2
--- /dev/null
+++ b/ai-integrations/langchain-self-query-retrieval.ipynb
@@ -0,0 +1,407 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "57956324",
+   "metadata": {},
+   "source": [
+    "# LangChain MongoDB Integration - Self-Querying Retrieval\n",
+    "\n",
+    "This notebook is a companion to the [Self-Querying Retrieval](https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/self-query-retrieval/) page. Refer to the page for set-up instructions and detailed explanations.\n",
+    "\n",
+    "<a target=\"_blank\" href=\"https://colab.research.google.com/github/mongodb/docs-notebooks/blob/main/ai-integrations/langchain-self-query-retrieval.ipynb\">\n",
+    "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
+    "</a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a9924d95",
+   "metadata": {},
+   "source": [
+    "## Prerequisites\n",
+    "\n",
+    "To complete this tutorial, you must have the following:\n",
+    "- A MongoDB Atlas cluster\n",
+    "- A Voyage AI API key\n",
+    "- An OpenAI API key"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fca4d65e",
+   "metadata": {},
+   "source": [
+    "## Set up the environment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6283b411",
+   "metadata": {
+    "vscode": {
+     "languageId": "shellscript"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "pip install --quiet --upgrade langchain-mongodb langchain-voyageai langchain-openai langchain langchain-community langchain-core lark"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2c7b5f46",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"<openai-key>\"\n",
+    "os.environ[\"VOYAGE_API_KEY\"] = \"<voyage-key>\"\n",
+    "MONGODB_URI = \"<connection-string>\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "33ce1710",
+   "metadata": {},
+   "source": [
+    "## Instantiate the vector store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "155f5870",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_mongodb import MongoDBAtlasVectorSearch\n",
+    "from langchain_voyageai import VoyageAIEmbeddings\n",
+    "\n",
+    "# Use the voyage-3-large embedding model\n",
+    "embedding_model = VoyageAIEmbeddings(model=\"voyage-3-large\")\n",
+    "\n",
+    "# Create the vector store\n",
+    "vector_store = MongoDBAtlasVectorSearch.from_connection_string(\n",
+    "   connection_string = MONGODB_URI,\n",
+    "   embedding = embedding_model,\n",
+    "   namespace = \"langchain_db.self_query\",\n",
+    "   text_key = \"page_content\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "652be7fb",
+   "metadata": {},
+   "source": [
+    "## Add data to the vector store"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29191663",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.documents import Document\n",
+    "\n",
+    "docs = [\n",
+    "    Document(\n",
+    "        page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
+    "        metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"action\"},\n",
+    "    ),\n",
+    "    Document(\n",
+    "        page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
+    "        metadata={\"year\": 2010, \"genre\": \"thriller\", \"rating\": 8.2},\n",
+    "    ),\n",
+    "    Document(\n",
+    "        page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
+    "        metadata={\"year\": 2019, \"rating\": 8.3, \"genre\": \"drama\"},\n",
+    "    ),\n",
+    "    Document(\n",
+    "        page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
+    "        metadata={\"year\": 1979, \"rating\": 9.9, \"genre\": \"science fiction\"},\n",
+    "    ),\n",
+    "    Document(\n",
+    "        page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
+    "        metadata={\"year\": 2006, \"genre\": \"thriller\", \"rating\": 9.0},\n",
+    "    ),\n",
+    "    Document(\n",
+    "        page_content=\"Toys come alive and have a blast doing so\",\n",
+    "        metadata={\"year\": 1995, \"genre\": \"animated\", \"rating\": 9.3},\n",
+    "    ),\n",
+    "]\n",
+    "\n",
+    "# Add data to the vector store, which automatically embeds the documents\n",
+    "vector_store.add_documents(docs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a5762ee0",
+   "metadata": {},
+   "source": [
+    "## Create the Atlas Vector Search index with filters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "39a44b99",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Use LangChain helper method to create the vector search index\n",
+    "vector_store.create_vector_search_index(\n",
+    "   dimensions = 1024, # The dimensions of the vector embeddings to be indexed\n",
+    "   filters = [ \"genre\", \"rating\", \"year\" ], # The metadata fields to be indexed for filtering\n",
+    "   wait_until_complete = 60 # Number of seconds to wait for the index to build (can take around a minute)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "28f5ae2c",
+   "metadata": {},
+   "source": [
+    "## Create the Self-Querying Retriever"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "53bfaa67",
+   "metadata": {},
+   "source": [
+    "### Define metadata field and document information"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e393ed2e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.chains.query_constructor.schema import AttributeInfo\n",
+    "\n",
+    "# Define the document content description \n",
+    "document_content_description = \"Brief summary of a movie\"\n",
+    "\n",
+    "# Define the metadata fields to filter on\n",
+    "metadata_field_info = [\n",
+    "    AttributeInfo(\n",
+    "        name=\"genre\",\n",
+    "        description=\"The genre of the movie\",\n",
+    "        type=\"string\",\n",
+    "    ),\n",
+    "    AttributeInfo(\n",
+    "        name=\"year\",\n",
+    "        description=\"The year the movie was released\",\n",
+    "        type=\"integer\",\n",
+    "    ),\n",
+    "    AttributeInfo(\n",
+    "        name=\"rating\", \n",
+    "        description=\"A 1-10 rating for the movie\", \n",
+    "        type=\"float\"\n",
+    "    ),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4bdd30c4",
+   "metadata": {},
+   "source": [
+    "### Initialize the self-querying retriever"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "977730d1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_mongodb.retrievers import MongoDBAtlasSelfQueryRetriever\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-4o\")\n",
+    "retriever = MongoDBAtlasSelfQueryRetriever.from_llm(\n",
+    "    llm=llm,\n",
+    "    vectorstore=vector_store,\n",
+    "    metadata_field_info=metadata_field_info,\n",
+    "    document_contents=document_content_description\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b188c52d",
+   "metadata": {},
+   "source": [
+    "## Run Queries with the Self-Querying Retriever"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "833d90d9",
+   "metadata": {},
+   "source": [
+    "### Queries with filters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9bbc3e6a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This example specifies a filter (rating > 9)\n",
+    "retriever.invoke(\"What are some highly rated movies (above 9)?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a5bd474",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This example specifies a semantic search and a filter (rating > 9)\n",
+    "retriever.invoke(\"I want to watch a movie about toys rated higher than 9\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c062276",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This example specifies a composite filter (rating >= 9 and genre = thriller)\n",
+    "retriever.invoke(\"What's a highly rated (above or equal 9) thriller film?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb67054b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This example specifies a query and composite filter (year > 1990 and year < 2005 and genre = action)\n",
+    "retriever.invoke(\n",
+    "    \"What's a movie after 1990 but before 2005 that's all about dinosaurs, \" +\n",
+    "    \"and preferably has a lot of action\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "052ee6e4",
+   "metadata": {},
+   "source": [
+    "### Query with no filters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3902ca22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This example only specifies a semantic search query\n",
+    "retriever.invoke(\"What are some movies about dinosaurs\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "937966cf",
+   "metadata": {},
+   "source": [
+    "## Use the Retriever in Your RAG Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dfa7b140",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pprint\n",
+    "from langchain_core.output_parsers import StrOutputParser\n",
+    "from langchain_core.prompts import PromptTemplate\n",
+    "from langchain_core.runnables import RunnablePassthrough\n",
+    "from langchain_openai import ChatOpenAI\n",
+    "\n",
+    "llm = ChatOpenAI(model=\"gpt-4o\")\n",
+    "\n",
+    "# Configure self-query retriever with a document limit\n",
+    "retriever = MongoDBAtlasSelfQueryRetriever.from_llm(\n",
+    "    llm=llm,\n",
+    "    vectorstore=vector_store,\n",
+    "    metadata_field_info=metadata_field_info,\n",
+    "    document_contents=document_content_description,\n",
+    "    enable_limit=True\n",
+    ")\n",
+    "\n",
+    "# Define a prompt template\n",
+    "template = \"\"\"\n",
+    "   Use the following pieces of context to answer the question at the end.\n",
+    "   {context}\n",
+    "   Question: {question}\n",
+    "\"\"\"\n",
+    "prompt = PromptTemplate.from_template(template)\n",
+    "\n",
+    "# Construct a chain to answer questions on your data\n",
+    "chain = (\n",
+    "   { \"context\": retriever, \"question\": RunnablePassthrough()}\n",
+    "   | prompt   \n",
+    "   | llm\n",
+    "   | StrOutputParser()\n",
+    ")\n",
+    "\n",
+    "# Prompt the chain\n",
+    "question = \"What are two movies about dinosaurs after 1990?\" # year > 1990 and document limit of 2\n",
+    "answer = chain.invoke(question)\n",
+    "\n",
+    "print(\"Question: \" + question)\n",
+    "print(\"Answer: \" + answer)\n",
+    "\n",
+    "# Return source documents\n",
+    "documents = retriever.invoke(question)\n",
+    "print(\"\\nSource documents:\")\n",
+    "pprint.pprint(documents)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}