Skip to content

Commit 956b09f

Browse files
core[patch]: stop deleting records with "scoped_full" when doc is empty (#30520)
Fix a bug that causes the `scoped_full` cleanup mode of `index` and `aindex` to delete existing records when no input documents are provided.
1 parent b28a474 commit 956b09f

File tree

2 files changed

+158
-2
lines changed

2 files changed

+158
-2
lines changed

libs/core/langchain_core/indexing/api.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,9 @@ def index(
473473
record_manager.delete_keys(uids_to_delete)
474474
num_deleted += len(uids_to_delete)
475475

476-
if cleanup == "full" or cleanup == "scoped_full":
476+
if cleanup == "full" or (
477+
cleanup == "scoped_full" and scoped_full_cleanup_source_ids
478+
):
477479
delete_group_ids: Optional[Sequence[str]] = None
478480
if cleanup == "scoped_full":
479481
delete_group_ids = list(scoped_full_cleanup_source_ids)
@@ -786,7 +788,9 @@ async def aindex(
786788
await record_manager.adelete_keys(uids_to_delete)
787789
num_deleted += len(uids_to_delete)
788790

789-
if cleanup == "full" or cleanup == "scoped_full":
791+
if cleanup == "full" or (
792+
cleanup == "scoped_full" and scoped_full_cleanup_source_ids
793+
):
790794
delete_group_ids: Optional[Sequence[str]] = None
791795
if cleanup == "scoped_full":
792796
delete_group_ids = list(scoped_full_cleanup_source_ids)

libs/core/tests/unit_tests/indexing/test_indexing.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,158 @@ async def test_ascoped_full_fails_with_bad_source_ids(
822822
)
823823

824824

825+
def test_index_empty_doc_scoped_full(
826+
record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
827+
) -> None:
828+
"""Test Indexing with scoped_full strategy"""
829+
loader = ToyLoader(
830+
documents=[
831+
Document(
832+
page_content="This is a test document.",
833+
metadata={"source": "1"},
834+
),
835+
Document(
836+
page_content="This is another document.",
837+
metadata={"source": "1"},
838+
),
839+
Document(
840+
page_content="This is yet another document.",
841+
metadata={"source": "1"},
842+
),
843+
Document(
844+
page_content="This is a test document from another source.",
845+
metadata={"source": "2"},
846+
),
847+
]
848+
)
849+
850+
with patch.object(
851+
record_manager, "get_time", return_value=datetime(2021, 1, 1).timestamp()
852+
):
853+
assert index(
854+
loader,
855+
record_manager,
856+
vector_store,
857+
cleanup="scoped_full",
858+
source_id_key="source",
859+
) == {
860+
"num_added": 4,
861+
"num_deleted": 0,
862+
"num_skipped": 0,
863+
"num_updated": 0,
864+
}
865+
866+
with patch.object(
867+
record_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp()
868+
):
869+
assert index(
870+
loader,
871+
record_manager,
872+
vector_store,
873+
cleanup="scoped_full",
874+
source_id_key="source",
875+
) == {
876+
"num_added": 0,
877+
"num_deleted": 0,
878+
"num_skipped": 4,
879+
"num_updated": 0,
880+
}
881+
882+
loader = ToyLoader(documents=[])
883+
884+
with patch.object(
885+
record_manager, "get_time", return_value=datetime(2021, 1, 3).timestamp()
886+
):
887+
assert index(
888+
loader,
889+
record_manager,
890+
vector_store,
891+
cleanup="scoped_full",
892+
source_id_key="source",
893+
) == {
894+
"num_added": 0,
895+
"num_deleted": 0,
896+
"num_skipped": 0,
897+
"num_updated": 0,
898+
}
899+
900+
901+
async def test_aindex_empty_doc_scoped_full(
902+
arecord_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
903+
) -> None:
904+
"""Test Indexing with scoped_full strategy."""
905+
loader = ToyLoader(
906+
documents=[
907+
Document(
908+
page_content="This is a test document.",
909+
metadata={"source": "1"},
910+
),
911+
Document(
912+
page_content="This is another document.",
913+
metadata={"source": "1"},
914+
),
915+
Document(
916+
page_content="This is yet another document.",
917+
metadata={"source": "1"},
918+
),
919+
Document(
920+
page_content="This is a test document from another source.",
921+
metadata={"source": "2"},
922+
),
923+
]
924+
)
925+
926+
with patch.object(
927+
arecord_manager, "get_time", return_value=datetime(2021, 1, 1).timestamp()
928+
):
929+
assert await aindex(
930+
loader,
931+
arecord_manager,
932+
vector_store,
933+
cleanup="scoped_full",
934+
source_id_key="source",
935+
) == {
936+
"num_added": 4,
937+
"num_deleted": 0,
938+
"num_skipped": 0,
939+
"num_updated": 0,
940+
}
941+
942+
with patch.object(
943+
arecord_manager, "get_time", return_value=datetime(2021, 1, 2).timestamp()
944+
):
945+
assert await aindex(
946+
loader,
947+
arecord_manager,
948+
vector_store,
949+
cleanup="scoped_full",
950+
source_id_key="source",
951+
) == {
952+
"num_added": 0,
953+
"num_deleted": 0,
954+
"num_skipped": 4,
955+
"num_updated": 0,
956+
}
957+
958+
loader = ToyLoader(documents=[])
959+
960+
with patch.object(
961+
arecord_manager, "get_time", return_value=datetime(2021, 1, 3).timestamp()
962+
):
963+
assert await aindex(
964+
loader,
965+
arecord_manager,
966+
vector_store,
967+
cleanup="scoped_full",
968+
source_id_key="source",
969+
) == {
970+
"num_added": 0,
971+
"num_deleted": 0,
972+
"num_skipped": 0,
973+
"num_updated": 0,
974+
}
975+
976+
825977
def test_no_delete(
826978
record_manager: InMemoryRecordManager, vector_store: InMemoryVectorStore
827979
) -> None:

0 commit comments

Comments
 (0)