Skip to content

Commit 04da695

Browse files
craig[bot]andy-kimballmsbutler
committed
141109: sql/vecindex: add SearchForDelete method to VectorIndex r=drewkimball a=andy-kimball Previously, the Delete method both searched for a vector's partition and handled deleting the vector from that partition. However, the execution engine wants to find the partition without actually deleting the vector, since it will do the deletion as part of its own KV batching logic. The new SearchForDelete method handles just the search portion of deletion, returning the SearchResult containing the partition key. In addition, the Delete method has been refactored to use SearchForDelete internally. Epic: CRDB-42943 Release note: None 141143: sql: skip TestBackfillWithProtectedTS r=fqazi a=msbutler I've observed it flake bors several times. Informs #139862 Release note: none Co-authored-by: Andrew Kimball <[email protected]> Co-authored-by: Michael Butler <[email protected]>
3 parents 3ef1c5b + 57e5d2e + 119930a commit 04da695

File tree

4 files changed

+275
-72
lines changed

4 files changed

+275
-72
lines changed

pkg/sql/backfill_protected_timestamp_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ func TestBackfillWithProtectedTS(t *testing.T) {
233233
skip.UnderDeadlock(t, "test takes too long")
234234
skip.UnderStress(t, "test takes too long")
235235
skip.UnderRace(t, "test takes too long")
236+
skip.WithIssue(t, 139862)
236237

237238
ctx := context.Background()
238239
backfillQueryWait := make(chan struct{})
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# ----------
2+
# Search tree with multiple partitions and duplicate data.
3+
# ----------
4+
new-index dims=2 min-partition-size=1 max-partition-size=4 beam-size=2
5+
vec1: (1, 2)
6+
vec2: (7, 4)
7+
vec3: (4, 3)
8+
vec4: (2, 8)
9+
vec5: (5, 1)
10+
vec6: (3, 6)
11+
vec7: (6, 5)
12+
vec8: (8, 3)
13+
vec9: (1, 7)
14+
vec10: (1, 7)
15+
vec11: (1, 7)
16+
vec12: (1, 7)
17+
vec13: (1, 7)
18+
----
19+
• 1 (3.2083, 5.125)
20+
21+
├───• 10 (1.5, 7)
22+
│ │
23+
│ ├───• 9 (1, 7)
24+
│ │ │
25+
│ │ ├───• vec9 (1, 7)
26+
│ │ └───• vec13 (1, 7)
27+
│ │
28+
│ ├───• 8 (1, 7)
29+
│ │ │
30+
│ │ ├───• vec12 (1, 7)
31+
│ │ ├───• vec10 (1, 7)
32+
│ │ └───• vec11 (1, 7)
33+
│ │
34+
│ └───• 7 (2.5, 7)
35+
│ │
36+
│ ├───• vec4 (2, 8)
37+
│ └───• vec6 (3, 6)
38+
39+
└───• 11 (4.9167, 3.25)
40+
41+
├───• 5 (6.5, 4.5)
42+
│ │
43+
│ ├───• vec7 (6, 5)
44+
│ ├───• vec2 (7, 4)
45+
│ └───• vec8 (8, 3)
46+
47+
└───• 3 (3.3333, 2)
48+
49+
├───• vec3 (4, 3)
50+
├───• vec5 (5, 1)
51+
└───• vec1 (1, 2)
52+
53+
# Search for vector that exists.
54+
search-for-delete
55+
vec1
56+
----
57+
vec1: partition 3
58+
59+
# Search for vector that does not exist in tree.
60+
search-for-delete
61+
vec100: (10, 10)
62+
----
63+
vec100: vector not found
64+
65+
# Search for vector that exists, but without a matching key.
66+
search-for-delete
67+
vec100: (1, 2)
68+
----
69+
vec100: vector not found
70+
71+
# Search for duplicate vector.
72+
search-for-delete
73+
vec12
74+
----
75+
vec12: partition 8
76+
77+
# Delete vector from store, but leave it in index.
78+
delete not-found
79+
vec1
80+
----
81+
• 1 (3.2083, 5.125)
82+
83+
├───• 10 (1.5, 7)
84+
│ │
85+
│ ├───• 9 (1, 7)
86+
│ │ │
87+
│ │ ├───• vec9 (1, 7)
88+
│ │ └───• vec13 (1, 7)
89+
│ │
90+
│ ├───• 8 (1, 7)
91+
│ │ │
92+
│ │ ├───• vec12 (1, 7)
93+
│ │ ├───• vec10 (1, 7)
94+
│ │ └───• vec11 (1, 7)
95+
│ │
96+
│ └───• 7 (2.5, 7)
97+
│ │
98+
│ ├───• vec4 (2, 8)
99+
│ └───• vec6 (3, 6)
100+
101+
└───• 11 (4.9167, 3.25)
102+
103+
├───• 5 (6.5, 4.5)
104+
│ │
105+
│ ├───• vec7 (6, 5)
106+
│ ├───• vec2 (7, 4)
107+
│ └───• vec8 (8, 3)
108+
109+
└───• 3 (3.3333, 2)
110+
111+
├───• vec3 (4, 3)
112+
├───• vec5 (5, 1)
113+
└───• vec1 (MISSING)
114+
115+
# Try to find the missing vector.
116+
search-for-delete
117+
vec1: (1, 2)
118+
----
119+
vec1: vector not found
120+
121+
# Vector should now be gone from the index.
122+
format-tree
123+
----
124+
• 1 (3.2083, 5.125)
125+
126+
├───• 10 (1.5, 7)
127+
│ │
128+
│ ├───• 9 (1, 7)
129+
│ │ │
130+
│ │ ├───• vec9 (1, 7)
131+
│ │ └───• vec13 (1, 7)
132+
│ │
133+
│ ├───• 8 (1, 7)
134+
│ │ │
135+
│ │ ├───• vec12 (1, 7)
136+
│ │ ├───• vec10 (1, 7)
137+
│ │ └───• vec11 (1, 7)
138+
│ │
139+
│ └───• 7 (2.5, 7)
140+
│ │
141+
│ ├───• vec4 (2, 8)
142+
│ └───• vec6 (3, 6)
143+
144+
└───• 11 (4.9167, 3.25)
145+
146+
├───• 5 (6.5, 4.5)
147+
│ │
148+
│ ├───• vec7 (6, 5)
149+
│ ├───• vec2 (7, 4)
150+
│ └───• vec8 (8, 3)
151+
152+
└───• 3 (3.3333, 2)
153+
154+
├───• vec3 (4, 3)
155+
└───• vec5 (5, 1)

pkg/sql/vecindex/vector_index.go

Lines changed: 62 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ type searchContext struct {
120120
tempVectorsWithKeys []vecstore.VectorWithKey
121121
}
122122

123-
// VectorIndex implements the C-SPANN algorithm, which adapts Microsofts SPANN
124-
// and SPFresh algorithms to work well with CockroachDBs unique distributed
123+
// VectorIndex implements the C-SPANN algorithm, which adapts Microsoft's SPANN
124+
// and SPFresh algorithms to work well with CockroachDB's unique distributed
125125
// architecture. This enables CockroachDB to efficiently answer approximate
126126
// nearest neighbor (ANN) queries with high accuracy, low latency, and fresh
127127
// results, with millions or even billions of indexed vectors. In a departure
@@ -314,60 +314,17 @@ func (vi *VectorIndex) Insert(
314314
func (vi *VectorIndex) Delete(
315315
ctx context.Context, txn vecstore.Txn, vector vector.T, key vecstore.PrimaryKey,
316316
) error {
317-
// Potentially throttle delete operation if background work is falling behind.
318-
if err := vi.fixups.DelayInsertOrDelete(ctx); err != nil {
317+
result, err := vi.SearchForDelete(ctx, txn, vector, key)
318+
if err != nil {
319319
return err
320320
}
321-
322-
// Search for the vector in the index.
323-
searchCtx := searchContext{
324-
Txn: txn,
325-
Original: vector,
326-
Level: vecstore.LeafLevel,
327-
Options: SearchOptions{
328-
SkipRerank: vi.options.DisableErrorBounds,
329-
UpdateStats: true,
330-
},
321+
if result == nil {
322+
return nil
331323
}
332-
searchCtx.Ctx = internal.WithWorkspace(ctx, &searchCtx.Workspace)
333-
334-
// Randomize the vector.
335-
tempRandomized := searchCtx.Workspace.AllocVector(vi.quantizer.GetDims())
336-
defer searchCtx.Workspace.FreeVector(tempRandomized)
337-
searchCtx.Randomized = vi.randomizeVector(vector, tempRandomized)
338-
339-
searchSet := vecstore.SearchSet{MaxResults: 1, MatchKey: key}
340324

341-
// Search with the base beam size. If that fails to find the vector, try again
342-
// with a larger beam size, in order to minimize the chance of dangling
343-
// vector references in the index.
344-
baseBeamSize := max(vi.options.BaseBeamSize, 1)
345-
for {
346-
searchCtx.Options.BaseBeamSize = baseBeamSize
347-
348-
err := vi.searchHelper(&searchCtx, &searchSet)
349-
if err != nil {
350-
return err
351-
}
352-
results := searchSet.PopUnsortedResults()
353-
if len(results) == 0 {
354-
// Retry search with significantly higher beam size.
355-
if baseBeamSize == vi.options.BaseBeamSize {
356-
baseBeamSize *= 8
357-
continue
358-
}
359-
return nil
360-
}
361-
362-
// Remove the vector from its partition in the store.
363-
_, err = vi.removeFromPartition(ctx, txn, results[0].ParentPartitionKey, results[0].ChildKey)
364-
if errors.Is(err, vecstore.ErrRestartOperation) {
365-
// If store requested the operation be retried, then re-run the
366-
// search and delete.
367-
continue
368-
}
369-
return err
370-
}
325+
// Remove the vector from its partition in the store.
326+
_, err = vi.removeFromPartition(ctx, txn, result.ParentPartitionKey, result.ChildKey)
327+
return err
371328
}
372329

373330
// Search finds vectors in the index that are closest to the given query vector
@@ -438,6 +395,59 @@ func (vi *VectorIndex) SearchForInsert(
438395
return result, nil
439396
}
440397

398+
// SearchForDelete finds the leaf partition containing the vector to be deleted.
399+
// It returns a single search result containing the key of that partition, or
400+
// nil if the vector cannot be found. This is useful for callers that directly
401+
// delete KV rows rather than using this library to do it.
402+
func (vi *VectorIndex) SearchForDelete(
403+
ctx context.Context, txn vecstore.Txn, vector vector.T, key vecstore.PrimaryKey,
404+
) (*vecstore.SearchResult, error) {
405+
// Potentially throttle operation if background work is falling behind.
406+
if err := vi.fixups.DelayInsertOrDelete(ctx); err != nil {
407+
return nil, err
408+
}
409+
410+
searchCtx := searchContext{
411+
Txn: txn,
412+
Original: vector,
413+
Level: vecstore.LeafLevel,
414+
Options: SearchOptions{
415+
SkipRerank: vi.options.DisableErrorBounds,
416+
UpdateStats: true,
417+
},
418+
}
419+
searchCtx.Ctx = internal.WithWorkspace(ctx, &searchCtx.Workspace)
420+
421+
// Randomize the vector.
422+
tempRandomized := searchCtx.Workspace.AllocVector(vi.quantizer.GetDims())
423+
defer searchCtx.Workspace.FreeVector(tempRandomized)
424+
searchCtx.Randomized = vi.randomizeVector(vector, tempRandomized)
425+
426+
searchCtx.tempSearchSet = vecstore.SearchSet{MaxResults: 1, MatchKey: key}
427+
428+
// Search with the base beam size. If that fails to find the vector, try again
429+
// with a larger beam size, in order to minimize the chance of dangling
430+
// vector references in the index.
431+
baseBeamSize := max(vi.options.BaseBeamSize, 1)
432+
for i := 0; i < 2; i++ {
433+
searchCtx.Options.BaseBeamSize = baseBeamSize
434+
435+
err := vi.searchHelper(&searchCtx, &searchCtx.tempSearchSet)
436+
if err != nil {
437+
return nil, err
438+
}
439+
results := searchCtx.tempSearchSet.PopUnsortedResults()
440+
if len(results) == 0 {
441+
// Retry search with significantly higher beam size.
442+
baseBeamSize *= 8
443+
} else {
444+
return &results[0], nil
445+
}
446+
}
447+
448+
return nil, nil
449+
}
450+
441451
// SuspendFixups suspends background fixup processing until ProcessFixups is
442452
// explicitly called. It is used for testing.
443453
func (vi *VectorIndex) SuspendFixups() {

0 commit comments

Comments
 (0)