Skip to content

Commit 9da04db

Browse files
Preslav Le, Convex, Inc.
Preslav Le
authored and
Convex, Inc.
committed
Add retention running flag to BackfillIndexState (#24069)
Create a new index_created_lower_bound field so we can update the index entry in backfilled state. Currently, we rely on it never updating. Also add a retention_running flag so that the period during which we don't run retention for newly backfilled indexes is minimal. This change adds and populates the new fields and adds a migration to backfill them properly. I am going to change IndexWorker in a separate PR. GitOrigin-RevId: 1b28ed0391f203a373ad4706cfcc65b2628eece2
1 parent 6377b2b commit 9da04db

File tree

15 files changed

+112
-58
lines changed

15 files changed

+112
-58
lines changed

Diff for: crates/application/src/lib.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -2075,7 +2075,8 @@ impl<RT: Runtime> Application<RT> {
20752075
let mut tx = self.begin(identity.clone()).await?;
20762076
for (index_name, index_fields) in indexes.into_iter() {
20772077
let index_fields = self._validate_user_defined_index_fields(index_fields)?;
2078-
let index_metadata = IndexMetadata::new_backfilling(index_name, index_fields);
2078+
let index_metadata =
2079+
IndexMetadata::new_backfilling(*tx.begin_timestamp(), index_name, index_fields);
20792080
let mut model = IndexModel::new(&mut tx);
20802081
if let Some(existing_index_metadata) = model
20812082
.pending_index_metadata(&index_metadata.name)?

Diff for: crates/common/src/bootstrap_model/index/database_index/backfill_state.rs

+29-6
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,52 @@ use serde::{
44
Deserialize,
55
Serialize,
66
};
7+
use sync_types::Timestamp;
78

89
/// Represents state of currently backfilling index.
910
/// We currently do not checkpoint. Will extend the struct when we do.
1011
#[derive(Debug, Clone, PartialEq, Eq)]
1112
#[cfg_attr(any(test, feature = "testing"), derive(proptest_derive::Arbitrary))]
12-
pub struct DatabaseIndexBackfillState;
13+
pub struct DatabaseIndexBackfillState {
14+
// A timestamp when the index was created. Note that this timestamp is slightly
15+
// before the index was committed because we don't know the commit timestamp.
16+
// We need to run retention from this timestamp, because live writes write to
17+
// the index the moment the index is committed.
18+
pub index_created_lower_bound: Option<Timestamp>,
19+
// We have done the backfill and the only step left is to catch up retention.
20+
pub retention_started: bool,
21+
}
1322

1423
#[derive(Serialize, Deserialize)]
1524
#[serde(rename_all = "camelCase")]
16-
pub struct SerializedDatabaseIndexBackfillState {}
25+
pub struct SerializedDatabaseIndexBackfillState {
26+
// TODO: Backfill and remove optional.
27+
index_created_lower_bound: Option<i64>,
28+
retention_started: Option<bool>,
29+
}
1730

1831
impl TryFrom<DatabaseIndexBackfillState> for SerializedDatabaseIndexBackfillState {
1932
type Error = anyhow::Error;
2033

21-
fn try_from(_config: DatabaseIndexBackfillState) -> anyhow::Result<Self> {
22-
Ok(Self {})
34+
fn try_from(config: DatabaseIndexBackfillState) -> anyhow::Result<Self> {
35+
Ok(Self {
36+
index_created_lower_bound: config.index_created_lower_bound.map(|ts| ts.into()),
37+
retention_started: Some(config.retention_started),
38+
})
2339
}
2440
}
2541

2642
impl TryFrom<SerializedDatabaseIndexBackfillState> for DatabaseIndexBackfillState {
2743
type Error = anyhow::Error;
2844

29-
fn try_from(_config: SerializedDatabaseIndexBackfillState) -> anyhow::Result<Self> {
30-
Ok(Self)
45+
fn try_from(config: SerializedDatabaseIndexBackfillState) -> anyhow::Result<Self> {
46+
Ok(Self {
47+
index_created_lower_bound: config
48+
.index_created_lower_bound
49+
.map(|ts| ts.try_into())
50+
.transpose()?,
51+
// Treat legacy records as retention not started.
52+
retention_started: config.retention_started.unwrap_or(false),
53+
})
3154
}
3255
}

Diff for: crates/common/src/bootstrap_model/index/database_index/index_state.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,12 @@ impl TryFrom<SerializedDatabaseIndexState> for DatabaseIndexState {
6767
},
6868
SerializedDatabaseIndexState::Backfilled2 => DatabaseIndexState::Backfilled,
6969
SerializedDatabaseIndexState::Enabled => DatabaseIndexState::Enabled,
70+
// TODO(Presley): Backfill and delete Disabled state.
7071
SerializedDatabaseIndexState::Disabled => {
71-
DatabaseIndexState::Backfilling(DatabaseIndexBackfillState)
72+
DatabaseIndexState::Backfilling(DatabaseIndexBackfillState {
73+
index_created_lower_bound: None,
74+
retention_started: false,
75+
})
7276
},
7377
})
7478
}

Diff for: crates/common/src/bootstrap_model/index/index_metadata.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use serde::{
44
Deserialize,
55
Serialize,
66
};
7+
use sync_types::Timestamp;
78
use value::{
89
codegen_convex_serialization,
910
ConvexValue,
@@ -61,12 +62,19 @@ pub struct IndexMetadata<T: TableIdentifier> {
6162
}
6263

6364
impl<T: TableIdentifier> IndexMetadata<T> {
64-
pub fn new_backfilling(name: GenericIndexName<T>, fields: IndexedFields) -> Self {
65+
pub fn new_backfilling(
66+
index_created_lower_bound: Timestamp,
67+
name: GenericIndexName<T>,
68+
fields: IndexedFields,
69+
) -> Self {
6570
Self {
6671
name,
6772
config: IndexConfig::Database {
6873
developer_config: DeveloperDatabaseIndexConfig { fields },
69-
on_disk_state: DatabaseIndexState::Backfilling(DatabaseIndexBackfillState {}),
74+
on_disk_state: DatabaseIndexState::Backfilling(DatabaseIndexBackfillState {
75+
index_created_lower_bound: Some(index_created_lower_bound),
76+
retention_started: false,
77+
}),
7078
},
7179
}
7280
}

Diff for: crates/database/src/bootstrap_model/index.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ impl<'a, RT: Runtime> IndexModel<'a, RT> {
319319
for (index_descriptor, index_schema) in &table_schema.indexes {
320320
let index_name = IndexName::new(table_name.clone(), index_descriptor.clone())?;
321321
indexes_in_schema.push(IndexMetadata::new_backfilling(
322+
*self.tx.begin_timestamp(),
322323
index_name.clone(),
323324
index_schema.fields.clone(),
324325
))
@@ -788,11 +789,10 @@ impl<'a, RT: Runtime> IndexModel<'a, RT> {
788789
}
789790
let index_name = TabletIndexName::new(target_table, index.name.descriptor().clone())?;
790791
let metadata = match index.into_value().config {
791-
// Table is empty, so it's okay to create indexes in state Enabled.
792792
IndexConfig::Database {
793793
developer_config: DeveloperDatabaseIndexConfig { fields },
794794
..
795-
} => IndexMetadata::new_backfilling(index_name, fields),
795+
} => IndexMetadata::new_backfilling(*self.tx.begin_timestamp(), index_name, fields),
796796
IndexConfig::Search {
797797
developer_config:
798798
DeveloperSearchIndexConfig {

Diff for: crates/database/src/retention.rs

+9-7
Original file line numberDiff line numberDiff line change
@@ -864,13 +864,15 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
864864
return Ok(());
865865
};
866866

867-
// Don't run retention for indexes that are currently backfilling. This
868-
// is important for correctness since IndexBackfilling and retention
869-
// interact poorly. NOTE that accumulate only adds indexes. Thus we won't
870-
// stop running retention if index is deleted or goes from Enabled to
871-
// Backfilling.
872-
if let DatabaseIndexState::Backfilling { .. } = on_disk_state {
873-
return Ok(());
867+
// Don't run retention for indexes that are still backfilling unless IndexWorker
868+
// has explicitly opted in to running retention. This is important for
869+
// correctness since index backfill and retention interact poorly.
870+
// NOTE: accumulate only adds indexes. Thus we won't stop running
871+
// retention if index is deleted or changes from Enabled to Backfilling.
872+
if let DatabaseIndexState::Backfilling(state) = on_disk_state {
873+
if !state.retention_started {
874+
return Ok(());
875+
}
874876
}
875877

876878
all_indexes.insert(index_id, (index.name, developer_config.fields));

Diff for: crates/database/src/tests/mod.rs

+9
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,10 @@ where
676676
let index_name = IndexName::new(table_name.clone(), "a_and_b".parse()?)?;
677677

678678
let mut tx = database.begin(Identity::system()).await?;
679+
let begin_ts = tx.begin_timestamp();
679680
IndexModel::new(&mut tx)
680681
.add_application_index(IndexMetadata::new_backfilling(
682+
*begin_ts,
681683
index_name.clone(),
682684
vec![str::parse("a")?, str::parse("b")?].try_into()?,
683685
))
@@ -1466,12 +1468,14 @@ async fn test_add_indexes_limit(rt: TestRuntime) -> anyhow::Result<()> {
14661468
// load once to initialize
14671469
let DbFixtures { db, tp, .. } = DbFixtures::new(&rt).await?;
14681470
let mut tx = db.begin(Identity::system()).await?;
1471+
let begin_ts = tx.begin_timestamp();
14691472

14701473
// Add the maximum allowed number of indexes.
14711474
for i in 0..MAX_USER_INDEXES {
14721475
let field_name = format!("field_{}", i);
14731476
IndexModel::new(&mut tx)
14741477
.add_application_index(IndexMetadata::new_backfilling(
1478+
*begin_ts,
14751479
IndexName::new("table".parse()?, format!("by_{}", field_name).parse()?)?,
14761480
vec![field_name.parse()?].try_into()?,
14771481
))
@@ -1481,6 +1485,7 @@ async fn test_add_indexes_limit(rt: TestRuntime) -> anyhow::Result<()> {
14811485
// Try to add one more. Should fail.
14821486
let err = IndexModel::new(&mut tx)
14831487
.add_application_index(IndexMetadata::new_backfilling(
1488+
*begin_ts,
14841489
IndexName::new("table".parse()?, "by_field_max".parse()?)?,
14851490
vec!["field_max".parse()?].try_into()?,
14861491
))
@@ -1506,8 +1511,10 @@ async fn test_add_indexes_limit(rt: TestRuntime) -> anyhow::Result<()> {
15061511
)
15071512
.await?;
15081513
let mut tx = db.begin(Identity::system()).await?;
1514+
let begin_ts = tx.begin_timestamp();
15091515
let err = IndexModel::new(&mut tx)
15101516
.add_application_index(IndexMetadata::new_backfilling(
1517+
*begin_ts,
15111518
IndexName::new("table".parse()?, "by_field_max".parse()?)?,
15121519
vec!["field_32".parse()?].try_into()?,
15131520
))
@@ -1590,8 +1597,10 @@ async fn test_index_backfill(rt: TestRuntime) -> anyhow::Result<()> {
15901597

15911598
let index_name = IndexName::new(table_name, "a_and_b".parse()?)?;
15921599
let mut tx = db.begin_system().await?;
1600+
let begin_ts = tx.begin_timestamp();
15931601
IndexModel::new(&mut tx)
15941602
.add_application_index(IndexMetadata::new_backfilling(
1603+
*begin_ts,
15951604
index_name.clone(),
15961605
vec![str::parse("a")?, str::parse("b")?].try_into()?,
15971606
))

Diff for: crates/database/src/writes.rs

+3
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ mod tests {
328328
},
329329
};
330330
use maplit::btreeset;
331+
use sync_types::Timestamp;
331332
use value::{
332333
assert_obj,
333334
ResolvedDocumentId,
@@ -369,6 +370,7 @@ mod tests {
369370
id_generator.generate(&INDEX_TABLE),
370371
CreationTime::ONE,
371372
IndexMetadata::new_backfilling(
373+
Timestamp::MIN,
372374
TabletIndexName::new(user_table1.table_id, "by_likes".parse()?)?,
373375
IndexedFields::by_id(),
374376
)
@@ -398,6 +400,7 @@ mod tests {
398400
id_generator.generate(&INDEX_TABLE),
399401
CreationTime::ONE,
400402
IndexMetadata::new_backfilling(
403+
Timestamp::MIN,
401404
TabletIndexName::new(user_table2.table_id, "by_likes".parse()?)?,
402405
IndexedFields::by_id(),
403406
)

Diff for: crates/indexing/src/tests.rs

+21-28
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ use common::{
3838
GenericIndexName,
3939
PersistenceVersion,
4040
TableName,
41-
TabletIndexName,
4241
Timestamp,
4342
},
4443
};
@@ -328,15 +327,23 @@ fn test_second_pending_index_for_name_fails() -> anyhow::Result<()> {
328327
let by_name = GenericIndexName::new(table.table_id, "by_name".parse()?)?;
329328
let pending = gen_index_document(
330329
&mut id_generator,
331-
IndexMetadata::new_backfilling(by_name.clone(), vec!["name".parse()?].try_into()?),
330+
IndexMetadata::new_backfilling(
331+
Timestamp::MIN,
332+
by_name.clone(),
333+
vec!["name".parse()?].try_into()?,
334+
),
332335
)?;
333336
let result = index_registry.update(None, Some(&pending));
334337
assert!(result.is_ok());
335338
let name_collision = ResolvedDocument::new(
336339
id_generator.generate(&INDEX_TABLE),
337340
CreationTime::ONE,
338-
IndexMetadata::new_backfilling(by_name.clone(), vec!["other_field".parse()?].try_into()?)
339-
.try_into()?,
341+
IndexMetadata::new_backfilling(
342+
Timestamp::MIN,
343+
by_name.clone(),
344+
vec!["other_field".parse()?].try_into()?,
345+
)
346+
.try_into()?,
340347
)?;
341348
let result = index_registry.update(None, Some(&name_collision));
342349
assert!(result.is_err());
@@ -703,44 +710,30 @@ fn new_enabled_doc(
703710
name: &str,
704711
fields: Vec<&str>,
705712
) -> anyhow::Result<ResolvedDocument> {
706-
new_index_doc(
707-
id_generator,
708-
table_id,
709-
name,
710-
fields,
711-
&IndexMetadata::new_enabled,
712-
)
713-
}
713+
let index_name = GenericIndexName::new(table_id, name.parse()?)?;
714+
let field_paths = fields
715+
.into_iter()
716+
.map(|field| field.parse())
717+
.collect::<anyhow::Result<Vec<FieldPath>>>()?;
714718

715-
fn new_pending_doc(
716-
id_generator: &mut dyn IdGenerator,
717-
table_id: TableId,
718-
name: &str,
719-
fields: Vec<&str>,
720-
) -> anyhow::Result<ResolvedDocument> {
721-
new_index_doc(
722-
id_generator,
723-
table_id,
724-
name,
725-
fields,
726-
&IndexMetadata::new_backfilling,
727-
)
719+
let metadata = IndexMetadata::new_enabled(index_name, field_paths.try_into()?);
720+
gen_index_document(id_generator, metadata)
728721
}
729722

730-
fn new_index_doc(
723+
fn new_pending_doc(
731724
id_generator: &mut dyn IdGenerator,
732725
table_id: TableId,
733726
name: &str,
734727
fields: Vec<&str>,
735-
get_metadata: &dyn Fn(TabletIndexName, IndexedFields) -> TabletIndexMetadata,
736728
) -> anyhow::Result<ResolvedDocument> {
737729
let index_name = GenericIndexName::new(table_id, name.parse()?)?;
738730
let field_paths = fields
739731
.into_iter()
740732
.map(|field| field.parse())
741733
.collect::<anyhow::Result<Vec<FieldPath>>>()?;
742734

743-
let metadata = get_metadata(index_name, field_paths.try_into()?);
735+
let metadata =
736+
IndexMetadata::new_backfilling(Timestamp::MIN, index_name, field_paths.try_into()?);
744737
gen_index_document(id_generator, metadata)
745738
}
746739

Diff for: crates/isolate/src/test_helpers.rs

+1
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ impl<RT: Runtime, P: Persistence + Clone> UdfTest<RT, P> {
347347
let mut tx = self.database.begin(Identity::system()).await?;
348348
let index_name = name.parse()?;
349349
let index = IndexMetadata::new_backfilling(
350+
*tx.begin_timestamp(),
350351
index_name,
351352
IndexedFields::try_from(vec![field.parse()?])?,
352353
);

Diff for: crates/isolate/src/tests/adversarial.rs

+2
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ async fn test_read_many_documents(rt: TestRuntime) -> anyhow::Result<()> {
122122
async fn test_reads_too_many(rt: TestRuntime) -> anyhow::Result<()> {
123123
let t = UdfTest::default(rt).await?;
124124
t.add_index(IndexMetadata::new_backfilling(
125+
*t.database.now_ts_for_reads(),
125126
"test.by_hello".parse()?,
126127
IndexedFields::try_from(vec!["hello".parse()?])?,
127128
))
@@ -138,6 +139,7 @@ async fn test_reads_too_many(rt: TestRuntime) -> anyhow::Result<()> {
138139
async fn test_reads_many(rt: TestRuntime) -> anyhow::Result<()> {
139140
let t = UdfTest::default(rt).await?;
140141
t.add_index(IndexMetadata::new_backfilling(
142+
*t.database.now_ts_for_reads(),
141143
"test.by_hello".parse()?,
142144
IndexedFields::try_from(vec!["hello".parse()?])?,
143145
))

Diff for: crates/isolate/src/tests/query.rs

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use crate::{
3636

3737
async fn add_index<RT: Runtime, P: Persistence + Clone>(t: &UdfTest<RT, P>) -> anyhow::Result<()> {
3838
t.add_index(IndexMetadata::new_backfilling(
39+
*t.database.now_ts_for_reads(),
3940
"myTable.by_a_b".parse()?,
4041
IndexedFields::try_from(vec!["a".parse()?, "b".parse()?])?,
4142
))
@@ -739,6 +740,7 @@ async fn test_query_journal_start_to_end(rt: TestRuntime) -> anyhow::Result<()>
739740
async fn test_query_journal_middle_to_middle(rt: TestRuntime) -> anyhow::Result<()> {
740741
let t = UdfTest::default(rt).await?;
741742
t.add_index(IndexMetadata::new_backfilling(
743+
*t.database.now_ts_for_reads(),
742744
"test.by_hello".parse()?,
743745
IndexedFields::try_from(vec!["hello".parse()?])?,
744746
))

Diff for: crates/model/src/config/index_test_utils.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,13 @@ pub(crate) use db_schema_with_indexes;
116116

117117
use super::types::ConfigMetadata;
118118

119-
pub fn assert_root_cause<T: Debug>(result: anyhow::Result<T>, expected: &str) {
119+
pub fn assert_root_cause_contains<T: Debug>(result: anyhow::Result<T>, expected: &str) {
120120
let error = result.unwrap_err();
121121
let root_cause = error.root_cause();
122-
assert_eq!(format!("{}", root_cause), expected);
122+
assert!(
123+
format!("{}", root_cause).contains(expected),
124+
"Root cause \"{root_cause}\" does not contain expected string:\n\"{expected}\""
125+
);
123126
}
124127

125128
/// Simulate a CLI pushing a schema, waiting for backfill, then committing the

0 commit comments

Comments
 (0)