Commit 99d2c47

Return 429 instead of 503 when all the shards of an index are rate limited (#5651)
1 parent 43fc2e4 commit 99d2c47

4 files changed, +172 -33 lines changed

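Before this change, a subrequest whose open shards were all rate limited was reported as "no shards available", which surfaces to the client as a 503. This commit threads a distinct rate-limited signal from the routing table up to the ingest response so that clients receive a 429 and back off instead. The snippet below is a minimal, self-contained sketch of that distinction, not Quickwit code: the enum mirrors the NextOpenShardError type added in this commit, while the HTTP mapping is an assumption based on the commit title (the status-code translation itself happens outside this diff).

// Standalone sketch; the status-code mapping is assumed from the commit title,
// not taken from the Quickwit REST layer.
#[derive(Debug, PartialEq, Eq)]
enum NextOpenShardError {
    // No open shard exists for the index/source at all.
    NoShardsAvailable,
    // Open shards exist, but every one of them is currently rate limited.
    RateLimited,
}

// Hypothetical mapping from the routing error to an HTTP status code.
fn http_status_for(error: &NextOpenShardError) -> u16 {
    match error {
        // The client should back off and retry: 429 Too Many Requests.
        NextOpenShardError::RateLimited => 429,
        // The service genuinely cannot route the request: 503 Service Unavailable.
        NextOpenShardError::NoShardsAvailable => 503,
    }
}

fn main() {
    assert_eq!(http_status_for(&NextOpenShardError::RateLimited), 429);
    assert_eq!(http_status_for(&NextOpenShardError::NoShardsAvailable), 503);
}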

quickwit/quickwit-ingest/src/ingest_v2/router.rs

+112 -21
@@ -49,7 +49,7 @@ use super::debouncing::{
 };
 use super::ingester::PERSIST_REQUEST_TIMEOUT;
 use super::metrics::IngestResultMetrics;
-use super::routing_table::RoutingTable;
+use super::routing_table::{NextOpenShardError, RoutingTable};
 use super::workbench::IngestWorkbench;
 use super::{pending_subrequests, IngesterPool};
 use crate::{get_ingest_router_buffer_size, LeaderId};
@@ -287,6 +287,7 @@ impl IngestRouter {
         }
         for persist_failure in persist_response.failures {
             workbench.record_persist_failure(&persist_failure);
+
             match persist_failure.reason() {
                 PersistFailureReason::ShardClosed => {
                     let shard_id = persist_failure.shard_id().clone();
@@ -314,7 +315,7 @@ impl IngestRouter {
                     // That way we will avoid to retry the persist request on the very
                     // same node.
                     let shard_id = persist_failure.shard_id().clone();
-                    workbench.rate_limited_shard.insert(shard_id);
+                    workbench.rate_limited_shards.insert(shard_id);
                 }
                 _ => {}
             }
@@ -363,39 +364,44 @@ impl IngestRouter {
         self.populate_routing_table_debounced(workbench, debounced_request)
             .await;
 
-        // List of subrequest IDs for which no shards are available to route the subrequests to.
-        let mut no_shards_available_subrequest_ids = Vec::new();
+        // Subrequests for which no shards are available to route the subrequests to.
+        let mut no_shards_available_subrequest_ids: Vec<SubrequestId> = Vec::new();
+        // Subrequests for which the shards are rate limited.
+        let mut rate_limited_subrequest_ids: Vec<SubrequestId> = Vec::new();
 
         let mut per_leader_persist_subrequests: HashMap<&LeaderId, Vec<PersistSubrequest>> =
             HashMap::new();
 
+        let rate_limited_shards: &HashSet<ShardId> = &workbench.rate_limited_shards;
         let state_guard = self.state.lock().await;
 
-        // TODO: Here would be the most optimal place to split the body of the HTTP request into
-        // lines, validate, transform and then pack the docs into compressed batches routed
-        // to the right shards.
-
-        let rate_limited_shards: &HashSet<ShardId> = &workbench.rate_limited_shard;
         for subrequest in pending_subrequests(&workbench.subworkbenches) {
-            let Some(shard) = state_guard
+            let next_open_shard_res_opt = state_guard
                 .routing_table
                 .find_entry(&subrequest.index_id, &subrequest.source_id)
-                .and_then(|entry| {
+                .map(|entry| {
                     entry.next_open_shard_round_robin(&self.ingester_pool, rate_limited_shards)
-                })
-            else {
-                no_shards_available_subrequest_ids.push(subrequest.subrequest_id);
-                continue;
+                });
+            let next_open_shard = match next_open_shard_res_opt {
+                Some(Ok(next_open_shard)) => next_open_shard,
+                Some(Err(NextOpenShardError::RateLimited)) => {
+                    rate_limited_subrequest_ids.push(subrequest.subrequest_id);
+                    continue;
+                }
+                Some(Err(NextOpenShardError::NoShardsAvailable)) | None => {
+                    no_shards_available_subrequest_ids.push(subrequest.subrequest_id);
+                    continue;
+                }
             };
             let persist_subrequest = PersistSubrequest {
                 subrequest_id: subrequest.subrequest_id,
-                index_uid: shard.index_uid.clone().into(),
-                source_id: shard.source_id.clone(),
-                shard_id: Some(shard.shard_id.clone()),
+                index_uid: next_open_shard.index_uid.clone().into(),
+                source_id: next_open_shard.source_id.clone(),
+                shard_id: Some(next_open_shard.shard_id.clone()),
                 doc_batch: subrequest.doc_batch.clone(),
             };
             per_leader_persist_subrequests
-                .entry(&shard.leader_id)
+                .entry(&next_open_shard.leader_id)
                 .or_default()
                 .push(persist_subrequest);
         }
@@ -421,6 +427,7 @@ impl IngestRouter {
             commit_type: commit_type as i32,
         };
         workbench.record_persist_request(&persist_request);
+
         let persist_future = async move {
             let persist_result = tokio::time::timeout(
                 PERSIST_REQUEST_TIMEOUT,
@@ -443,6 +450,9 @@ impl IngestRouter {
         for subrequest_id in no_shards_available_subrequest_ids {
             workbench.record_no_shards_available(subrequest_id);
         }
+        for subrequest_id in rate_limited_subrequest_ids {
+            workbench.record_rate_limited(subrequest_id);
+        }
         self.process_persist_results(workbench, persist_futures)
             .await;
     }
@@ -610,7 +620,6 @@ impl IngestRouterService for IngestRouter {
                 .retry_batch_persist(ingest_request, MAX_PERSIST_ATTEMPTS)
                 .await)
         };
-
         update_ingest_metrics(&ingest_res, num_subrequests);
 
         ingest_res
@@ -1916,7 +1925,7 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn test_do_not_retry_rate_limited_shards() {
+    async fn test_router_does_not_retry_rate_limited_shards() {
         // We avoid retrying a shard limited shard at the scale of a workbench.
         let self_node_id = "test-router".into();
         let control_plane = ControlPlaneServiceClient::from_mock(MockControlPlaneService::new());
@@ -2075,4 +2084,86 @@ mod tests {
         };
         router.ingest(ingest_request).await.unwrap();
     }
+
+    #[tokio::test]
+    async fn test_router_returns_rate_limited_failure() {
+        let self_node_id = "test-router".into();
+        let control_plane = ControlPlaneServiceClient::from_mock(MockControlPlaneService::new());
+        let ingester_pool = IngesterPool::default();
+        let replication_factor = 1;
+        let router = IngestRouter::new(
+            self_node_id,
+            control_plane,
+            ingester_pool.clone(),
+            replication_factor,
+            EventBroker::default(),
+        );
+        let mut state_guard = router.state.lock().await;
+        let index_uid: IndexUid = IndexUid::for_test("test-index-0", 0);
+
+        state_guard.routing_table.replace_shards(
+            index_uid.clone(),
+            "test-source",
+            vec![Shard {
+                index_uid: Some(index_uid.clone()),
+                source_id: "test-source".to_string(),
+                shard_id: Some(ShardId::from(1)),
+                shard_state: ShardState::Open as i32,
+                leader_id: "test-ingester-0".to_string(),
+                ..Default::default()
+            }],
+        );
+        drop(state_guard);
+
+        let mut mock_ingester_0 = MockIngesterService::new();
+        mock_ingester_0
+            .expect_persist()
+            .times(1)
+            .returning(move |request| {
+                assert_eq!(request.leader_id, "test-ingester-0");
+                assert_eq!(request.commit_type(), CommitTypeV2::Auto);
+                assert_eq!(request.subrequests.len(), 1);
+                let subrequest = &request.subrequests[0];
+                assert_eq!(subrequest.subrequest_id, 0);
+                let index_uid = subrequest.index_uid().clone();
+                assert_eq!(subrequest.source_id, "test-source");
+                assert_eq!(subrequest.shard_id(), ShardId::from(1));
+                assert_eq!(
+                    subrequest.doc_batch,
+                    Some(DocBatchV2::for_test(["test-doc-foo"]))
+                );
+
+                let response = PersistResponse {
+                    leader_id: request.leader_id,
+                    successes: Vec::new(),
+                    failures: vec![PersistFailure {
+                        subrequest_id: 0,
+                        index_uid: Some(index_uid),
+                        source_id: "test-source".to_string(),
+                        shard_id: Some(ShardId::from(1)),
+                        reason: PersistFailureReason::ShardRateLimited as i32,
+                    }],
+                };
+                Ok(response)
+            });
+        let ingester_0 = IngesterServiceClient::from_mock(mock_ingester_0);
+        ingester_pool.insert("test-ingester-0".into(), ingester_0.clone());
+
+        let ingest_request = IngestRequestV2 {
+            subrequests: vec![IngestSubrequest {
+                subrequest_id: 0,
+                index_id: "test-index-0".to_string(),
+                source_id: "test-source".to_string(),
+                doc_batch: Some(DocBatchV2::for_test(["test-doc-foo"])),
+            }],
+            commit_type: CommitTypeV2::Auto as i32,
+        };
+        let ingest_response = router.ingest(ingest_request).await.unwrap();
+        assert_eq!(ingest_response.successes.len(), 0);
+        assert_eq!(ingest_response.failures.len(), 1);
+        assert_eq!(
+            ingest_response.failures[0].reason(),
+            IngestFailureReason::ShardRateLimited
+        );
+    }
 }

quickwit/quickwit-ingest/src/ingest_v2/routing_table.rs

+51 -10
@@ -143,7 +143,9 @@ impl RoutingTableEntry {
         &self,
         ingester_pool: &IngesterPool,
         rate_limited_shards: &HashSet<ShardId>,
-    ) -> Option<&RoutingEntry> {
+    ) -> Result<&RoutingEntry, NextOpenShardError> {
+        let mut error = NextOpenShardError::NoShardsAvailable;
+
         for (shards, round_robin_idx) in [
             (&self.local_shards, &self.local_round_robin_idx),
             (&self.remote_shards, &self.remote_round_robin_idx),
@@ -154,17 +156,20 @@ impl RoutingTableEntry {
             for _attempt in 0..shards.len() {
                 let shard_idx = round_robin_idx.fetch_add(1, Ordering::Relaxed);
                 let shard_routing_entry: &RoutingEntry = &shards[shard_idx % shards.len()];
-                if !shard_routing_entry.shard_state.is_open()
-                    || rate_limited_shards.contains(&shard_routing_entry.shard_id)
-                {
+
+                if !shard_routing_entry.shard_state.is_open() {
+                    continue;
+                }
+                if rate_limited_shards.contains(&shard_routing_entry.shard_id) {
+                    error = NextOpenShardError::RateLimited;
                     continue;
                 }
                 if ingester_pool.contains_key(&shard_routing_entry.leader_id) {
-                    return Some(shard_routing_entry);
+                    return Ok(shard_routing_entry);
                 }
             }
         }
-        None
+        Err(error)
     }
 
     /// Inserts the open shards the routing table is not aware of.
@@ -323,6 +328,12 @@ impl RoutingTableEntry {
     }
 }
 
+#[derive(Debug, PartialEq, Eq)]
+pub(super) enum NextOpenShardError {
+    NoShardsAvailable,
+    RateLimited,
+}
+
 /// Stores the list of shards the router is aware of for each index and source. The resolution from
 /// index and source to shards is performed using index ID (not index UID) and source ID.
 #[derive(Debug)]
@@ -657,12 +668,12 @@ mod tests {
         let source_id: SourceId = "test-source".into();
         let table_entry = RoutingTableEntry::empty(index_uid.clone(), source_id.clone());
         let ingester_pool = IngesterPool::default();
-
         let mut rate_limited_shards = HashSet::new();
 
-        let shard_opt =
-            table_entry.next_open_shard_round_robin(&ingester_pool, &rate_limited_shards);
-        assert!(shard_opt.is_none());
+        let error = table_entry
+            .next_open_shard_round_robin(&ingester_pool, &rate_limited_shards)
+            .unwrap_err();
+        assert_eq!(error, NextOpenShardError::NoShardsAvailable);
 
         ingester_pool.insert("test-ingester-0".into(), IngesterServiceClient::mocked());
         ingester_pool.insert("test-ingester-1".into(), IngesterServiceClient::mocked());
@@ -778,6 +789,36 @@ mod tests {
         assert_eq!(shard.shard_id, ShardId::from(2));
     }
 
+    #[test]
+    fn test_routing_table_entry_next_open_shard_round_robin_rate_limited_error() {
+        let index_uid = IndexUid::for_test("test-index", 0);
+        let source_id: SourceId = "test-source".into();
+
+        let ingester_pool = IngesterPool::default();
+        ingester_pool.insert("test-ingester-0".into(), IngesterServiceClient::mocked());
+
+        let rate_limited_shards = HashSet::from_iter([ShardId::from(1)]);
+
+        let table_entry = RoutingTableEntry {
+            index_uid: index_uid.clone(),
+            source_id: source_id.clone(),
+            local_shards: vec![RoutingEntry {
+                index_uid: index_uid.clone(),
+                source_id: "test-source".to_string(),
+                shard_id: ShardId::from(1),
+                shard_state: ShardState::Open,
+                leader_id: "test-ingester-0".into(),
+            }],
+            local_round_robin_idx: AtomicUsize::default(),
+            remote_shards: Vec::new(),
+            remote_round_robin_idx: AtomicUsize::default(),
+        };
+        let error = table_entry
+            .next_open_shard_round_robin(&ingester_pool, &rate_limited_shards)
+            .unwrap_err();
+        assert_eq!(error, NextOpenShardError::RateLimited);
+    }
+
     #[test]
     fn test_routing_table_entry_insert_open_shards() {
         let index_uid_0 = IndexUid::for_test("test-index", 0);
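
For context, here is a minimal self-contained sketch of the error-priority rule that next_open_shard_round_robin implements above (simplified stand-in types, not the actual RoutingTableEntry): the error defaults to NoShardsAvailable and is upgraded to RateLimited as soon as an open shard is skipped solely because it is rate limited, so callers can tell the two failure modes apart.

use std::collections::HashSet;

// Simplified stand-ins for the Quickwit types; the real code uses richer
// types and a round-robin index over local and remote shards.
type ShardId = u64;

#[derive(Debug, PartialEq, Eq)]
enum NextOpenShardError {
    NoShardsAvailable,
    RateLimited,
}

struct Shard {
    shard_id: ShardId,
    is_open: bool,
}

// Mirrors the priority logic: default to NoShardsAvailable, upgrade to
// RateLimited when an open shard is skipped only because it is rate limited,
// and return the first usable shard otherwise.
fn next_open_shard<'a>(
    shards: &'a [Shard],
    rate_limited_shards: &HashSet<ShardId>,
) -> Result<&'a Shard, NextOpenShardError> {
    let mut error = NextOpenShardError::NoShardsAvailable;
    for shard in shards {
        if !shard.is_open {
            continue;
        }
        if rate_limited_shards.contains(&shard.shard_id) {
            error = NextOpenShardError::RateLimited;
            continue;
        }
        return Ok(shard);
    }
    Err(error)
}

fn main() {
    let shards = vec![Shard { shard_id: 1, is_open: true }];
    let rate_limited_shards: HashSet<ShardId> = HashSet::from_iter([1]);

    // The only open shard is rate limited, so the caller can report a
    // rate-limited failure (429) rather than "no shards available" (503).
    let error = next_open_shard(&shards, &rate_limited_shards).unwrap_err();
    assert_eq!(error, NextOpenShardError::RateLimited);
}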

quickwit/quickwit-ingest/src/ingest_v2/workbench.rs

+8 -1
@@ -37,7 +37,7 @@ use super::router::PersistRequestSummary;
 #[derive(Default)]
 pub(super) struct IngestWorkbench {
     pub subworkbenches: BTreeMap<SubrequestId, IngestSubworkbench>,
-    pub rate_limited_shard: HashSet<ShardId>,
+    pub rate_limited_shards: HashSet<ShardId>,
     pub num_successes: usize,
     /// The number of batch persist attempts. This is not sum of the number of attempts for each
     /// subrequest.
@@ -240,6 +240,13 @@ impl IngestWorkbench {
         self.record_failure(subrequest_id, SubworkbenchFailure::NoShardsAvailable);
     }
 
+    pub fn record_rate_limited(&mut self, subrequest_id: SubrequestId) {
+        self.record_failure(
+            subrequest_id,
+            SubworkbenchFailure::RateLimited(RateLimitingCause::ShardRateLimiting),
+        );
+    }
+
     /// Marks a node as unavailable for the span of the workbench.
     ///
     /// Remaining attempts will treat the node as if it was not in the ingester pool.

quickwit/quickwit-proto/src/ingest/mod.rs

+1 -1
@@ -36,7 +36,7 @@ pub type IngestV2Result<T> = std::result::Result<T, IngestV2Error>;
 pub enum RateLimitingCause {
     #[error("router load shedding")]
     RouterLoadShedding,
-    #[error("load shadding")]
+    #[error("load shedding")]
     LoadShedding,
     #[error("wal full (memory or disk)")]
     WalFull,
