Skip to content

Commit 7bc495a

Browse files
authored
Reduce aggregation result footprint (#5661)
* Replace serde_json::Value with serde_json_borrow::OwnedValue * Use borrow serde for ES API
1 parent 6c3159e commit 7bc495a

File tree

12 files changed

+171
-39
lines changed

12 files changed

+171
-39
lines changed

quickwit/Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/quickwit-cli/src/index.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,8 @@ use quickwit_config::{ConfigFormat, IndexConfig};
3333
use quickwit_metastore::{IndexMetadata, Split, SplitState};
3434
use quickwit_proto::search::{CountHits, SortField, SortOrder};
3535
use quickwit_proto::types::IndexId;
36-
use quickwit_rest_client::models::IngestSource;
36+
use quickwit_rest_client::models::{IngestSource, SearchResponseRestClient};
3737
use quickwit_rest_client::rest_client::{CommitType, IngestEvent};
38-
use quickwit_search::SearchResponseRest;
3938
use quickwit_serve::{ListSplitsQueryParams, SearchRequestQueryString, SortBy};
4039
use quickwit_storage::{load_file, StorageResolver};
4140
use tabled::settings::object::{FirstRow, Rows, Segment};
@@ -1083,7 +1082,7 @@ fn progress_bar_style() -> ProgressStyle {
10831082
.tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"])
10841083
}
10851084

1086-
pub async fn search_index(args: SearchIndexArgs) -> anyhow::Result<SearchResponseRest> {
1085+
pub async fn search_index(args: SearchIndexArgs) -> anyhow::Result<SearchResponseRestClient> {
10871086
let aggs: Option<serde_json::Value> = args
10881087
.aggregation
10891088
.map(|aggs_string| {

quickwit/quickwit-rest-client/src/models.rs

+16
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ use std::time::Duration;
1717

1818
use reqwest::StatusCode;
1919
use serde::de::DeserializeOwned;
20+
use serde::{Deserialize, Serialize};
21+
use serde_json::Value as JsonValue;
2022

2123
use crate::error::{ApiError, Error, ErrorResponsePayload};
2224

@@ -71,6 +73,20 @@ impl ApiResponse {
7173
}
7274
}
7375

76+
/// A cousin of [`quickwit_search::SearchResponseRest`] that implements [`Deserialize`].
77+
///
78+
/// This version of the response is necessary because
79+
/// `serde_json_borrow::OwnedValue` is not deserializable.
80+
#[derive(Deserialize, Serialize, PartialEq, Debug)]
81+
pub struct SearchResponseRestClient {
82+
pub num_hits: u64,
83+
pub hits: Vec<JsonValue>,
84+
pub snippets: Option<Vec<JsonValue>>,
85+
pub elapsed_time_micros: u64,
86+
pub errors: Vec<String>,
87+
pub aggregations: Option<JsonValue>,
88+
}
89+
7490
#[derive(Clone)]
7591
pub enum IngestSource {
7692
Str(String),

quickwit/quickwit-rest-client/src/rest_client.rs

+4-6
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use quickwit_indexing::actors::IndexingServiceCounters;
2222
pub use quickwit_ingest::CommitType;
2323
use quickwit_metastore::{IndexMetadata, Split, SplitInfo};
2424
use quickwit_proto::ingest::Shard;
25-
use quickwit_search::SearchResponseRest;
2625
use quickwit_serve::{
2726
ListSplitsQueryParams, ListSplitsResponse, RestIngestResponse, SearchRequestQueryString,
2827
};
@@ -32,7 +31,7 @@ use serde::Serialize;
3231
use serde_json::json;
3332

3433
use crate::error::Error;
35-
use crate::models::{ApiResponse, IngestSource, Timeout};
34+
use crate::models::{ApiResponse, IngestSource, SearchResponseRestClient, Timeout};
3635
use crate::BatchLineReader;
3736

3837
pub const DEFAULT_BASE_URL: &str = "http://127.0.0.1:7280";
@@ -210,7 +209,7 @@ impl QuickwitClient {
210209
&self,
211210
index_id: &str,
212211
search_query: SearchRequestQueryString,
213-
) -> Result<SearchResponseRest, Error> {
212+
) -> Result<SearchResponseRestClient, Error> {
214213
let path = format!("{index_id}/search");
215214
let bytes = serde_json::to_string(&search_query)
216215
.unwrap()
@@ -735,7 +734,6 @@ mod test {
735734
use quickwit_indexing::mock_split;
736735
use quickwit_ingest::CommitType;
737736
use quickwit_metastore::IndexMetadata;
738-
use quickwit_search::SearchResponseRest;
739737
use quickwit_serve::{
740738
ListSplitsQueryParams, ListSplitsResponse, RestIngestResponse, SearchRequestQueryString,
741739
};
@@ -750,7 +748,7 @@ mod test {
750748
use wiremock::{Mock, MockServer, ResponseTemplate};
751749

752750
use crate::error::Error;
753-
use crate::models::IngestSource;
751+
use crate::models::{IngestSource, SearchResponseRestClient};
754752
use crate::rest_client::QuickwitClientBuilder;
755753

756754
#[tokio::test]
@@ -773,7 +771,7 @@ mod test {
773771
let search_query_params = SearchRequestQueryString {
774772
..Default::default()
775773
};
776-
let expected_search_response = SearchResponseRest {
774+
let expected_search_response = SearchResponseRestClient {
777775
num_hits: 0,
778776
hits: Vec::new(),
779777
snippets: None,

quickwit/quickwit-search/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ prost = { workspace = true }
2727
rayon = { workspace = true }
2828
serde = { workspace = true }
2929
serde_json = { workspace = true }
30+
serde_json_borrow = { workspace = true }
3031
tantivy = { workspace = true }
3132
tantivy-fst = { workspace = true }
3233
thiserror = { workspace = true }

quickwit/quickwit-search/src/lib.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ pub use crate::root::{
8686
IndexMetasForLeafSearch, SearchJob,
8787
};
8888
pub use crate::search_job_placer::{Job, SearchJobPlacer};
89-
pub use crate::search_response_rest::{SearchPlanResponseRest, SearchResponseRest};
89+
pub use crate::search_response_rest::{
90+
AggregationResults, SearchPlanResponseRest, SearchResponseRest,
91+
};
9092
pub use crate::search_stream::root_search_stream;
9193
pub use crate::service::{MockSearchService, SearchService, SearchServiceImpl};
9294

quickwit/quickwit-search/src/search_response_rest.rs

+19-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
use std::convert::TryFrom;
16+
use std::io;
1617

1718
use quickwit_common::truncate_str;
1819
use quickwit_proto::search::SearchResponse;
@@ -22,9 +23,24 @@ use serde_json::Value as JsonValue;
2223

2324
use crate::error::SearchError;
2425

26+
/// A lightweight serializable representation of aggregation results.
27+
///
28+
/// We use `serde_json_borrow` here to avoid unnecessary
29+
/// allocations. On large aggregation results with tens of thousands of
30+
/// entries this has a significant impact compared to `serde_json`.
31+
#[derive(Serialize, PartialEq, Debug)]
32+
pub struct AggregationResults(serde_json_borrow::OwnedValue);
33+
34+
impl AggregationResults {
35+
/// Parse an aggregation result from a serialized JSON string.
36+
pub fn from_json(json_str: &str) -> io::Result<Self> {
37+
serde_json_borrow::OwnedValue::from_str(json_str).map(Self)
38+
}
39+
}
40+
2541
/// SearchResponseRest represents the response returned by the REST search API
2642
/// and is meant to be serialized into JSON.
27-
#[derive(Serialize, Deserialize, PartialEq, Debug, utoipa::ToSchema)]
43+
#[derive(Serialize, PartialEq, Debug, utoipa::ToSchema)]
2844
pub struct SearchResponseRest {
2945
/// Overall number of documents matching the query.
3046
pub num_hits: u64,
@@ -42,7 +58,7 @@ pub struct SearchResponseRest {
4258
/// Aggregations.
4359
#[schema(value_type = Object)]
4460
#[serde(skip_serializing_if = "Option::is_none")]
45-
pub aggregations: Option<JsonValue>,
61+
pub aggregations: Option<AggregationResults>,
4662
}
4763

4864
impl TryFrom<SearchResponse> for SearchResponseRest {
@@ -79,7 +95,7 @@ impl TryFrom<SearchResponse> for SearchResponseRest {
7995
};
8096

8197
let aggregations_opt = if let Some(aggregation_json) = search_response.aggregation {
82-
let aggregation: JsonValue = serde_json::from_str(&aggregation_json)
98+
let aggregation = AggregationResults::from_json(&aggregation_json)
8399
.map_err(|err| SearchError::Internal(err.to_string()))?;
84100
Some(aggregation)
85101
} else {

quickwit/quickwit-serve/src/elasticsearch_api/mod.rs

+24-15
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,6 @@ mod tests {
160160

161161
use super::elastic_api_handlers;
162162
use super::model::ElasticsearchError;
163-
use crate::elasticsearch_api::model::MultiSearchResponse;
164163
use crate::elasticsearch_api::rest_handler::es_compat_cluster_info_handler;
165164
use crate::rest::recover_fn;
166165
use crate::BuildInfo;
@@ -224,12 +223,17 @@ mod tests {
224223
assert_eq!(resp.status(), 200);
225224
assert!(resp.headers().get("x-elastic-product").is_none(),);
226225
let string_body = String::from_utf8(resp.body().to_vec()).unwrap();
227-
let es_msearch_response: MultiSearchResponse = serde_json::from_str(&string_body).unwrap();
228-
assert_eq!(es_msearch_response.responses.len(), 2);
229-
for response in es_msearch_response.responses {
230-
assert_eq!(response.status, 200);
231-
assert_eq!(response.error, None);
232-
assert!(response.response.is_some())
226+
let es_msearch_response: serde_json::Value = serde_json::from_str(&string_body).unwrap();
227+
let responses = es_msearch_response
228+
.get("responses")
229+
.unwrap()
230+
.as_array()
231+
.unwrap();
232+
assert_eq!(responses.len(), 2);
233+
for response in responses {
234+
assert_eq!(response.get("status").unwrap().as_u64().unwrap(), 200);
235+
assert_eq!(response.get("error"), None);
236+
response.get("hits").unwrap();
233237
}
234238
}
235239

@@ -279,15 +283,20 @@ mod tests {
279283
.reply(&es_search_api_handler)
280284
.await;
281285
assert_eq!(resp.status(), 200);
282-
let es_msearch_response: MultiSearchResponse = serde_json::from_slice(resp.body()).unwrap();
283-
assert_eq!(es_msearch_response.responses.len(), 2);
284-
assert_eq!(es_msearch_response.responses[0].status, 200);
285-
assert!(es_msearch_response.responses[0].error.is_none());
286-
assert_eq!(es_msearch_response.responses[1].status, 500);
287-
assert!(es_msearch_response.responses[1].response.is_none());
288-
let error_cause = es_msearch_response.responses[1].error.as_ref().unwrap();
286+
let es_msearch_response: serde_json::Value = serde_json::from_slice(resp.body()).unwrap();
287+
let responses = es_msearch_response
288+
.get("responses")
289+
.unwrap()
290+
.as_array()
291+
.unwrap();
292+
assert_eq!(responses.len(), 2);
293+
assert_eq!(responses[0].get("status").unwrap().as_u64().unwrap(), 200);
294+
assert_eq!(responses[0].get("error"), None);
295+
assert_eq!(responses[1].get("status").unwrap().as_u64().unwrap(), 500);
296+
assert_eq!(responses[1].get("hits"), None);
297+
let error_cause = responses[1].get("error").unwrap();
289298
assert_eq!(
290-
error_cause.reason.as_ref().unwrap(),
299+
error_cause.get("reason").unwrap().as_str().unwrap(),
291300
"internal error: `something bad happened`"
292301
);
293302
}

quickwit/quickwit-serve/src/elasticsearch_api/model/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ mod multi_search;
2121
mod scroll;
2222
mod search_body;
2323
mod search_query_params;
24+
mod search_response;
2425
mod stats;
2526

2627
pub use bulk_body::BulkAction;
@@ -41,6 +42,7 @@ use quickwit_proto::search::{SortDatetimeFormat, SortOrder};
4142
pub use scroll::ScrollQueryParams;
4243
pub use search_body::SearchBody;
4344
pub use search_query_params::{DeleteQueryParams, SearchQueryParams, SearchQueryParamsCount};
45+
pub use search_response::ElasticsearchResponse;
4446
use serde::{Deserialize, Serialize};
4547
pub use stats::{ElasticsearchStatsResponse, StatsResponseEntry};
4648

quickwit/quickwit-serve/src/elasticsearch_api/model/multi_search.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use elasticsearch_dsl::search::SearchResponse as ElasticsearchResponse;
1615
use elasticsearch_dsl::ErrorCause;
1716
use hyper::StatusCode;
1817
use serde::{Deserialize, Serialize};
1918
use serde_with::formats::PreferMany;
2019
use serde_with::{serde_as, OneOrMany};
2120

2221
use super::search_query_params::ExpandWildcards;
22+
use super::search_response::ElasticsearchResponse;
2323
use super::ElasticsearchError;
2424
use crate::simple_list::{from_simple_list, to_simple_list};
2525

@@ -100,12 +100,12 @@ pub struct MultiSearchHeader {
100100
pub routing: Option<Vec<String>>,
101101
}
102102

103-
#[derive(Serialize, Deserialize)]
103+
#[derive(Serialize)]
104104
pub struct MultiSearchResponse {
105105
pub responses: Vec<MultiSearchSingleResponse>,
106106
}
107107

108-
#[derive(Serialize, Deserialize, Debug)]
108+
#[derive(Serialize, Debug)]
109109
pub struct MultiSearchSingleResponse {
110110
#[serde(with = "http_serde::status_code")]
111111
pub status: StatusCode,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Copyright 2021-Present Datadog, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use elasticsearch_dsl::{ClusterStatistics, HitsMetadata, ShardStatistics, Suggest};
16+
use quickwit_search::AggregationResults;
17+
use serde::Serialize;
18+
19+
type Map<K, V> = std::collections::BTreeMap<K, V>;
20+
21+
/// Search response
22+
///
23+
/// This is a fork of [`elasticsearch_dsl::SearchResponse`] with the
24+
/// `aggregations` field using [`AggregationResults`] instead of
25+
/// [`serde_json::Value`].
26+
#[derive(Debug, Default, Serialize, PartialEq)]
27+
pub struct ElasticsearchResponse {
28+
/// The time that it took Elasticsearch to process the query
29+
pub took: u32,
30+
31+
/// The search has been cancelled and results are partial
32+
pub timed_out: bool,
33+
34+
/// Indicates if search has been terminated early
35+
#[serde(default)]
36+
pub terminated_early: Option<bool>,
37+
38+
/// Scroll Id
39+
#[serde(skip_serializing_if = "Option::is_none")]
40+
#[serde(rename = "_scroll_id")]
41+
pub scroll_id: Option<String>,
42+
43+
/// Dynamically fetched fields
44+
#[serde(default)]
45+
pub fields: Map<String, serde_json::Value>,
46+
47+
/// Point in time Id
48+
#[serde(skip_serializing_if = "Option::is_none")]
49+
pub pit_id: Option<String>,
50+
51+
/// Number of reduce phases
52+
#[serde(skip_serializing_if = "Option::is_none")]
53+
pub num_reduce_phases: Option<u64>,
54+
55+
/// Maximum document score. [None] when documents are implicitly sorted
56+
/// by a field other than `_score`
57+
#[serde(skip_serializing_if = "Option::is_none")]
58+
pub max_score: Option<f32>,
59+
60+
/// Number of clusters touched with their states
61+
#[serde(skip_serializing_if = "Option::is_none", rename = "_clusters")]
62+
pub clusters: Option<ClusterStatistics>,
63+
64+
/// Number of shards touched with their states
65+
#[serde(rename = "_shards")]
66+
pub shards: ShardStatistics,
67+
68+
/// Search hits
69+
pub hits: HitsMetadata,
70+
71+
/// Search aggregations
72+
#[serde(skip_serializing_if = "Option::is_none")]
73+
pub aggregations: Option<AggregationResults>,
74+
75+
#[serde(skip_serializing_if = "Map::is_empty", default)]
76+
/// Suggest response
77+
pub suggest: Map<String, Vec<Suggest>>,
78+
}

0 commit comments

Comments
 (0)