15
15
use std:: collections:: hash_map:: Entry ;
16
16
use std:: collections:: HashMap ;
17
17
use std:: sync:: Arc ;
18
+ use std:: time:: SystemTime ;
18
19
19
20
use common_arrow:: arrow:: array:: Array ;
20
21
use common_arrow:: arrow:: chunk:: Chunk ;
@@ -28,9 +29,11 @@ use common_arrow::parquet::metadata::ColumnDescriptor;
28
29
use common_arrow:: parquet:: read:: BasicDecompressor ;
29
30
use common_arrow:: parquet:: read:: PageMetaData ;
30
31
use common_arrow:: parquet:: read:: PageReader ;
32
+ use common_base:: rangemap:: RangeMerger ;
31
33
use common_base:: runtime:: UnlimitedFuture ;
32
34
use common_catalog:: plan:: PartInfoPtr ;
33
35
use common_catalog:: plan:: Projection ;
36
+ use common_catalog:: table_context:: TableContext ;
34
37
use common_datablocks:: DataBlock ;
35
38
use common_datavalues:: DataSchemaRef ;
36
39
use common_exception:: ErrorCode ;
@@ -45,6 +48,7 @@ use futures::TryStreamExt;
45
48
use opendal:: Object ;
46
49
use opendal:: Operator ;
47
50
use tracing:: debug_span;
51
+ use tracing:: info;
48
52
use tracing:: Instrument ;
49
53
50
54
use crate :: fuse_part:: FusePartInfo ;
@@ -270,8 +274,27 @@ impl BlockReader {
270
274
self . try_next_block ( & mut deserializer)
271
275
}
272
276
273
- pub async fn read_columns_data ( & self , part : PartInfoPtr ) -> Result < Vec < ( usize , Vec < u8 > ) > > {
274
- let part = FusePartInfo :: from_part ( & part) ?;
277
+ /// Merge overlap io request to one.
278
+ fn merge_io_requests (
279
+ max_gap_size : u64 ,
280
+ max_range_size : u64 ,
281
+ part : & PartInfoPtr ,
282
+ ) -> Result < Vec < std:: ops:: Range < u64 > > > {
283
+ let part = FusePartInfo :: from_part ( part) ?;
284
+ let ranges = part
285
+ . columns_meta
286
+ . values ( )
287
+ . map ( |v| ( v. offset ..v. offset + v. len ) )
288
+ . collect :: < Vec < _ > > ( ) ;
289
+ Ok ( RangeMerger :: from_iter ( ranges, max_gap_size, max_range_size) . ranges ( ) )
290
+ }
291
+
292
+ pub async fn read_columns_data (
293
+ & self ,
294
+ ctx : Arc < dyn TableContext > ,
295
+ raw_part : PartInfoPtr ,
296
+ ) -> Result < Vec < ( usize , Vec < u8 > ) > > {
297
+ let part = FusePartInfo :: from_part ( & raw_part) ?;
275
298
let columns = self . projection . project_column_leaves ( & self . column_leaves ) ?;
276
299
let indices = Self :: build_projection_indices ( & columns) ;
277
300
let mut join_handlers = Vec :: with_capacity ( indices. len ( ) ) ;
@@ -286,7 +309,41 @@ impl BlockReader {
286
309
) ) ) ;
287
310
}
288
311
289
- futures:: future:: try_join_all ( join_handlers) . await
312
+ let now = SystemTime :: now ( ) ;
313
+ let res = futures:: future:: try_join_all ( join_handlers) . await ;
314
+ let normal_cost = now. elapsed ( ) . unwrap ( ) . as_millis ( ) ;
315
+
316
+ // Merge io requests.
317
+ let max_gap_size = ctx. get_settings ( ) . get_max_storage_io_requests_merge_gap ( ) ?;
318
+ let max_range_size = ctx. get_settings ( ) . get_max_storage_io_requests_page_size ( ) ?;
319
+ let ranges = Self :: merge_io_requests ( max_gap_size, max_range_size, & raw_part) ?;
320
+ let mut merge_io_handlers = Vec :: with_capacity ( ranges. len ( ) ) ;
321
+ for ( index, range) in ranges. iter ( ) . enumerate ( ) {
322
+ merge_io_handlers. push ( UnlimitedFuture :: create ( Self :: read_range (
323
+ self . operator . object ( & part. location ) ,
324
+ index,
325
+ range. start ,
326
+ range. end ,
327
+ ) ) ) ;
328
+ }
329
+ let now = SystemTime :: now ( ) ;
330
+ let _ = futures:: future:: try_join_all ( merge_io_handlers) . await ;
331
+ let merge_cost = now. elapsed ( ) . unwrap ( ) . as_millis ( ) ;
332
+
333
+ info ! (
334
+ "async read normal partition={}, count={}, took:{} ms" ,
335
+ part. location,
336
+ part. columns_meta. len( ) ,
337
+ normal_cost,
338
+ ) ;
339
+ info ! (
340
+ "async read merge partition={}, count={}, took:{} ms" ,
341
+ part. location,
342
+ ranges. len( ) ,
343
+ merge_cost,
344
+ ) ;
345
+
346
+ res
290
347
}
291
348
292
349
pub fn support_blocking_api ( & self ) -> bool {
@@ -316,6 +373,17 @@ impl BlockReader {
316
373
Ok ( results)
317
374
}
318
375
376
+ pub async fn read_range (
377
+ o : Object ,
378
+ index : usize ,
379
+ start : u64 ,
380
+ end : u64 ,
381
+ ) -> Result < ( usize , Vec < u8 > ) > {
382
+ let chunk = o. range_read ( start..end) . await ?;
383
+
384
+ Ok ( ( index, chunk) )
385
+ }
386
+
319
387
pub async fn read_column (
320
388
o : Object ,
321
389
index : usize ,
0 commit comments