@@ -319,6 +319,8 @@ namespace NKikimr::NTable::NPage {
319
319
Y_ABORT_UNLESS (Children);
320
320
TChild result = Children.front ();
321
321
Children.pop_front ();
322
+ PrevDataSize = result.DataSize ;
323
+ PrevRowCount = result.RowCount ;
322
324
return result;
323
325
}
324
326
@@ -334,15 +336,41 @@ namespace NKikimr::NTable::NPage {
334
336
return Children.size ();
335
337
}
336
338
339
+ ui64 GetPrevRowCount () const {
340
+ return PrevRowCount;
341
+ }
342
+
343
+ ui64 GetPrevDataSize () const {
344
+ return PrevDataSize;
345
+ }
346
+
347
+ ui64 GetNextRowCount (ui64 prevRowCount) const {
348
+ return Children[1 ].RowCount - prevRowCount;
349
+ }
350
+
351
+ ui64 GetNextDataSize (ui64 prevDataSize) const {
352
+ return Children[1 ].DataSize - prevDataSize;
353
+ }
354
+
355
+ ui64 GetRowCount () const {
356
+ return Children.back ().RowCount - PrevRowCount;
357
+ }
358
+
359
+ ui64 GetDataSize () const {
360
+ return Children.back ().DataSize - PrevDataSize;
361
+ }
362
+
337
363
private:
338
364
size_t KeysSize = 0 ;
365
+ ui64 PrevRowCount = 0 ;
366
+ ui64 PrevDataSize = 0 ;
339
367
TDeque<TString> Keys;
340
368
TDeque<TChild> Children;
341
369
};
342
370
343
371
public:
344
372
TBtreeIndexBuilder (TIntrusiveConstPtr<TPartScheme> scheme, TGroupId groupId,
345
- ui32 nodeTargetSize, ui32 nodeKeysMin, ui32 nodeKeysMax)
373
+ ui32 nodeTargetSize, ui32 nodeKeysMin, ui32 nodeKeysMax, ui32 leafDataSizeMax, ui32 leafRowsCountMax )
346
374
: Scheme(std::move(scheme))
347
375
, GroupId(groupId)
348
376
, GroupInfo(Scheme->GetLayout (groupId))
@@ -351,6 +379,8 @@ namespace NKikimr::NTable::NPage {
351
379
, NodeTargetSize(nodeTargetSize)
352
380
, NodeKeysMin(nodeKeysMin)
353
381
, NodeKeysMax(nodeKeysMax)
382
+ , LeafDataSizeMax(leafDataSizeMax)
383
+ , LeafRowsCountMax(leafRowsCountMax)
354
384
{
355
385
Y_ABORT_UNLESS (NodeTargetSize > 0 );
356
386
Y_ABORT_UNLESS (NodeKeysMin > 0 );
@@ -386,23 +416,35 @@ namespace NKikimr::NTable::NPage {
386
416
387
417
Levels[0 ].PushChild (child);
388
418
}
419
+
420
+ void Flush (IPageWriter &pager) {
421
+ for (ui32 levelIndex = 0 ; levelIndex < Levels.size (); levelIndex++) {
422
+ bool hasChanges = false ;
423
+
424
+ // Note: in theory we may want to flush one level multiple times when different triggers are applicable
425
+ while (CanFlush (levelIndex)) {
426
+ DoFlush (levelIndex, pager, false );
427
+ hasChanges = true ;
428
+ }
429
+
430
+ if (!hasChanges) {
431
+ break ; // no more changes
432
+ }
433
+ }
434
+ }
389
435
390
- std::optional<TBtreeIndexMeta> Flush (IPageWriter &pager, bool last) {
391
- Y_ABORT_UNLESS (Levels.size () < Max<ui32>(), " Levels size is out of bounds" );
436
// Finalizes the index and returns its meta.
// Walks the levels from 0 upwards. A level that holds no keys must contain
// exactly one child and must be the topmost level; that child is popped and
// returned together with the level index and IndexSize. Any other level is
// written out with a final DoFlush (last = true) before moving on.
// Aborts if the loop ends without having returned a root.
+ TBtreeIndexMeta Finish (IPageWriter &pager) {
392
437
for (ui32 levelIndex = 0 ; levelIndex < Levels.size (); levelIndex++) {
393
- if (last && !Levels[levelIndex].GetKeysCount ()) {
438
+ if (!Levels[levelIndex].GetKeysCount ()) {
394
439
Y_ABORT_UNLESS (Levels[levelIndex].GetChildrenCount () == 1 , " Should be root" );
395
- return TBtreeIndexMeta{ Levels[levelIndex].PopChild (), levelIndex, IndexSize };
440
+ Y_ABORT_UNLESS (levelIndex + 1 == Levels.size (), " Should be root" );
441
+ return {Levels[levelIndex].PopChild (), levelIndex, IndexSize};
396
442
}
397
443
398
- if (!TryFlush (levelIndex, pager, last)) {
399
- Y_ABORT_UNLESS (!last);
400
- break ;
401
- }
444
+ DoFlush (levelIndex, pager, true );
402
445
}
403
446
404
- Y_ABORT_UNLESS (!last, " Should have returned root" );
405
- return { };
447
+ Y_ABORT_UNLESS (false , " Should have returned root" );
406
448
}
407
449
408
450
void Reset () {
@@ -415,43 +457,47 @@ namespace NKikimr::NTable::NPage {
415
457
}
416
458
417
459
private:
418
- bool TryFlush (ui32 levelIndex, IPageWriter &pager, bool last) {
419
- if (!last && Levels[levelIndex].GetKeysCount () <= 2 * NodeKeysMax) {
420
- // Note: node should meet both NodeKeysMin and NodeSize restrictions for split
460
// Tells whether the given level has accumulated enough to emit a node now.
// Returns false while the level holds no more than waitFullNodes * NodeKeysMin
// keys. Otherwise returns true when any one trigger exceeds waitFullNodes
// times its limit: key count vs NodeKeysMax, CalcPageSize vs NodeTargetSize,
// or, on level 0 only, buffered data size vs LeafDataSizeMax and buffered row
// count vs LeafRowsCountMax.
+ bool CanFlush (ui32 levelIndex) {
461
+ const ui64 waitFullNodes = 2 ;
421
462
422
- if (Levels[levelIndex].GetKeysCount () <= 2 * NodeKeysMin) {
423
- // not enough keys for split
424
- return false ;
425
- }
426
-
427
- // Note: this size check is approximate and we might not produce 2 full-sized pages
428
- if (CalcPageSize (Levels[levelIndex]) <= 2 * NodeTargetSize) {
429
- // not enough bytes for split
430
- return false ;
431
- }
463
+ if (Levels[levelIndex].GetKeysCount () <= waitFullNodes * NodeKeysMin) {
464
+ // node keys min restriction should be always satisfied
465
+ return false ;
432
466
}
433
467
468
+ // Note: size checks are approximate and flush might not produce 2 full-sized pages
469
+
470
// && binds tighter than ||, so each `levelIndex == 0 && ...` clause below is
// a single alternative of the overall disjunction (leaf-only triggers).
+ return
471
+ Levels[levelIndex].GetKeysCount () > waitFullNodes * NodeKeysMax ||
472
+ CalcPageSize (Levels[levelIndex]) > waitFullNodes * NodeTargetSize ||
473
+ levelIndex == 0 && Levels[levelIndex].GetDataSize () > waitFullNodes * LeafDataSizeMax ||
474
+ levelIndex == 0 && Levels[levelIndex].GetRowCount () > waitFullNodes * LeafRowsCountMax;
475
+ }
476
+
477
+ void DoFlush (ui32 levelIndex, IPageWriter &pager, bool last) {
434
478
Writer.EnsureEmpty ();
479
+ auto prevDataSize = Levels[levelIndex].GetPrevDataSize ();
480
+ auto prevRowCount = Levels[levelIndex].GetPrevRowCount ();
435
481
436
- // Note: for now we build last nodes from all remaining level's keys
437
- // we may to try splitting them more evenly later
482
+ if (last) {
483
+ // Note: for now we build last nodes from all remaining level's keys
484
+ // we may try splitting them more evenly later
438
485
439
- while (last || Writer.GetKeysCount () < NodeKeysMin || Writer.CalcPageSize () < NodeTargetSize) {
440
- if (!last && Levels[levelIndex].GetKeysCount () < 3 ) {
441
- // we shouldn't produce empty nodes (but can violate NodeKeysMin restriction)
442
- break ;
486
+ while (Levels[levelIndex].GetKeysCount ()) {
487
+ Writer.AddChild (Levels[levelIndex].PopChild ());
488
+ Writer.AddKey (Levels[levelIndex].PopKey ());
443
489
}
444
- if (!last && Writer.GetKeysCount () >= NodeKeysMax) {
445
- // have enough keys
446
- break ;
447
- }
448
- if (last && !Levels[levelIndex].GetKeysCount ()) {
449
- // nothing left
450
- break ;
490
+ } else {
491
+ while (Writer.GetKeysCount () < NodeKeysMin || (
492
+ // can add more to writer if:
493
+ Levels[levelIndex].GetKeysCount () > 2 &&
494
+ Writer.GetKeysCount () < NodeKeysMax &&
495
+ Writer.CalcPageSize () < NodeTargetSize &&
496
+ (levelIndex != 0 || Levels[levelIndex].GetNextDataSize (prevDataSize) < LeafDataSizeMax) &&
497
+ (levelIndex != 0 || Levels[levelIndex].GetNextRowCount (prevRowCount) < LeafRowsCountMax))) {
498
+ Writer.AddChild (Levels[levelIndex].PopChild ());
499
+ Writer.AddKey (Levels[levelIndex].PopKey ());
451
500
}
452
-
453
- Writer.AddChild (Levels[levelIndex].PopChild ());
454
- Writer.AddKey (Levels[levelIndex].PopKey ());
455
501
}
456
502
auto lastChild = Levels[levelIndex].PopChild ();
457
503
Writer.AddChild (lastChild);
@@ -462,6 +508,7 @@ namespace NKikimr::NTable::NPage {
462
508
463
509
if (levelIndex + 1 == Levels.size ()) {
464
510
Levels.emplace_back ();
511
+ Y_ABORT_UNLESS (Levels.size () < Max<ui32>(), " Levels size is out of bounds" );
465
512
}
466
513
Levels[levelIndex + 1 ].PushChild (TChild{pageId, lastChild.RowCount , lastChild.DataSize , lastChild.ErasedRowCount });
467
514
if (!last) {
@@ -475,8 +522,6 @@ namespace NKikimr::NTable::NPage {
475
522
} else {
476
523
Y_ABORT_UNLESS (Levels[levelIndex].GetKeysCount (), " Shouldn't leave empty levels" );
477
524
}
478
-
479
- return true ;
480
525
}
481
526
482
527
size_t CalcPageSize (const TLevel& level) const {
@@ -497,6 +542,8 @@ namespace NKikimr::NTable::NPage {
497
542
const ui32 NodeTargetSize;
498
543
const ui32 NodeKeysMin;
499
544
const ui32 NodeKeysMax;
545
+ const ui32 LeafDataSizeMax;
546
+ const ui32 LeafRowsCountMax;
500
547
501
548
TRowId ChildRowCount = 0 ;
502
549
TRowId ChildErasedRowCount = 0 ;
0 commit comments