@@ -4540,6 +4540,130 @@ Y_UNIT_TEST_SUITE(DataShardSnapshots) {
4540
4540
}
4541
4541
}
4542
4542
4543
+ void CompactBorrowed (TTestActorRuntime& runtime, ui64 shardId, const TTableId& tableId) {
4544
+ auto msg = MakeHolder<TEvDataShard::TEvCompactBorrowed>(tableId.PathId );
4545
+ auto sender = runtime.AllocateEdgeActor ();
4546
+ runtime.SendToPipe (shardId, sender, msg.Release (), 0 , GetPipeConfigWithRetries ());
4547
+ runtime.GrabEdgeEventRethrow <TEvDataShard::TEvCompactBorrowedResult>(sender);
4548
+ }
4549
+
4550
+ Y_UNIT_TEST (PostMergeNotCompactedTooEarly) {
4551
+ TPortManager pm;
4552
+ TServerSettings serverSettings (pm.GetPort (2134 ));
4553
+ serverSettings.SetDomainName (" Root" )
4554
+ .SetUseRealThreads (false )
4555
+ .SetDomainPlanResolution (100 );
4556
+
4557
+ Tests::TServer::TPtr server = new TServer (serverSettings);
4558
+ auto &runtime = *server->GetRuntime ();
4559
+ auto sender = runtime.AllocateEdgeActor ();
4560
+
4561
+ runtime.SetLogPriority (NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE);
4562
+
4563
+ InitRoot (server, sender);
4564
+
4565
+ TDisableDataShardLogBatching disableDataShardLogBatching;
4566
+
4567
+ KqpSchemeExec (runtime, R"(
4568
+ CREATE TABLE `/Root/table` (key int, value bytes, PRIMARY KEY (key))
4569
+ WITH (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1,
4570
+ PARTITION_AT_KEYS = (5));
4571
+ )" );
4572
+
4573
+ const auto shards = GetTableShards (server, sender, " /Root/table" );
4574
+ UNIT_ASSERT_VALUES_EQUAL (shards.size (), 2u );
4575
+ const auto tableId = ResolveTableId (server, sender, " /Root/table" );
4576
+
4577
+ for (int i = 0 ; i < 20 ; ++i) {
4578
+ Cerr << " ... upserting key " << i << Endl;
4579
+ auto query = Sprintf (R"(
4580
+ UPSERT INTO `/Root/table` (key, value) VALUES (%d, '%s');
4581
+ )" , i, TString (128 * 1024 , ' x' ).c_str ());
4582
+ ExecSQL (server, sender, query);
4583
+ if (i >= 5 ) {
4584
+ Cerr << " ... compacting shard " << shards.at (1 ) << Endl;
4585
+ CompactTable (runtime, shards.at (1 ), tableId, false );
4586
+ } else if (i == 4 ) {
4587
+ Cerr << " ... compacting shard " << shards.at (0 ) << Endl;
4588
+ CompactTable (runtime, shards.at (0 ), tableId, false );
4589
+ }
4590
+ }
4591
+
4592
+ // Read (and snapshot) current data, so it doesn't go away on compaction
4593
+ UNIT_ASSERT_VALUES_EQUAL (
4594
+ KqpSimpleExec (runtime, " SELECT COUNT(*) FROM `/Root/table`;" ),
4595
+ " { items { uint64_value: 20 } }" );
4596
+
4597
+ // Delete all the data in shard 0, this is small and will stay in memtable
4598
+ // But when borrowed dst compaction will have pressure to compact it all
4599
+ ExecSQL (server, sender, " DELETE FROM `/Root/table` WHERE key < 5" );
4600
+
4601
+ std::vector<TEvDataShard::TEvSplitTransferSnapshot::TPtr> snapshots;
4602
+ auto captureSnapshots = runtime.AddObserver <TEvDataShard::TEvSplitTransferSnapshot>(
4603
+ [&](TEvDataShard::TEvSplitTransferSnapshot::TPtr& ev) {
4604
+ auto * msg = ev->Get ();
4605
+ Cerr << " ... captured snapshot from " << msg->Record .GetSrcTabletId () << Endl;
4606
+ snapshots.emplace_back (ev.Release ());
4607
+ });
4608
+
4609
+ Cerr << " ... merging table" << Endl;
4610
+ SetSplitMergePartCountLimit (server->GetRuntime (), -1 );
4611
+ ui64 txId = AsyncMergeTable (server, sender, " /Root/table" , shards);
4612
+ Cerr << " ... started merge " << txId << Endl;
4613
+ WaitFor (runtime, [&]{ return snapshots.size () >= 2 ; }, " both src tablet snapshots" );
4614
+
4615
+ std::vector<TEvBlobStorage::TEvGet::TPtr> gets ;
4616
+ auto captureGets = runtime.AddObserver <TEvBlobStorage::TEvGet>(
4617
+ [&](TEvBlobStorage::TEvGet::TPtr& ev) {
4618
+ auto * msg = ev->Get ();
4619
+ if (msg->Queries [0 ].Id .TabletID () == shards.at (1 )) {
4620
+ Cerr << " ... blocking blob get of " << msg->Queries [0 ].Id << Endl;
4621
+ gets .emplace_back (ev.Release ());
4622
+ }
4623
+ });
4624
+
4625
+ // Release snapshot for shard 0 then shard 1
4626
+ captureSnapshots.Remove ();
4627
+ Cerr << " ... unlocking snapshots from tablet " << shards.at (0 ) << Endl;
4628
+ for (auto & ev : snapshots) {
4629
+ if (ev && ev->Get ()->Record .GetSrcTabletId () == shards.at (0 )) {
4630
+ runtime.Send (ev.Release (), 0 , true );
4631
+ }
4632
+ }
4633
+ Cerr << " ... unblocking snapshots from tablet " << shards.at (1 ) << Endl;
4634
+ for (auto & ev : snapshots) {
4635
+ if (ev && ev->Get ()->Record .GetSrcTabletId () == shards.at (1 )) {
4636
+ runtime.Send (ev.Release (), 0 , true );
4637
+ }
4638
+ }
4639
+
4640
+ // Let it commit above snapshots and incorrectly compact after the first one is loaded and merged
4641
+ runtime.SimulateSleep (TDuration::Seconds (1 ));
4642
+ UNIT_ASSERT (gets .size () > 0 );
4643
+
4644
+ Cerr << " ... unblocking blob gets" << Endl;
4645
+ captureGets.Remove ();
4646
+ for (auto & ev : gets ) {
4647
+ runtime.Send (ev.Release (), 0 , true );
4648
+ }
4649
+
4650
+ // Let it finish loading the second snapshot
4651
+ runtime.SimulateSleep (TDuration::Seconds (1 ));
4652
+
4653
+ // Wait for merge to complete and start a borrowed compaction
4654
+ // When bug is present it will cause newly compacted to part to have epoch larger than previously compacted
4655
+ WaitTxNotification (server, sender, txId);
4656
+ const auto merged = GetTableShards (server, sender, " /Root/table" );
4657
+ UNIT_ASSERT_VALUES_EQUAL (merged.size (), 1u );
4658
+ Cerr << " ... compacting borrowed parts in shard " << merged.at (0 ) << Endl;
4659
+ CompactBorrowed (runtime, merged.at (0 ), tableId);
4660
+
4661
+ // Validate we have an expected number of rows
4662
+ UNIT_ASSERT_VALUES_EQUAL (
4663
+ KqpSimpleExec (runtime, " SELECT COUNT(*) FROM `/Root/table`;" ),
4664
+ " { items { uint64_value: 15 } }" );
4665
+ }
4666
+
4543
4667
}
4544
4668
4545
4669
} // namespace NKikimr
0 commit comments