@@ -29,10 +29,33 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
29
29
}
30
30
};
31
31
32
+ enum EWakeUpTag : ui64 {
33
+ VPatchStartTag,
34
+ VPatchDiffTag,
35
+ MovedPatchTag,
36
+ NeverTag,
37
+ };
38
+
39
+ static TString ToString (ui64 wakeUp) {
40
+ switch (wakeUp) {
41
+ case VPatchStartTag: return " VPatchStartTag" ;
42
+ case VPatchDiffTag: return " VPatchDiffTag" ;
43
+ case MovedPatchTag: return " MovedPatchTag" ;
44
+ case NeverTag: return " NeverTag" ;
45
+ default : return " unknown@" + ToString (wakeUp);
46
+ }
47
+ }
48
+
32
49
static constexpr ui32 TypicalHandoffCount = 2 ;
33
50
static constexpr ui32 TypicalPartPlacementCount = 1 + TypicalHandoffCount;
34
51
static constexpr ui32 TypicalMaxPartsCount = TypicalPartPlacementCount * TypicalPartsInBlob;
35
52
53
+ static constexpr ui32 VPatchStartWaitingMultiplier = 2 ;
54
+ static constexpr ui32 VPatchDiffWaitingMultiplier = 6 ;
55
+ static constexpr ui32 MovedPatchWaitingMultiplier = 4 ;
56
+
57
+ static constexpr ui32 DefaultNsForChangeStrategy = 30'000'000 ; // 30 ms
58
+
36
59
TString Buffer;
37
60
38
61
ui32 OriginalGroupId;
@@ -47,6 +70,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
47
70
float ApproximateFreeSpaceShare = 0 ;
48
71
49
72
TInstant StartTime;
73
+ TInstant StageStart;
50
74
TInstant Deadline;
51
75
52
76
NLWTrace::TOrbit Orbit;
@@ -62,19 +86,24 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
62
86
ui32 SentStarts = 0 ;
63
87
ui32 ReceivedFoundParts = 0 ;
64
88
ui32 ErrorResponses = 0 ;
89
+ ui32 SentVPatchDiff = 0 ;
65
90
ui32 ReceivedResults = 0 ;
66
91
67
92
TStackVec<TPartPlacement, TypicalMaxPartsCount> FoundParts;
68
93
TStackVec<bool , TypicalDisksInSubring> ReceivedResponseFlags;
69
94
TStackVec<bool , TypicalDisksInSubring> EmptyResponseFlags;
70
95
TStackVec<bool , TypicalDisksInSubring> ErrorResponseFlags;
71
96
TStackVec<bool , TypicalDisksInSubring> ForceStopFlags;
97
+ TStackVec<bool , TypicalDisksInSubring> SlowFlags;
72
98
TBlobStorageGroupInfo::TVDiskIds VDisks;
73
99
74
100
bool UseVPatch = false ;
75
101
bool IsGoodPatchedBlobId = false ;
76
102
bool IsAllowedErasure = false ;
77
103
bool IsSecured = false ;
104
+ bool HasSlowVDisk = false ;
105
+ bool IsContinuedVPatch = false ;
106
+ bool IsMovedPatch = false ;
78
107
79
108
#define PATCH_LOG (priority, service, marker, msg, ...) \
80
109
STLOG (priority, service, marker, msg, \
@@ -97,6 +126,15 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
97
126
return mon->ActivePatch ;
98
127
}
99
128
129
+ void ScheduleWakeUp (TInstant startTime, EWakeUpTag tag) {
130
+ TDuration duration = TActivationContext::Now () - startTime;
131
+ Schedule (duration, new TEvents::TEvWakeup (tag));
132
+ }
133
+
134
+ void ScheduleWakeUp (EWakeUpTag tag) {
135
+ ScheduleWakeUp (StageStart, tag);
136
+ }
137
+
100
138
static constexpr ERequestType RequestType () {
101
139
return ERequestType::Patch;
102
140
}
@@ -280,6 +318,12 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
280
318
void Handle (TEvBlobStorage::TEvVPatchFoundParts::TPtr &ev) {
281
319
ReceivedFoundParts++;
282
320
321
+ if (Info->Type .ErasureFamily () != TErasureType::ErasureMirror) {
322
+ if (ReceivedFoundParts == SentStarts / 2 + SentStarts % 2 ) {
323
+ ScheduleWakeUp (VPatchStartTag);
324
+ }
325
+ }
326
+
283
327
NKikimrBlobStorage::TEvVPatchFoundParts &record = ev->Get ()->Record ;
284
328
285
329
Y_ABORT_UNLESS (record.HasCookie ());
@@ -313,6 +357,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
313
357
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA26, " Received VPatchFoundParts" ,
314
358
(Status, status),
315
359
(SubgroupIdx, (ui32)subgroupIdx),
360
+ (VDiskId, VDisks[subgroupIdx]),
316
361
(ReceivedResults, static_cast <TString>(TStringBuilder () << ReceivedFoundParts << ' /' << SentStarts)),
317
362
(ErrorReason, errorReason));
318
363
@@ -342,6 +387,13 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
342
387
}
343
388
ReceivedResults++;
344
389
390
+
391
+ if (Info->Type .ErasureFamily () != TErasureType::ErasureMirror) {
392
+ if (ReceivedResults == SentVPatchDiff / 2 + SentVPatchDiff % 2 ) {
393
+ ScheduleWakeUp (VPatchDiffTag);
394
+ }
395
+ }
396
+
345
397
PullOutStatusFlagsAndFressSpace (record);
346
398
Y_ABORT_UNLESS (record.HasStatus ());
347
399
NKikimrProto::EReplyStatus status = record.GetStatus ();
@@ -353,6 +405,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
353
405
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA23, " Received VPatchResult" ,
354
406
(Status, status),
355
407
(SubgroupIdx, (ui32)subgroupIdx),
408
+ (VDiskID, VDisks[subgroupIdx]),
356
409
(ReceivedResults, static_cast <TString>(TStringBuilder () << ReceivedResults << ' /' << Info->Type .TotalPartCount ())),
357
410
(ErrorReason, errorReason));
358
411
@@ -500,6 +553,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
500
553
}
501
554
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA20, " Send TEvVPatchDiff" ,
502
555
(VDiskIdxInSubgroup, idxInSubgroup),
556
+ (VDiskId, VDisks[idxInSubgroup]),
503
557
(PatchedVDiskIdxInSubgroup, patchedIdxInSubgroup),
504
558
(PartId, (ui64)partPlacement.PartId ),
505
559
(DiffsForPart, diffsForPart.size ()),
@@ -530,6 +584,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
530
584
ui32 vdiskIdx = vdiskIdxForParts[partIdx];
531
585
Y_VERIFY_S (vdiskIdx == partIdx || vdiskIdx >= dataParts, " vdiskIdx# " << vdiskIdx << " partIdx# " << partIdx);
532
586
placements.push_back (TPartPlacement{static_cast <ui8>(vdiskIdx), static_cast <ui8>(partIdx + 1 )});
587
+ SentVPatchDiff++;
533
588
}
534
589
SendDiffs (placements);
535
590
}
@@ -538,15 +593,38 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
538
593
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA09, " Start Moved strategy" ,
539
594
(SentStarts, SentStarts));
540
595
Become (&TThis::MovedPatchState);
596
+ IsMovedPatch = true ;
597
+ std::optional<ui32> subgroupIdx = 0 ;
541
598
542
- ui32 subgroupIdx = 0 ;
543
599
if (OkVDisksWithParts) {
544
600
ui32 okVDiskIdx = RandomNumber<ui32>(OkVDisksWithParts.size ());
545
601
subgroupIdx = OkVDisksWithParts[okVDiskIdx];
546
602
} else {
603
+ ui64 worstNs = 0 ;
604
+ ui64 nextToWorstNs = 0 ;
605
+ i32 worstSubGroubIdx = -1 ;
606
+ GetWorstPredictedDelaysNs (NKikimrBlobStorage::EVDiskQueueId::PutAsyncBlob, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
607
+ if (worstNs > nextToWorstNs * 2 ) {
608
+ SlowFlags[worstSubGroubIdx] = true ;
609
+ HasSlowVDisk = true ;
610
+ }
611
+ if (HasSlowVDisk) {
612
+ TStackVec<ui32, TypicalDisksInSubring> goodDisks;
613
+ for (ui32 idx = 0 ; idx < VDisks.size (); ++idx) {
614
+ if (!SlowFlags[idx] && !ErrorResponseFlags[idx]) {
615
+ goodDisks.push_back (idx);
616
+ }
617
+ }
618
+ if (goodDisks.size ()) {
619
+ ui32 okVDiskIdx = RandomNumber<ui32>(goodDisks.size ());
620
+ subgroupIdx = goodDisks[okVDiskIdx];
621
+ }
622
+ }
623
+ }
624
+ if (!subgroupIdx) {
547
625
subgroupIdx = RandomNumber<ui32>(Info->Type .TotalPartCount ());
548
626
}
549
- TVDiskID vDisk = Info->GetVDiskInSubgroup (subgroupIdx, OriginalId.Hash ());
627
+ TVDiskID vDisk = Info->GetVDiskInSubgroup (* subgroupIdx, OriginalId.Hash ());
550
628
TDeque<std::unique_ptr<TEvBlobStorage::TEvVMovedPatch>> events;
551
629
552
630
ui64 cookie = ((ui64)OriginalId.Hash () << 32 ) | PatchedId.Hash ();
@@ -575,7 +653,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
575
653
576
654
void StartFallback () {
577
655
Mon->PatchesWithFallback ->Inc ();
578
- if (WithMovingPatchRequestToStaticNode && UseVPatch && !IsSecured) {
656
+ if (WithMovingPatchRequestToStaticNode && UseVPatch && !IsSecured && !IsMovedPatch ) {
579
657
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA05, " Start Moved strategy from fallback" );
580
658
StartMovedPatch ();
581
659
} else {
@@ -588,20 +666,31 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
588
666
589
667
void StartVPatch () {
590
668
Become (&TThis::VPatchState);
591
-
669
+ StageStart = TActivationContext::Now ();
592
670
Info->PickSubgroup (OriginalId.Hash (), &VDisks, nullptr );
593
671
ReceivedResponseFlags.assign (VDisks.size (), false );
594
672
ErrorResponseFlags.assign (VDisks.size (), false );
595
673
EmptyResponseFlags.assign (VDisks.size (), false );
596
674
ForceStopFlags.assign (VDisks.size (), false );
675
+ SlowFlags.assign (VDisks.size (), false );
676
+
677
+ ui64 worstNs = 0 ;
678
+ ui64 nextToWorstNs = 0 ;
679
+ i32 worstSubGroubIdx = -1 ;
680
+ GetWorstPredictedDelaysNs (NKikimrBlobStorage::EVDiskQueueId::GetFastRead, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
681
+ if (worstNs > nextToWorstNs * 2 ) {
682
+ SlowFlags[worstSubGroubIdx] = true ;
683
+ HasSlowVDisk = true ;
684
+ }
597
685
598
686
TDeque<std::unique_ptr<TEvBlobStorage::TEvVPatchStart>> events;
599
-
600
687
for (ui32 idx = 0 ; idx < VDisks.size (); ++idx) {
601
- std::unique_ptr<TEvBlobStorage::TEvVPatchStart> ev = std::make_unique<TEvBlobStorage::TEvVPatchStart>(
602
- OriginalId, PatchedId, VDisks[idx], Deadline, idx, true );
603
- events.emplace_back (std::move (ev));
604
- SentStarts++;
688
+ if (!SlowFlags[idx]) {
689
+ std::unique_ptr<TEvBlobStorage::TEvVPatchStart> ev = std::make_unique<TEvBlobStorage::TEvVPatchStart>(
690
+ OriginalId, PatchedId, VDisks[idx], Deadline, idx, true );
691
+ events.emplace_back (std::move (ev));
692
+ SentStarts++;
693
+ }
605
694
}
606
695
607
696
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA08, " Start VPatch strategy" ,
@@ -702,6 +791,17 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
702
791
bool ContinueVPatch () {
703
792
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA15, " Continue VPatch strategy" ,
704
793
(FoundParts, ConvertFoundPartsToString ()));
794
+ StageStart = TActivationContext::Now ();
795
+ IsContinuedVPatch = true ;
796
+
797
+ ui64 worstNs = 0 ;
798
+ ui64 nextToWorstNs = 0 ;
799
+ i32 worstSubGroubIdx = -1 ;
800
+ GetWorstPredictedDelaysNs (NKikimrBlobStorage::EVDiskQueueId::GetFastRead, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
801
+ if (worstNs > nextToWorstNs * 2 ) {
802
+ SlowFlags[worstSubGroubIdx] = true ;
803
+ HasSlowVDisk = true ;
804
+ }
705
805
706
806
if (Info->Type .GetErasure () == TErasureType::ErasureMirror3dc) {
707
807
return ContinueVPatchForMirror3dc ();
@@ -714,6 +814,9 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
714
814
handoffForParts.resize (inPrimary.size ());
715
815
716
816
for (auto &[subgroupIdx, partId] : FoundParts) {
817
+ if (SlowFlags[subgroupIdx]) {
818
+ continue ;
819
+ }
717
820
if (subgroupIdx == partId - 1 ) {
718
821
inPrimary[partId - 1 ] = true ;
719
822
} else {
@@ -784,6 +887,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
784
887
785
888
void Bootstrap () {
786
889
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA01, " Actor bootstrapped" );
890
+ Schedule (TDuration::MicroSeconds (60'000'000 ), new TEvents::TEvWakeup (NeverTag));
787
891
788
892
TLogoBlobID truePatchedBlobId = PatchedId;
789
893
bool result = true ;
@@ -806,13 +910,14 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
806
910
return ;
807
911
}
808
912
913
+ Info->PickSubgroup (OriginalId.Hash (), &VDisks, nullptr );
809
914
IsSecured = (Info->GetEncryptionMode () != TBlobStorageGroupInfo::EEM_NONE);
810
915
811
916
IsGoodPatchedBlobId = result;
812
917
IsAllowedErasure = Info->Type .ErasureFamily () == TErasureType::ErasureParityBlock
813
918
|| Info->Type .GetErasure () == TErasureType::ErasureNone
814
919
|| Info->Type .GetErasure () == TErasureType::ErasureMirror3dc;
815
- if (IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID && !IsSecured) {
920
+ if (false && IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID && !IsSecured) {
816
921
PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA03, " Start VPatch strategy from bootstrap" );
817
922
StartVPatch ();
818
923
} else {
@@ -826,16 +931,90 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
826
931
}
827
932
}
828
933
934
+ void GetWorstPredictedDelaysNs (NKikimrBlobStorage::EVDiskQueueId queueId,
935
+ ui64 *outWorstNs, ui64 *outNextToWorstNs, i32 *outWorstSubgroupIdx) const
936
+ {
937
+ *outWorstSubgroupIdx = -1 ;
938
+ *outWorstNs = 0 ;
939
+ *outNextToWorstNs = 0 ;
940
+ for (ui32 diskIdx = 0 ; diskIdx < VDisks.size (); ++diskIdx) {
941
+ ui64 predictedNs = GroupQueues->GetPredictedDelayNsByOrderNumber (diskIdx, queueId);;
942
+ if (predictedNs > *outWorstNs) {
943
+ *outNextToWorstNs = *outWorstNs;
944
+ *outWorstNs = predictedNs;
945
+ *outWorstSubgroupIdx = diskIdx;
946
+ } else if (predictedNs > *outNextToWorstNs) {
947
+ *outNextToWorstNs = predictedNs;
948
+ }
949
+ }
950
+ }
951
+
952
+ void SetSlowDisks () {
953
+ for (ui32 idx = 0 ; idx < SlowFlags.size (); ++idx) {
954
+ SlowFlags[idx] = !ReceivedResponseFlags[idx] && !EmptyResponseFlags[idx] && !ErrorResponseFlags[idx];
955
+ if (SlowFlags[idx]) {
956
+ HasSlowVDisk = true ;
957
+ }
958
+ }
959
+ }
960
+
961
+ template <ui64 ExpectedTag>
962
+ void HandleWakeUp (TEvents::TEvWakeup::TPtr &ev) {
963
+ PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA36, " HandleWakeUp" ,
964
+ (ExpectedTag, ToString (ExpectedTag)),
965
+ (ReceivedTag, ToString (ev->Get ()->Tag )));
966
+ if (ev->Get ()->Tag == ExpectedTag) {
967
+ SetSlowDisks ();
968
+ StartFallback ();
969
+ }
970
+ if (ev->Get ()->Tag == NeverTag) {
971
+ SetSlowDisks ();
972
+ StartFallback ();
973
+ PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA40, " Found NeverTag wake up" , (ExpectedTag, ToString (ExpectedTag)));
974
+ }
975
+ }
976
+
977
+ void HandleVPatchWakeUp (TEvents::TEvWakeup::TPtr &ev) {
978
+ ui64 expectedTag = (IsContinuedVPatch ? VPatchDiffTag : VPatchStartTag);
979
+ PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA37, " HandleWakeUp" ,
980
+ (ExpectedTag, ToString (expectedTag)),
981
+ (ReceivedTag, ToString (ev->Get ()->Tag )));
982
+ if (ev->Get ()->Tag == expectedTag) {
983
+ SetSlowDisks ();
984
+ StartFallback ();
985
+ }
986
+ if (ev->Get ()->Tag == NeverTag) {
987
+ SetSlowDisks ();
988
+ StartFallback ();
989
+ PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA41, " Found NeverTag wake up" , (ExpectedTag, ToString (expectedTag)));
990
+ }
991
+ }
992
+
993
+ void HandleNeverTagWakeUp (TEvents::TEvWakeup::TPtr &ev) {
994
+ PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA42, " HandleWakeUp" ,
995
+ (ExpectedTag, ToString (NeverTag)),
996
+ (ReceivedTag, ToString (ev->Get ()->Tag )));
997
+ if (ev->Get ()->Tag == NeverTag) {
998
+ PATCH_LOG (PRI_DEBUG, BS_PROXY_PATCH, BPPA43, " Found NeverTag wake up in naive state" );
999
+ ReplyAndDie (NKikimrProto::DEADLINE);
1000
+ }
1001
+ }
1002
+
829
1003
STATEFN (NaiveState) {
830
1004
if (ProcessEvent (ev)) {
831
1005
return ;
832
1006
}
833
1007
switch (ev->GetTypeRewrite ()) {
834
1008
hFunc (TEvBlobStorage::TEvGetResult, Handle );
835
1009
hFunc (TEvBlobStorage::TEvPutResult, Handle );
1010
+
1011
+ IgnoreFunc (TEvents::TEvWakeup);
1012
+ // hFunc(TEvents::TEvWakeup, HandleWakeUp<NeverTag>);
836
1013
IgnoreFunc (TEvBlobStorage::TEvVPatchResult);
1014
+ IgnoreFunc (TEvBlobStorage::TEvVPatchFoundParts);
1015
+ IgnoreFunc (TEvBlobStorage::TEvVMovedPatchResult);
837
1016
default :
838
- Y_ABORT (" Received unknown event" );
1017
+ Y_FAIL_S (" Received unknown event " << TypeName (*ev-> GetBase ()) );
839
1018
};
840
1019
}
841
1020
@@ -845,9 +1024,11 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
845
1024
}
846
1025
switch (ev->GetTypeRewrite ()) {
847
1026
hFunc (TEvBlobStorage::TEvVMovedPatchResult, Handle );
1027
+ hFunc (TEvents::TEvWakeup, HandleWakeUp<MovedPatchTag>);
848
1028
IgnoreFunc (TEvBlobStorage::TEvVPatchResult);
1029
+ IgnoreFunc (TEvBlobStorage::TEvVPatchFoundParts);
849
1030
default :
850
- Y_ABORT (" Received unknown event" );
1031
+ Y_FAIL_S (" Received unknown event " << TypeName (*ev-> GetBase ()) );
851
1032
};
852
1033
}
853
1034
@@ -858,8 +1039,9 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor<TBlob
858
1039
switch (ev->GetTypeRewrite ()) {
859
1040
hFunc (TEvBlobStorage::TEvVPatchFoundParts, Handle );
860
1041
hFunc (TEvBlobStorage::TEvVPatchResult, Handle );
1042
+ hFunc (TEvents::TEvWakeup, HandleVPatchWakeUp);
861
1043
default :
862
- Y_ABORT (" Received unknown event" );
1044
+ Y_FAIL_S (" Received unknown event " << TypeName (*ev-> GetBase ()) );
863
1045
};
864
1046
}
865
1047
};
0 commit comments