Skip to content

Commit 7116d46

Browse files
robdrynkin and robdrynkin authored
KIKIMR-20522: Tests for vdisks balancing (ydb-platform#531)
Co-authored-by: robdrynkin <[email protected]>
1 parent 86194b9 commit 7116d46

File tree

4 files changed

+322
-1
lines changed

4 files changed

+322
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
#include <ydb/core/blobstorage/ut_blobstorage/lib/env.h>
2+
3+
#include <library/cpp/iterator/enumerate.h>
4+
5+
#include <util/random/entropy.h>
6+
7+
8+
using TPartsLocations = TVector<TVector<ui8>>;
9+
10+
11+
struct TTestEnv {
12+
TTestEnv(ui32 nodeCount, TBlobStorageGroupType erasure)
13+
: Env({
14+
.NodeCount = nodeCount,
15+
.VDiskReplPausedAtStart = false,
16+
.Erasure = erasure,
17+
})
18+
{
19+
Env.CreateBoxAndPool(1, 1);
20+
Env.Sim(TDuration::Minutes(1));
21+
22+
auto groups = Env.GetGroups();
23+
UNIT_ASSERT_VALUES_EQUAL(groups.size(), 1);
24+
GroupInfo = Env.GetGroupInfo(groups.front());
25+
26+
for (ui32 i = 0; i < Env.Settings.NodeCount; ++i) {
27+
RunningNodes.insert(i);
28+
}
29+
}
30+
31+
static TString PrepareData(const ui32 dataLen, const ui32 start) {
32+
TString data(Reserve(dataLen));
33+
for (ui32 i = 0; i < dataLen; ++i) {
34+
data.push_back('a' + (start + i) % 26);
35+
}
36+
return data;
37+
};
38+
39+
void SendPut(ui32 step, const TString& data, NKikimrProto::EReplyStatus expectedStatus) {
40+
const TLogoBlobID id(1, 1, step, 0, data.size(), 0);
41+
Cerr << "SEND TEvPut with key " << id.ToString() << Endl;
42+
const TActorId sender = Env.Runtime->AllocateEdgeActor(GroupInfo->GetActorId(*RunningNodes.begin()).NodeId(), __FILE__, __LINE__);
43+
auto ev = std::make_unique<TEvBlobStorage::TEvPut>(id, data, TInstant::Max());
44+
Env.Runtime->WrapInActorContext(sender, [&] {
45+
SendToBSProxy(sender, GroupInfo->GroupID, ev.release());
46+
});
47+
auto res = Env.WaitForEdgeActorEvent<TEvBlobStorage::TEvPutResult>(sender, false);
48+
UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, expectedStatus);
49+
Cerr << "TEvPutResult: " << res->Get()->ToString() << Endl;
50+
};
51+
52+
auto SendGet(ui32 step, ui32 dataSize, bool mustRestoreFirst=false) {
53+
const TLogoBlobID blobId(1, 1, step, 0, dataSize, 0);
54+
Cerr << "SEND TEvGet with key " << blobId.ToString() << Endl;
55+
const TActorId sender = Env.Runtime->AllocateEdgeActor(GroupInfo->GetActorId(*RunningNodes.begin()).NodeId(), __FILE__, __LINE__);
56+
auto ev = std::make_unique<TEvBlobStorage::TEvGet>(
57+
blobId,
58+
/* shift */ 0,
59+
/* size */ dataSize,
60+
TInstant::Max(),
61+
NKikimrBlobStorage::EGetHandleClass::FastRead,
62+
mustRestoreFirst
63+
);
64+
Env.Runtime->WrapInActorContext(sender, [&] () {
65+
SendToBSProxy(sender, GroupInfo->GroupID, ev.release());
66+
});
67+
TInstant getDeadline = Env.Now() + TDuration::Seconds(30);
68+
auto res = Env.WaitForEdgeActorEvent<TEvBlobStorage::TEvGetResult>(sender, /* termOnCapture */ false, getDeadline);
69+
Cerr << "TEvGetResult: " << res->Get()->ToString() << Endl;
70+
return res;
71+
};
72+
73+
TActorId GetQueue(const TVDiskID& vDiskId) {
74+
if (!Queues.contains(vDiskId)) {
75+
Queues[vDiskId] = Env.CreateQueueActor(vDiskId, NKikimrBlobStorage::EVDiskQueueId::GetFastRead, 1000);
76+
}
77+
return Queues[vDiskId];
78+
}
79+
80+
TVector<ui32> GetParts(ui32 position, const TLogoBlobID& blobId) {
81+
if (!RunningNodes.contains(position)) {
82+
return {};
83+
}
84+
auto vDiskId = GroupInfo->GetVDiskId(position);
85+
auto ev = TEvBlobStorage::TEvVGet::CreateExtremeIndexQuery(
86+
vDiskId, TInstant::Max(), NKikimrBlobStorage::EGetHandleClass::AsyncRead,
87+
TEvBlobStorage::TEvVGet::EFlags::None, 0,
88+
{{blobId, 0, 0}}
89+
);
90+
const TActorId sender = Env.Runtime->AllocateEdgeActor(GroupInfo->GetActorId(*RunningNodes.begin()).NodeId(), __FILE__, __LINE__);
91+
TVector<ui32> partsRes;
92+
93+
Cerr << "Get request for vdisk " << position << Endl;
94+
auto queueId = GetQueue(vDiskId);
95+
Env.Runtime->WrapInActorContext(sender, [&] {
96+
Env.Runtime->Send(new IEventHandle(queueId, sender, ev.release()));
97+
});
98+
auto res = Env.WaitForEdgeActorEvent<TEvBlobStorage::TEvVGetResult>(sender, false);
99+
auto parts = res->Get()->Record.GetResult().at(0).GetParts();
100+
partsRes = TVector<ui32>(parts.begin(), parts.end());
101+
return partsRes;
102+
}
103+
104+
TPartsLocations GetExpectedPartsLocations(const TLogoBlobID& blobId) {
105+
TPartsLocations result(GroupInfo->GetTopology().GType.BlobSubgroupSize());
106+
TBlobStorageGroupInfo::TOrderNums orderNums;
107+
GroupInfo->GetTopology().PickSubgroup(blobId.Hash(), orderNums);
108+
for (ui32 i = 0; i < GroupInfo->GetTopology().GType.TotalPartCount(); ++i) {
109+
result[orderNums[i]].push_back(i + 1);
110+
}
111+
return result;
112+
}
113+
114+
TPartsLocations GetActualPartsLocations(const TLogoBlobID& blobId) {
115+
TPartsLocations result(GroupInfo->GetTopology().GType.BlobSubgroupSize());
116+
for (ui32 i = 0; i < result.size(); ++i) {
117+
for (ui32 part: GetParts(i, blobId)) {
118+
result[i].push_back(part);
119+
}
120+
Sort(result[i].begin(), result[i].end());
121+
}
122+
return result;
123+
}
124+
125+
bool CheckPartsLocations(const TLogoBlobID& blobId) {
126+
auto expectedParts = GetExpectedPartsLocations(blobId);
127+
auto actualParts = GetActualPartsLocations(blobId);
128+
UNIT_ASSERT_VALUES_EQUAL(expectedParts.size(), actualParts.size());
129+
130+
for (ui32 i = 0; i < expectedParts.size(); ++i) {
131+
UNIT_ASSERT_VALUES_EQUAL(expectedParts[i].size(), actualParts[i].size());
132+
for (ui32 j = 0; j < expectedParts[i].size(); ++j) {
133+
UNIT_ASSERT_VALUES_EQUAL(expectedParts[i][j], actualParts[i][j]);
134+
}
135+
}
136+
137+
return true;
138+
}
139+
140+
void StopNode(ui32 position) {
141+
if (!RunningNodes.contains(position)) {
142+
return;
143+
}
144+
Env.StopNode(GroupInfo->GetActorId(position).NodeId());
145+
RunningNodes.erase(position);
146+
}
147+
148+
void StartNode(ui32 position) {
149+
if (RunningNodes.contains(position)) {
150+
return;
151+
}
152+
Env.StartNode(GroupInfo->GetActorId(position).NodeId());
153+
RunningNodes.insert(position);
154+
for (auto [_, queueId]: Queues) {
155+
Env.Runtime->Send(new IEventHandle(TEvents::TSystem::Poison, 0, queueId, {}, nullptr, 0), queueId.NodeId());
156+
}
157+
Queues.clear();
158+
}
159+
160+
TEnvironmentSetup* operator->() {
161+
return &Env;
162+
}
163+
164+
TEnvironmentSetup Env;
165+
TIntrusivePtr<TBlobStorageGroupInfo> GroupInfo;
166+
THashSet<ui32> RunningNodes;
167+
THashMap<TVDiskID, TActorId> Queues;
168+
};
169+
170+
// Builds the id of the test blob written at `step` with payload size `dataSize`.
// All other id components are the same fixed values for every test blob.
TLogoBlobID MakeLogoBlobId(ui32 step, ui32 dataSize) {
    const TLogoBlobID id(
        1,
        1,
        /* step */ step,
        0,
        /* size */ dataSize,
        0
    );
    return id;
}
173+
174+
175+
// Returns a string of `len` bytes read from the process entropy pool.
TString GenData(ui32 len) {
    auto buffer = TString::Uninitialized(len);
    char* const raw = buffer.Detach();
    EntropyPool().Read(raw, buffer.size());
    return buffer;
}
180+
181+
182+
struct TStopOneNodeTest {
183+
TTestEnv Env;
184+
TString data;
185+
186+
void RunTest() {
187+
ui32 step = 0;
188+
189+
{ // Check just a normal put works
190+
Env.SendPut(++step, data, NKikimrProto::OK);
191+
UNIT_ASSERT_VALUES_EQUAL(Env.SendGet(step, data.size())->Get()->Responses[0].Buffer.ConvertToString(), data);
192+
Env.CheckPartsLocations(MakeLogoBlobId(step, data.size()));
193+
}
194+
195+
196+
{ // Stop one node that should have a part, make put, start it and check that blob would be moved from handoff on main
197+
auto blobId = MakeLogoBlobId(++step, data.size());
198+
auto locations = Env.GetExpectedPartsLocations(blobId);
199+
ui32 nodeIdWithBlob = 0;
200+
while (locations[nodeIdWithBlob].size() == 0) ++nodeIdWithBlob;
201+
202+
Env.StopNode(nodeIdWithBlob);
203+
Env.SendPut(step, data, NKikimrProto::OK);
204+
Env->Sim(TDuration::Seconds(10));
205+
Env.StartNode(nodeIdWithBlob);
206+
Env->Sim(TDuration::Seconds(10));
207+
Env.CheckPartsLocations(MakeLogoBlobId(step, data.size()));
208+
UNIT_ASSERT_VALUES_EQUAL(Env.SendGet(step, data.size())->Get()->Responses[0].Buffer.ConvertToString(), data);
209+
}
210+
}
211+
};
212+
213+
struct TRandomTest {
214+
TTestEnv Env;
215+
ui32 NumIters;
216+
217+
void RunTest() {
218+
TVector<TString> data(Reserve(NumIters));
219+
220+
for (ui32 step = 0; step < NumIters; ++step) {
221+
Cerr << step << Endl;
222+
data.push_back(GenData(16 + random() % 4096));
223+
auto blobId = MakeLogoBlobId(step, data.back().size());
224+
auto locations = Env.GetExpectedPartsLocations(blobId);
225+
226+
if (random() % 10 == 1 && Env.RunningNodes.size() + 2 > Env->Settings.NodeCount) {
227+
ui32 nodeId = random() % Env->Settings.NodeCount;
228+
Cerr << "Stop node " << nodeId << Endl;
229+
Env.StopNode(nodeId);
230+
Env->Sim(TDuration::Seconds(10));
231+
}
232+
233+
Env.SendPut(step, data.back(), NKikimrProto::OK);
234+
235+
if (random() % 10 == 1) {
236+
for (ui32 pos = 0; pos < Env->Settings.NodeCount; ++pos) {
237+
if (!Env.RunningNodes.contains(pos)) {
238+
Cerr << "Start node " << pos << Endl;
239+
Env.StartNode(pos);
240+
Env->Sim(TDuration::Seconds(10));
241+
break;
242+
}
243+
}
244+
}
245+
246+
if (random() % 50 == 1) {
247+
ui32 pos = random() % Env->Settings.NodeCount;
248+
if (Env.RunningNodes.contains(pos)) {
249+
Env->CompactVDisk(Env.GroupInfo->GetActorId(pos));
250+
Env->Sim(TDuration::Seconds(10));
251+
}
252+
}
253+
254+
// Wipe random node
255+
if (random() % 100 == 1) {
256+
ui32 pos = random() % Env->Settings.NodeCount;
257+
if (Env.RunningNodes.contains(pos)) {
258+
auto baseConfig = Env->FetchBaseConfig();
259+
const auto& somePDisk = baseConfig.GetPDisk(pos);
260+
const auto& someVSlot = baseConfig.GetVSlot(pos);
261+
Env->Wipe(somePDisk.GetNodeId(), somePDisk.GetPDiskId(), someVSlot.GetVSlotId().GetVSlotId());
262+
Env->Sim(TDuration::Seconds(10));
263+
}
264+
}
265+
}
266+
267+
for (ui32 pos = 0; pos < Env->Settings.NodeCount; ++pos) {
268+
Env.StartNode(pos);
269+
}
270+
271+
Env->Sim(TDuration::Seconds(300));
272+
Cerr << "Start checking" << Endl;
273+
for (ui32 step = 0; step < NumIters; ++step) {
274+
Cerr << step << Endl;
275+
Env.CheckPartsLocations(MakeLogoBlobId(step, data[step].size()));
276+
UNIT_ASSERT_VALUES_EQUAL(Env.SendGet(step, data[step].size())->Get()->Responses[0].Buffer.ConvertToString(), data[step]);
277+
}
278+
}
279+
};
280+
281+
282+
283+
// VDisk balancing scenarios, each run for the 4+2 block erasure on 8 nodes and
// for mirror-3-dc on 9 nodes.
Y_UNIT_TEST_SUITE(VDiskBalancing) {

    // --- one node is down during the put: small blobs ---

    Y_UNIT_TEST(TestStopOneNode_Block42) {
        TStopOneNodeTest scenario{TTestEnv(8, TBlobStorageGroupType::Erasure4Plus2Block), GenData(100)};
        scenario.RunTest();
    }

    Y_UNIT_TEST(TestStopOneNode_Mirror3dc) {
        TStopOneNodeTest scenario{TTestEnv(9, TBlobStorageGroupType::ErasureMirror3dc), GenData(100)};
        scenario.RunTest();
    }

    // --- one node is down during the put: large blobs ---
    // NOTE(review): the size is 521_KB as written; possibly 512_KB was intended — confirm.

    Y_UNIT_TEST(TestStopOneNode_Block42_HugeBlob) {
        TStopOneNodeTest scenario{TTestEnv(8, TBlobStorageGroupType::Erasure4Plus2Block), GenData(521_KB)};
        scenario.RunTest();
    }

    Y_UNIT_TEST(TestStopOneNode_Mirror3dc_HugeBlob) {
        TStopOneNodeTest scenario{TTestEnv(9, TBlobStorageGroupType::ErasureMirror3dc), GenData(521_KB)};
        scenario.RunTest();
    }

    // --- randomized stop/start/compact/wipe over 1000 puts ---

    Y_UNIT_TEST(TestRandom_Block42) {
        TRandomTest scenario{TTestEnv(8, TBlobStorageGroupType::Erasure4Plus2Block), 1000};
        scenario.RunTest();
    }

    Y_UNIT_TEST(TestRandom_Mirror3dc) {
        TRandomTest scenario{TTestEnv(9, TBlobStorageGroupType::ErasureMirror3dc), 1000};
        scenario.RunTest();
    }

}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Unit tests for VDisk balancing (balancing.cpp), built against ut_blobstorage.
UNITTEST_FOR(ydb/core/blobstorage/ut_blobstorage)

# Medium-sized test with a 600 second timeout.
SIZE(MEDIUM)
TIMEOUT(600)

SRCS(balancing.cpp)

# Shared test environment helpers (lib/env.h).
PEERDIR(ydb/core/blobstorage/ut_blobstorage/lib)

END()

ydb/core/blobstorage/ut_blobstorage/ya.make

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ REQUIREMENTS(ram:32)
6060
END()
6161

6262
RECURSE_FOR_TESTS(
63+
ut_balancing
6364
ut_blob_depot
6465
ut_blob_depot_fat
6566
ut_donor

ydb/core/util/testactorsys.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class TTestActorSystem {
161161
}
162162

163163
void StateFunc(TAutoPtr<IEventHandle>& ev) {
164-
Y_ABORT_UNLESS(HandlePtr, "event is not being captured by this actor Tag# %s", Tag.data());
164+
Y_ABORT_UNLESS(HandlePtr, "event %s is not being captured by this actor Tag# %s", ev->GetTypeName().data(), Tag.data());
165165
Y_ABORT_UNLESS(!*HandlePtr);
166166
HandlePtr->reset(ev.Release());
167167
}

0 commit comments

Comments
 (0)