Skip to content

Commit 59c7c77

Browse files
authored
do not trigger dead tablet issue during creation of a lot of tablets… (#10398)
1 parent 4bbfda7 commit 59c7c77

File tree

7 files changed

+153
-110
lines changed

7 files changed

+153
-110
lines changed

ydb/core/health_check/health_check.cpp

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -189,21 +189,34 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
189189
int Count = 1;
190190
TStackVec<TString> Identifiers;
191191

192-
TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) {
193-
Type = info.tablettype();
194-
Leader = info.followerid() == 0;
192+
static ETabletState GetState(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) {
195193
if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_STOPPED) {
196-
State = ETabletState::Stopped;
197-
} else if (!settings.IsHiveSynchronizationPeriod
198-
&& info.volatilestate() != NKikimrHive::TABLET_VOLATILE_STATE_RUNNING
199-
&& TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier
200-
&& info.tabletbootmode() == NKikimrHive::TABLET_BOOT_MODE_DEFAULT) {
201-
State = ETabletState::Dead;
202-
} else if (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) {
203-
State = ETabletState::RestartsTooOften;
204-
} else {
205-
State = ETabletState::Good;
194+
return ETabletState::Stopped;
195+
}
196+
ETabletState state = (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) ? ETabletState::RestartsTooOften : ETabletState::Good;
197+
if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_RUNNING) {
198+
return state;
199+
}
200+
if (info.tabletbootmode() != NKikimrHive::TABLET_BOOT_MODE_DEFAULT) {
201+
return state;
202+
}
203+
if (info.lastalivetimestamp() != 0 && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier) {
204+
// Tablet is not alive for a long time
205+
// We should report it as dead unless it's just waiting to be created
206+
if (info.generation() == 0 && info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_BOOTING && !info.inwaitqueue()) {
207+
return state;
208+
}
209+
return ETabletState::Dead;
206210
}
211+
return state;
212+
213+
}
214+
215+
TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings)
216+
: Type(info.tablettype())
217+
, State(GetState(info, settings))
218+
, Leader(info.followerid() == 0)
219+
{
207220
}
208221

209222
bool operator ==(const TNodeTabletStateCount& o) const {
@@ -1983,6 +1996,8 @@ class TSelfCheckRequest : public TActorBootstrapped<TSelfCheckRequest> {
19831996
}
19841997
}
19851998

1999+
// do not propagate RED status to vdisk - so that vdisk is not considered down when computing group status
2000+
context.OverallStatus = MinStatus(context.OverallStatus, Ydb::Monitoring::StatusFlag::ORANGE);
19862001
storagePDiskStatus.set_overall(context.GetOverallStatus());
19872002
}
19882003

ydb/core/health_check/health_check_ut.cpp

Lines changed: 115 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <ydb/core/testlib/test_client.h>
44
#include <ydb/public/lib/deprecated/kicli/kicli.h>
55

6+
#include <ydb/core/mind/hive/hive_events.h>
67
#include <ydb/core/node_whiteboard/node_whiteboard.h>
78
#include <ydb/core/blobstorage/base/blobstorage_events.h>
89
#include <ydb/core/tx/schemeshard/schemeshard.h>
@@ -68,7 +69,8 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
6869

6970
struct TTestVSlotInfo {
7071
std::optional<NKikimrBlobStorage::EVDiskStatus> Status;
71-
ui32 Generation;
72+
ui32 Generation = DEFAULT_GROUP_GENERATION;
73+
NKikimrBlobStorage::EDriveStatus PDiskStatus = NKikimrBlobStorage::ACTIVE;
7274

7375
TTestVSlotInfo(std::optional<NKikimrBlobStorage::EVDiskStatus> status = NKikimrBlobStorage::READY,
7476
ui32 generation = DEFAULT_GROUP_GENERATION)
@@ -77,7 +79,11 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
7779
{
7880
}
7981

80-
TTestVSlotInfo(NKikimrBlobStorage::EVDiskStatus status) : Status(status), Generation(DEFAULT_GROUP_GENERATION) {}
82+
TTestVSlotInfo(NKikimrBlobStorage::EVDiskStatus status, NKikimrBlobStorage::EDriveStatus pDiskStatus = NKikimrBlobStorage::ACTIVE)
83+
: Status(status)
84+
, PDiskStatus(pDiskStatus)
85+
{
86+
}
8187
};
8288

8389
using TVDisks = TVector<TTestVSlotInfo>;
@@ -222,18 +228,20 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
222228
entry->mutable_info()->set_name(STORAGE_POOL_NAME);
223229
}
224230

225-
void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, size_t count, double occupancy) {
231+
void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, const TVDisks& vslots, double occupancy) {
226232
auto& record = (*ev)->Get()->Record;
227233
auto entrySample = record.entries(0);
228234
record.clear_entries();
229235
auto pdiskId = PDISK_START_ID;
230236
const size_t totalSize = 3'200'000'000'000ull;
231-
for (size_t i = 0; i < count; ++i) {
237+
const auto *descriptor = NKikimrBlobStorage::EDriveStatus_descriptor();
238+
for (const auto& vslot : vslots) {
232239
auto* entry = record.add_entries();
233240
entry->CopyFrom(entrySample);
234241
entry->mutable_key()->set_pdiskid(pdiskId);
235242
entry->mutable_info()->set_totalsize(totalSize);
236243
entry->mutable_info()->set_availablesize((1 - occupancy) * totalSize);
244+
entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(vslot.PDiskStatus)->name());
237245
++pdiskId;
238246
}
239247
}
@@ -482,7 +490,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
482490
}
483491
case NSysView::TEvSysView::EvGetPDisksResponse: {
484492
auto* x = reinterpret_cast<NSysView::TEvSysView::TEvGetPDisksResponse::TPtr*>(&ev);
485-
AddPDisksToSysViewResponse(x, vdisks.size(), occupancy);
493+
AddPDisksToSysViewResponse(x, vdisks, occupancy);
486494
break;
487495
}
488496
case NSysView::TEvSysView::EvGetGroupsResponse: {
@@ -710,6 +718,14 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
710718
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1);
711719
}
712720

721+
Y_UNIT_TEST(YellowIssueReadyVDisksOnFaultyPDisks) {
722+
auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, {NKikimrBlobStorage::READY, NKikimrBlobStorage::FAULTY}});
723+
Cerr << result.ShortDebugString() << Endl;
724+
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1);
725+
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::ORANGE, 0);
726+
CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0);
727+
}
728+
713729
/* HC currently infers group status on its own, so it's never unknown
714730
Y_UNIT_TEST(RedGroupIssueWhenUnknownGroupStatus) {
715731
auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::UNKNOWN, {});
@@ -1818,123 +1834,128 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) {
18181834
UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].id(), "static");
18191835
}
18201836

1821-
void HiveSyncTest(bool syncPeriod) {
1837+
Y_UNIT_TEST(ShardsLimit999) {
1838+
ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED);
1839+
}
1840+
1841+
Y_UNIT_TEST(ShardsLimit995) {
1842+
ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE);
1843+
}
1844+
1845+
Y_UNIT_TEST(ShardsLimit905) {
1846+
ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW);
1847+
}
1848+
1849+
Y_UNIT_TEST(ShardsLimit800) {
1850+
ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN);
1851+
}
1852+
1853+
Y_UNIT_TEST(ShardsNoLimit) {
1854+
ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN);
1855+
}
1856+
1857+
bool HasDeadTabletIssue(const Ydb::Monitoring::SelfCheckResult& result) {
1858+
for (const auto& issue_log : result.issue_log()) {
1859+
if (issue_log.level() == 4 && issue_log.type() == "TABLET") {
1860+
return true;
1861+
}
1862+
}
1863+
return false;
1864+
}
1865+
1866+
Y_UNIT_TEST(TestTabletIsDead) {
18221867
TPortManager tp;
18231868
ui16 port = tp.GetPort(2134);
18241869
ui16 grpcPort = tp.GetPort(2135);
18251870
auto settings = TServerSettings(port)
1826-
.SetNodeCount(1)
1871+
.SetNodeCount(2)
18271872
.SetDynamicNodeCount(1)
18281873
.SetUseRealThreads(false)
18291874
.SetDomainName("Root");
18301875
TServer server(settings);
18311876
server.EnableGRpc(grpcPort);
1877+
18321878
TClient client(settings);
1833-
TTestActorRuntime& runtime = *server.GetRuntime();
18341879

1835-
ui32 dynNodeId = runtime.GetNodeId(1);
1880+
TTestActorRuntime* runtime = server.GetRuntime();
1881+
TActorId sender = runtime->AllocateEdgeActor();
18361882

1837-
auto observerFunc = [&](TAutoPtr<IEventHandle>& ev) {
1838-
switch (ev->GetTypeRewrite()) {
1839-
case TEvHive::EvResponseHiveInfo: {
1840-
auto *x = reinterpret_cast<TEvHive::TEvResponseHiveInfo::TPtr*>(&ev);
1841-
auto& record = (*x)->Get()->Record;
1842-
record.SetStartTimeTimestamp(0);
1843-
if (syncPeriod) {
1844-
record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS / 2);
1845-
} else {
1846-
record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS * 2);
1847-
}
1848-
auto *tablet = record.MutableTablets()->Add();
1849-
tablet->SetTabletID(1);
1850-
tablet->SetNodeID(dynNodeId);
1851-
tablet->SetTabletType(NKikimrTabletBase::TTabletTypes::DataShard);
1852-
tablet->SetVolatileState(NKikimrHive::TABLET_VOLATILE_STATE_BOOTING);
1853-
tablet->MutableObjectDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId);
1854-
tablet->MutableObjectDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId);
1855-
break;
1856-
}
1857-
case TEvHive::EvResponseHiveNodeStats: {
1858-
auto *x = reinterpret_cast<TEvHive::TEvResponseHiveNodeStats::TPtr*>(&ev);
1859-
auto &record = (*x)->Get()->Record;
1860-
auto *nodeStats = record.MutableNodeStats()->Add();
1861-
nodeStats->SetNodeId(dynNodeId);
1862-
nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId);
1863-
nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId);
1864-
break;
1865-
}
1866-
case NConsole::TEvConsole::EvGetTenantStatusResponse: {
1867-
auto *x = reinterpret_cast<NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr*>(&ev);
1868-
ChangeGetTenantStatusResponse(x, "/Root/database");
1869-
break;
1870-
}
1871-
case TEvTxProxySchemeCache::EvNavigateKeySetResult: {
1872-
auto *x = reinterpret_cast<TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr*>(&ev);
1873-
TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front());
1874-
entry.Status = TSchemeCacheNavigate::EStatus::Ok;
1875-
entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain;
1876-
entry.Path = {"Root", "database"};
1877-
entry.DomainInfo = MakeIntrusive<TDomainInfo>(SUBDOMAIN_KEY, SUBDOMAIN_KEY);
1883+
server.SetupDynamicLocalService(2, "Root");
1884+
server.StartPQTablets(1);
1885+
server.DestroyDynamicLocalService(2);
1886+
runtime->AdvanceCurrentTime(TDuration::Minutes(5));
18781887

1879-
break;
1880-
}
1881-
}
1888+
TAutoPtr<IEventHandle> handle;
1889+
runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0));
1890+
auto result = runtime->GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
1891+
Cerr << result.ShortDebugString();
18821892

1883-
return TTestActorRuntime::EEventAction::PROCESS;
1884-
};
1885-
runtime.SetObserverFunc(observerFunc);
1893+
UNIT_ASSERT(HasDeadTabletIssue(result));
1894+
}
18861895

1887-
TActorId sender = runtime.AllocateEdgeActor();
1888-
TAutoPtr<IEventHandle> handle;
1896+
Y_UNIT_TEST(TestBootingTabletIsNotDead) {
1897+
TPortManager tp;
1898+
ui16 port = tp.GetPort(2134);
1899+
ui16 grpcPort = tp.GetPort(2135);
1900+
auto settings = TServerSettings(port)
1901+
.SetNodeCount(2)
1902+
.SetDynamicNodeCount(1)
1903+
.SetUseRealThreads(false)
1904+
.SetDomainName("Root");
1905+
TServer server(settings);
1906+
server.EnableGRpc(grpcPort);
18891907

1890-
auto *request = new NHealthCheck::TEvSelfCheckRequest;
1891-
request->Request.set_return_verbose_status(true);
1892-
request->Database = "/Root/database";
1893-
runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0));
1894-
const auto result = runtime.GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
1908+
TClient client(settings);
18951909

1896-
Cerr << result.ShortDebugString() << Endl;
1910+
TTestActorRuntime* runtime = server.GetRuntime();
1911+
TActorId sender = runtime->AllocateEdgeActor();
18971912

1898-
UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1);
1913+
auto blockBoot = runtime->AddObserver<NHive::TEvPrivate::TEvProcessBootQueue>([](auto&& ev) { ev.Reset(); });
18991914

1900-
bool deadTabletIssueFoundInResult = false;
1901-
for (const auto &issue_log : result.issue_log()) {
1902-
if (issue_log.level() == 4 && issue_log.type() == "TABLET") {
1903-
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().id().size(), 1);
1904-
UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().type(), "DataShard");
1905-
deadTabletIssueFoundInResult = true;
1906-
}
1907-
}
1915+
server.SetupDynamicLocalService(2, "Root");
1916+
server.StartPQTablets(1, false);
1917+
runtime->AdvanceCurrentTime(TDuration::Minutes(5));
19081918

1909-
UNIT_ASSERT_VALUES_EQUAL(syncPeriod, !deadTabletIssueFoundInResult);
1910-
}
1919+
TAutoPtr<IEventHandle> handle;
1920+
runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0));
1921+
auto result = runtime->GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
1922+
Cerr << result.ShortDebugString();
19111923

1912-
Y_UNIT_TEST(HiveSyncPeriodIgnoresTabletsState) {
1913-
HiveSyncTest(true);
1924+
UNIT_ASSERT(!HasDeadTabletIssue(result));
19141925
}
19151926

1916-
Y_UNIT_TEST(AfterHiveSyncPeriodReportsTabletsState) {
1917-
HiveSyncTest(false);
1918-
}
1927+
Y_UNIT_TEST(TestReBootingTabletIsDead) {
1928+
TPortManager tp;
1929+
ui16 port = tp.GetPort(2134);
1930+
ui16 grpcPort = tp.GetPort(2135);
1931+
auto settings = TServerSettings(port)
1932+
.SetNodeCount(2)
1933+
.SetDynamicNodeCount(2)
1934+
.SetUseRealThreads(false)
1935+
.SetDomainName("Root");
1936+
TServer server(settings);
1937+
server.EnableGRpc(grpcPort);
19191938

1920-
Y_UNIT_TEST(ShardsLimit999) {
1921-
ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED);
1922-
}
1939+
TClient client(settings);
19231940

1924-
Y_UNIT_TEST(ShardsLimit995) {
1925-
ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE);
1926-
}
1941+
TTestActorRuntime* runtime = server.GetRuntime();
1942+
runtime->SetLogPriority(NKikimrServices::HIVE, NActors::NLog::PRI_TRACE);
1943+
TActorId sender = runtime->AllocateEdgeActor();
19271944

1928-
Y_UNIT_TEST(ShardsLimit905) {
1929-
ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW);
1930-
}
19311945

1932-
Y_UNIT_TEST(ShardsLimit800) {
1933-
ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN);
1934-
}
1946+
server.SetupDynamicLocalService(2, "Root");
1947+
server.StartPQTablets(1, true);
1948+
server.SetupDynamicLocalService(3, "Root");
1949+
auto blockBoot = runtime->AddObserver<NHive::TEvPrivate::TEvProcessBootQueue>([](auto&& ev) { ev.Reset(); });
1950+
server.DestroyDynamicLocalService(2);
1951+
runtime->AdvanceCurrentTime(TDuration::Minutes(5));
19351952

1936-
Y_UNIT_TEST(ShardsNoLimit) {
1937-
ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN);
1953+
TAutoPtr<IEventHandle> handle;
1954+
runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0));
1955+
auto result = runtime->GrabEdgeEvent<NHealthCheck::TEvSelfCheckResult>(handle)->Result;
1956+
Cerr << result.ShortDebugString();
1957+
1958+
UNIT_ASSERT(HasDeadTabletIssue(result));
19381959
}
19391960
}
19401961
}

ydb/core/mind/hive/hive_impl.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec
236236
if (tablet == nullptr) {
237237
continue;
238238
}
239+
tablet->InWaitQueue = false;
239240
if (tablet->IsAlive()) {
240241
BLOG_D("tablet " << record.TabletId << " already alive, skipping");
241242
continue;
@@ -261,6 +262,7 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec
261262
UpdateTabletFollowersNumber(leader, db, sideEffects);
262263
}
263264
BootQueue.AddToWaitQueue(record); // waiting for new node
265+
tablet->InWaitQueue = true;
264266
continue;
265267
}
266268
}
@@ -1854,6 +1856,9 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl
18541856
if (req.GetReturnMetrics()) {
18551857
tabletInfo.MutableMetrics()->CopyFrom(info->GetResourceValues());
18561858
}
1859+
if (info->InWaitQueue) {
1860+
tabletInfo.SetInWaitQueue(true);
1861+
}
18571862
if (req.GetReturnChannelHistory()) {
18581863
for (const auto& channel : info->TabletStorageInfo->Channels) {
18591864
auto& tabletChannel = *tabletInfo.AddTabletChannels();

ydb/core/mind/hive/tablet_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ struct TTabletInfo {
162162
TInstant PostponedStart;
163163
EBalancerPolicy BalancerPolicy;
164164
TNodeId FailedNodeId = 0; // last time we tried to start the tablet, we failed on this node
165+
bool InWaitQueue = false;
165166

166167
TTabletInfo(ETabletRole role, THive& hive);
167168
TTabletInfo(const TTabletInfo&) = delete;

ydb/core/protos/hive.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,7 @@ message TTabletInfo {
494494
optional uint32 RestartsPerPeriod = 22;
495495
optional uint64 LastAliveTimestamp = 23;
496496
optional EBalancerPolicy BalancerPolicy = 24;
497+
optional bool InWaitQueue = 25;
497498
}
498499

499500
message TEvSeizeTabletsReply {

0 commit comments

Comments (0)