Skip to content

Commit 1d77d19

Browse files
committed
Collect stats during lifetime KIKIMR-19289
1 parent 2f5ea7d commit 1d77d19

File tree

6 files changed

+57
-41
lines changed

6 files changed

+57
-41
lines changed

ydb/core/mind/hive/hive_impl.cpp

+18-1
Original file line numberDiff line numberDiff line change
@@ -1550,7 +1550,9 @@ void THive::DeleteTablet(TTabletId tabletId) {
15501550
}
15511551
Y_ENSURE_LOG(nt->second.LockedTablets.count(&tablet) == 0, " Deleting tablet found on node " << nt->first << " in locked set");
15521552
}
1553-
UpdateCounterTabletsTotal(-1 - (tablet.Followers.size()));
1553+
const i64 tabletsTotalDiff = -1 - (tablet.Followers.size());
1554+
UpdateCounterTabletsTotal(tabletsTotalDiff);
1555+
UpdateTabletsTotalByDomain(tabletsTotalDiff, tablet.ObjectDomain);
15541556
Tablets.erase(it);
15551557
}
15561558
}
@@ -1583,6 +1585,21 @@ void THive::KillNode(TNodeId nodeId, const TActorId& local) {
15831585
Execute(CreateKillNode(nodeId, local));
15841586
}
15851587

1588+
// Applies tabletsTotalDiff to the TabletsTotalByDomain entry for objectDomain.
// Does nothing when objectDomain evaluates to false.
// The map stores ui64 while the diff is i64, so a negative diff is applied via
// unsigned wraparound; operator[] creates a zero entry if none exists yet.
void THive::UpdateTabletsTotalByDomain(i64 tabletsTotalDiff, const TSubDomainKey& objectDomain) {
    if (!objectDomain) {
        return;
    }
    auto& totalForDomain = TabletsTotalByDomain[objectDomain];
    totalForDomain += tabletsTotalDiff;
}
1593+
1594+
// Applies tabletsAliveDiff to the TabletsAliveByDomain entry for objectDomain and,
// when tabletNodeDomain equals objectDomain, to the matching
// TabletsAliveInObjectDomainByDomain entry as well.
// Does nothing when objectDomain evaluates to false.
// Both maps store ui64 while the diff is i64, so a negative diff is applied via
// unsigned wraparound; operator[] creates a zero entry if none exists yet.
void THive::UpdateTabletsAliveByDomain(i64 tabletsAliveDiff, const TSubDomainKey& objectDomain, const TSubDomainKey& tabletNodeDomain) {
    if (!objectDomain) {
        return;
    }

    auto& aliveForDomain = TabletsAliveByDomain[objectDomain];
    aliveForDomain += tabletsAliveDiff;

    // Also track tablets that are alive on a node serving the object's own domain.
    if (objectDomain == tabletNodeDomain) {
        auto& aliveInOwnDomain = TabletsAliveInObjectDomainByDomain[objectDomain];
        aliveInOwnDomain += tabletsAliveDiff;
    }
}
1602+
15861603
void THive::SetCounterTabletsTotal(ui64 tabletsTotal) {
15871604
if (TabletCounters != nullptr) {
15881605
auto& counter = TabletCounters->Simple()[NHive::COUNTER_TABLETS_TOTAL];

ydb/core/mind/hive/hive_impl.h

+5
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,9 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
323323
ui32 DataCenters = 1;
324324
ui32 RegisteredDataCenters = 1;
325325
TObjectDistributions ObjectDistributions;
326+
std::unordered_map<TSubDomainKey, ui64> TabletsTotalByDomain;
327+
std::unordered_map<TSubDomainKey, ui64> TabletsAliveByDomain;
328+
std::unordered_map<TSubDomainKey, ui64> TabletsAliveInObjectDomainByDomain;
326329

327330
bool AreWeRootHive() const { return RootHiveId == HiveId; }
328331
bool AreWeSubDomainHive() const { return RootHiveId != HiveId; }
@@ -633,6 +636,8 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
633636
TTabletCategoryInfo& GetTabletCategory(TTabletCategoryId tabletCategoryId);
634637
void KillNode(TNodeId nodeId, const TActorId& local);
635638
void AddToBootQueue(TTabletInfo* tablet);
639+
void UpdateTabletsTotalByDomain(i64 tabletsTotalDiff, const TSubDomainKey& objectDomain);
640+
void UpdateTabletsAliveByDomain(i64 tabletsAliveDiff, const TSubDomainKey& objectDomain, const TSubDomainKey& tabletNodeDomain);
636641
void SetCounterTabletsTotal(ui64 tabletsTotal);
637642
void UpdateCounterTabletsTotal(i64 tabletsTotalDiff);
638643
void UpdateCounterTabletsAlive(i64 tabletsAliveDiff);

ydb/core/mind/hive/leader_tablet_info.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ TFollowerId TLeaderTabletInfo::GetFollowerPromotableOnNode(TNodeId nodeId) const
6363
}
6464

6565
void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const TVector<TSubDomainKey>& allowedDomains) {
66+
const TSubDomainKey oldObjectDomain = ObjectDomain;
67+
6668
if (!allowedDomains.empty()) {
6769
NodeFilter.AllowedDomains = allowedDomains;
6870
if (!objectDomain) {
@@ -82,6 +84,22 @@ void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const T
8284
followerGroup.NodeFilter.AllowedDomains = NodeFilter.AllowedDomains;
8385
followerGroup.NodeFilter.ObjectDomain = NodeFilter.ObjectDomain;
8486
}
87+
88+
const ui64 leaderAndFollowers = 1 + Followers.size();
89+
Hive.UpdateTabletsTotalByDomain(-leaderAndFollowers, oldObjectDomain);
90+
Hive.UpdateTabletsTotalByDomain(+leaderAndFollowers, ObjectDomain);
91+
92+
if (IsAlive()) {
93+
Hive.UpdateTabletsAliveByDomain(-1, oldObjectDomain, Node->GetServicedDomain());
94+
Hive.UpdateTabletsAliveByDomain(+1, ObjectDomain, Node->GetServicedDomain());
95+
}
96+
97+
for (const auto& follower : Followers) {
98+
if (follower.IsAlive()) {
99+
Hive.UpdateTabletsAliveByDomain(-1, oldObjectDomain, follower.Node->GetServicedDomain());
100+
Hive.UpdateTabletsAliveByDomain(+1, ObjectDomain, follower.Node->GetServicedDomain());
101+
}
102+
}
85103
}
86104

87105
bool TLeaderTabletInfo::InitiateAssignTabletGroups() {
@@ -127,6 +145,7 @@ TFollowerTabletInfo& TLeaderTabletInfo::AddFollower(TFollowerGroup& followerGrou
127145
follower.Id = followerId;
128146
}
129147
Hive.UpdateCounterTabletsTotal(+1);
148+
Hive.UpdateTabletsTotalByDomain(+1, ObjectDomain);
130149
return follower;
131150
}
132151

ydb/core/mind/hive/monitoring.cpp

+10-39
Original file line numberDiff line numberDiff line change
@@ -456,37 +456,6 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
456456
}
457457

458458
void RenderHTMLPage(IOutputStream &out) {
459-
THashMap<TSubDomainKey, size_t> tabletsRunningInObjectDomain;
460-
THashMap<TSubDomainKey, size_t> tabletsRunningInOtherDomains;
461-
THashMap<TSubDomainKey, size_t> tabletsTotal;
462-
463-
for (const auto& [_, tablet] : Self->Tablets) {
464-
const TSubDomainKey objectDomain = tablet.ObjectDomain;
465-
++tabletsTotal[objectDomain];
466-
467-
const TNodeInfo* node = tablet.GetNode();
468-
if (node) {
469-
if (node->GetServicedDomain() == objectDomain) {
470-
++tabletsRunningInObjectDomain[objectDomain];
471-
} else {
472-
++tabletsRunningInOtherDomains[objectDomain];
473-
}
474-
}
475-
476-
for (const auto& follower : tablet.Followers) {
477-
++tabletsTotal[objectDomain];
478-
479-
const TNodeInfo* followerNode = follower.GetNode();
480-
if (followerNode) {
481-
if (followerNode->GetServicedDomain() == objectDomain) {
482-
++tabletsRunningInObjectDomain[objectDomain];
483-
} else {
484-
++tabletsRunningInOtherDomains[objectDomain];
485-
}
486-
}
487-
}
488-
}
489-
490459
// out << "<script>$('.container').css('width', 'auto');</script>";
491460
out << "<table class='table table-sortable'>";
492461
out << "<thead>";
@@ -495,8 +464,8 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
495464
out << "<th>Name</th>";
496465
out << "<th>Hive</th>";
497466
out << "<th>Status</th>";
498-
out << "<th>TabletsRunningInTenantDomain</th>";
499-
out << "<th>TabletsRunningInOtherDomains</th>";
467+
out << "<th>TabletsAliveInTenantDomain</th>";
468+
out << "<th>TabletsAliveInOtherDomains</th>";
500469
out << "<th>TabletsTotal</th>";
501470
out << "</tr>";
502471
out << "</thead>";
@@ -521,16 +490,18 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
521490
out << "<td>-</td>";
522491
out << "<td>-</td>";
523492
}
524-
if (tabletsTotal[domainKey] > 0) {
525-
out << "<td>" << std::round(tabletsRunningInObjectDomain[domainKey] * 100.0 / tabletsTotal[domainKey]) << "%"
526-
<< " (" << tabletsRunningInObjectDomain[domainKey] << " of " << tabletsTotal[domainKey] << ")" << "</td>";
527-
out << "<td>" << std::round(tabletsRunningInOtherDomains[domainKey] * 100.0 / tabletsTotal[domainKey]) << "%"
528-
<< " (" << tabletsRunningInOtherDomains[domainKey] << " of " << tabletsTotal[domainKey] << ")" << "</td>";
493+
if (Self->TabletsTotalByDomain[domainKey] > 0) {
494+
out << "<td>" << std::round(Self->TabletsAliveInObjectDomainByDomain[domainKey] * 100.0 / Self->TabletsTotalByDomain[domainKey]) << "%"
495+
<< " (" << Self->TabletsAliveInObjectDomainByDomain[domainKey] << " of " << Self->TabletsTotalByDomain[domainKey] << ")" << "</td>";
496+
497+
const ui64 tabletsAliveInOtherDomains = Self->TabletsAliveByDomain[domainKey] - Self->TabletsAliveInObjectDomainByDomain[domainKey];
498+
out << "<td>" << std::round(tabletsAliveInOtherDomains * 100.0 / Self->TabletsTotalByDomain[domainKey]) << "%"
499+
<< " (" << tabletsAliveInOtherDomains << " of " << Self->TabletsTotalByDomain[domainKey] << ")" << "</td>";
529500
} else {
530501
out << "<td>-</td>";
531502
out << "<td>-</td>";
532503
}
533-
out << "<td>" << tabletsTotal[domainKey] << "</td>";
504+
out << "<td>" << Self->TabletsTotalByDomain[domainKey] << "</td>";
534505
out << "</tr>";
535506
}
536507
out << "</tbody>";

ydb/core/mind/hive/node_info.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV
6969
TabletsRunningByType[tablet->GetTabletType()].erase(tablet);
7070
TabletsOfObject[tablet->GetObjectId()].erase(tablet);
7171
Hive.UpdateCounterTabletsAlive(-1);
72+
Hive.UpdateTabletsAliveByDomain(-1, tablet->GetLeader().ObjectDomain, GetServicedDomain());
7273
if (tablet->HasCounter() && tablet->IsLeader()) {
7374
Hive.UpdateObjectCount(tablet->AsLeader(), *this, -1);
7475
}
@@ -84,6 +85,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV
8485
TabletsRunningByType[tablet->GetTabletType()].emplace(tablet);
8586
TabletsOfObject[tablet->GetObjectId()].emplace(tablet);
8687
Hive.UpdateCounterTabletsAlive(+1);
88+
Hive.UpdateTabletsAliveByDomain(+1, tablet->GetLeader().ObjectDomain, GetServicedDomain());
8789
if (tablet->HasCounter() && tablet->IsLeader()) {
8890
Hive.UpdateObjectCount(tablet->AsLeader(), *this, +1);
8991
}

ydb/core/mind/hive/tx__load_everything.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -710,11 +710,13 @@ class TTxLoadEverything : public TTransactionBase<THive> {
710710

711711
void Complete(const TActorContext& ctx) override {
712712
BLOG_NOTICE("THive::TTxLoadEverything::Complete " << Self->DatabaseConfig.ShortDebugString());
713-
i64 tabletsTotal = 0;
713+
ui64 tabletsTotal = 0;
714714
for (auto it = Self->Tablets.begin(); it != Self->Tablets.end(); ++it) {
715715
++tabletsTotal;
716+
Self->UpdateTabletsTotalByDomain(+1, it->second.ObjectDomain);
716717
for (const TTabletInfo& follower : it->second.Followers) {
717718
++tabletsTotal;
719+
Self->UpdateTabletsTotalByDomain(+1, it->second.ObjectDomain);
718720
if (follower.IsLeader()) {
719721
follower.AsLeader();
720722
} else {

0 commit comments

Comments
 (0)