Skip to content

Enhanced monitoring for serverless tenants in Hive KIKIMR-19289 #710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ydb/core/mind/hive/domain_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ struct TDomainInfo {
TTabletId HiveId = 0;
TMaybeServerlessComputeResourcesMode ServerlessComputeResourcesMode;

ui64 TabletsTotal = 0;
ui64 TabletsAlive = 0;
ui64 TabletsAliveInObjectDomain = 0;

ENodeSelectionPolicy GetNodeSelectionPolicy() const;
};

Expand Down
19 changes: 18 additions & 1 deletion ydb/core/mind/hive/hive_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1550,7 +1550,9 @@ void THive::DeleteTablet(TTabletId tabletId) {
}
Y_ENSURE_LOG(nt->second.LockedTablets.count(&tablet) == 0, " Deleting tablet found on node " << nt->first << " in locked set");
}
UpdateCounterTabletsTotal(-1 - (tablet.Followers.size()));
const i64 tabletsTotalDiff = -1 - (tablet.Followers.size());
UpdateCounterTabletsTotal(tabletsTotalDiff);
UpdateDomainTabletsTotal(tablet.ObjectDomain, tabletsTotalDiff);
Tablets.erase(it);
}
}
Expand Down Expand Up @@ -1583,6 +1585,21 @@ void THive::KillNode(TNodeId nodeId, const TActorId& local) {
Execute(CreateKillNode(nodeId, local));
}

// Adjusts the per-domain total tablet counter by a signed delta.
//
// objectDomain     - key of the domain whose counter is adjusted; a key that
//                    evaluates to false is ignored and nothing is touched.
// tabletsTotalDiff - signed change to apply (negative when tablets go away).
//
// Note: the entry is obtained via Domains[...], so a domain that is not yet
// present in the map gets an entry created by this call.
void THive::UpdateDomainTabletsTotal(const TSubDomainKey& objectDomain, i64 tabletsTotalDiff) {
    if (!objectDomain) {
        return;
    }
    auto& domainInfo = Domains[objectDomain];
    domainInfo.TabletsTotal += tabletsTotalDiff;
}

// Adjusts the per-domain alive-tablet counters by a signed delta.
//
// objectDomain     - key of the domain the tablet belongs to; a key that
//                    evaluates to false is ignored and nothing is touched.
// tabletsAliveDiff - signed change to apply to the alive counters.
// tabletNodeDomain - domain of the node the tablet is (or was) running on.
//
// TabletsAlive is always adjusted; TabletsAliveInObjectDomain is adjusted only
// when the node's domain equals the tablet's own object domain.
//
// Note: the entry is obtained via Domains[...], so a domain that is not yet
// present in the map gets an entry created by this call.
void THive::UpdateDomainTabletsAlive(const TSubDomainKey& objectDomain, i64 tabletsAliveDiff, const TSubDomainKey& tabletNodeDomain) {
    if (!objectDomain) {
        return;
    }
    // Look the entry up once and reuse it for both counters.
    auto& domainInfo = Domains[objectDomain];
    domainInfo.TabletsAlive += tabletsAliveDiff;
    if (tabletNodeDomain == objectDomain) {
        domainInfo.TabletsAliveInObjectDomain += tabletsAliveDiff;
    }
}

void THive::SetCounterTabletsTotal(ui64 tabletsTotal) {
if (TabletCounters != nullptr) {
auto& counter = TabletCounters->Simple()[NHive::COUNTER_TABLETS_TOTAL];
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/hive/hive_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,8 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
TTabletCategoryInfo& GetTabletCategory(TTabletCategoryId tabletCategoryId);
void KillNode(TNodeId nodeId, const TActorId& local);
void AddToBootQueue(TTabletInfo* tablet);
void UpdateDomainTabletsTotal(const TSubDomainKey& objectDomain, i64 tabletsTotalDiff);
void UpdateDomainTabletsAlive(const TSubDomainKey& objectDomain, i64 tabletsAliveDiff, const TSubDomainKey& tabletNodeDomain);
void SetCounterTabletsTotal(ui64 tabletsTotal);
void UpdateCounterTabletsTotal(i64 tabletsTotalDiff);
void UpdateCounterTabletsAlive(i64 tabletsAliveDiff);
Expand Down
19 changes: 19 additions & 0 deletions ydb/core/mind/hive/leader_tablet_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ TFollowerId TLeaderTabletInfo::GetFollowerPromotableOnNode(TNodeId nodeId) const
}

void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const TVector<TSubDomainKey>& allowedDomains) {
const TSubDomainKey oldObjectDomain = ObjectDomain;

if (!allowedDomains.empty()) {
NodeFilter.AllowedDomains = allowedDomains;
if (!objectDomain) {
Expand All @@ -82,6 +84,22 @@ void TLeaderTabletInfo::AssignDomains(const TSubDomainKey& objectDomain, const T
followerGroup.NodeFilter.AllowedDomains = NodeFilter.AllowedDomains;
followerGroup.NodeFilter.ObjectDomain = NodeFilter.ObjectDomain;
}

const ui64 leaderAndFollowers = 1 + Followers.size();
Hive.UpdateDomainTabletsTotal(oldObjectDomain, -leaderAndFollowers);
Hive.UpdateDomainTabletsTotal(ObjectDomain, +leaderAndFollowers);

if (IsAlive()) {
Hive.UpdateDomainTabletsAlive(oldObjectDomain, -1, Node->GetServicedDomain());
Hive.UpdateDomainTabletsAlive(ObjectDomain, +1, Node->GetServicedDomain());
}

for (const auto& follower : Followers) {
if (follower.IsAlive()) {
Hive.UpdateDomainTabletsAlive(oldObjectDomain, -1, follower.Node->GetServicedDomain());
Hive.UpdateDomainTabletsAlive(ObjectDomain, +1, follower.Node->GetServicedDomain());
}
}
}

bool TLeaderTabletInfo::InitiateAssignTabletGroups() {
Expand Down Expand Up @@ -127,6 +145,7 @@ TFollowerTabletInfo& TLeaderTabletInfo::AddFollower(TFollowerGroup& followerGrou
follower.Id = followerId;
}
Hive.UpdateCounterTabletsTotal(+1);
Hive.UpdateDomainTabletsTotal(ObjectDomain, +1);
return follower;
}

Expand Down
22 changes: 21 additions & 1 deletion ydb/core/mind/hive/monitoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,15 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
// out << "<script>$('.container').css('width', 'auto');</script>";
out << "<table class='table table-sortable'>";
out << "<thead>";
out << "<tr><th>TenantId</th><th>Name</th><th>Hive</th><th>Status</th></tr>";
out << "<tr>";
out << "<th>TenantId</th>";
out << "<th>Name</th>";
out << "<th>Hive</th>";
out << "<th>Status</th>";
out << "<th>TabletsAliveInTenantDomain</th>";
out << "<th>TabletsAliveInOtherDomains</th>";
out << "<th>TabletsTotal</th>";
out << "</tr>";
out << "</thead>";
out << "<tbody>";
for (const auto& [domainKey, domainInfo] : Self->Domains) {
Expand All @@ -482,6 +490,18 @@ class TTxMonEvent_MemStateDomains : public TTransactionBase<THive> {
out << "<td>-</td>";
out << "<td>-</td>";
}
if (domainInfo.TabletsTotal > 0) {
out << "<td>" << std::round(domainInfo.TabletsAliveInObjectDomain * 100.0 / domainInfo.TabletsTotal) << "%"
<< " (" << domainInfo.TabletsAliveInObjectDomain << " of " << domainInfo.TabletsTotal << ")" << "</td>";

const ui64 tabletsAliveInOtherDomains = domainInfo.TabletsAlive - domainInfo.TabletsAliveInObjectDomain;
out << "<td>" << std::round(tabletsAliveInOtherDomains * 100.0 / domainInfo.TabletsTotal) << "%"
<< " (" << tabletsAliveInOtherDomains << " of " << domainInfo.TabletsTotal << ")" << "</td>";
} else {
out << "<td>-</td>";
out << "<td>-</td>";
}
out << "<td>" << domainInfo.TabletsTotal << "</td>";
out << "</tr>";
}
out << "</tbody>";
Expand Down
2 changes: 2 additions & 0 deletions ydb/core/mind/hive/node_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV
TabletsRunningByType[tablet->GetTabletType()].erase(tablet);
TabletsOfObject[tablet->GetObjectId()].erase(tablet);
Hive.UpdateCounterTabletsAlive(-1);
Hive.UpdateDomainTabletsAlive(tablet->GetLeader().ObjectDomain, -1, GetServicedDomain());
if (tablet->HasCounter() && tablet->IsLeader()) {
Hive.UpdateObjectCount(tablet->AsLeader(), *this, -1);
}
Expand All @@ -84,6 +85,7 @@ bool TNodeInfo::OnTabletChangeVolatileState(TTabletInfo* tablet, TTabletInfo::EV
TabletsRunningByType[tablet->GetTabletType()].emplace(tablet);
TabletsOfObject[tablet->GetObjectId()].emplace(tablet);
Hive.UpdateCounterTabletsAlive(+1);
Hive.UpdateDomainTabletsAlive(tablet->GetLeader().ObjectDomain, +1, GetServicedDomain());
if (tablet->HasCounter() && tablet->IsLeader()) {
Hive.UpdateObjectCount(tablet->AsLeader(), *this, +1);
}
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/mind/hive/tx__load_everything.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ class TTxLoadEverything : public TTransactionBase<THive> {

void Complete(const TActorContext& ctx) override {
BLOG_NOTICE("THive::TTxLoadEverything::Complete " << Self->DatabaseConfig.ShortDebugString());
i64 tabletsTotal = 0;
ui64 tabletsTotal = 0;
for (auto it = Self->Tablets.begin(); it != Self->Tablets.end(); ++it) {
++tabletsTotal;
for (const TTabletInfo& follower : it->second.Followers) {
Expand Down