Skip to content

Commit d95e3ab

Browse files
authored
return up-to-date node stats KIKIMR-20697 (#910)
1 parent 5c82488 commit d95e3ab

File tree

6 files changed

+24
-17
lines changed

6 files changed

+24
-17
lines changed

ydb/core/mind/hive/hive_impl.cpp

+14-8
Original file line numberDiff line numberDiff line change
@@ -1782,6 +1782,8 @@ bool THive::IsTabletMoveExpedient(const TTabletInfo& tablet, const TNodeInfo& no
17821782

17831783
void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabletId, const TLeaderTabletInfo *info, const NKikimrHive::TEvRequestHiveInfo &req) {
17841784
if (info) {
1785+
TInstant now = TActivationContext::Now();
1786+
TInstant restartsBarrierTime = now - GetTabletRestartsPeriod();
17851787
auto& tabletInfo = *response.AddTablets();
17861788
tabletInfo.SetTabletID(tabletId);
17871789
tabletInfo.SetTabletType(info->Type);
@@ -1800,7 +1802,7 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl
18001802
if (!info->IsRunning()) {
18011803
tabletInfo.SetLastAliveTimestamp(info->Statistics.GetLastAliveTimestamp());
18021804
}
1803-
tabletInfo.SetRestartsPerPeriod(info->Statistics.RestartTimestampSize());
1805+
tabletInfo.SetRestartsPerPeriod(info->GetRestartsPerPeriod(restartsBarrierTime));
18041806
if (req.GetReturnMetrics()) {
18051807
tabletInfo.MutableMetrics()->CopyFrom(info->GetResourceValues());
18061808
}
@@ -1831,7 +1833,7 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl
18311833
if (!follower.IsRunning()) {
18321834
tabletInfo.SetLastAliveTimestamp(follower.Statistics.GetLastAliveTimestamp());
18331835
}
1834-
tabletInfo.SetRestartsPerPeriod(follower.Statistics.RestartTimestampSize());
1836+
tabletInfo.SetRestartsPerPeriod(follower.GetRestartsPerPeriod(restartsBarrierTime));
18351837
if (req.GetReturnMetrics()) {
18361838
tabletInfo.MutableMetrics()->CopyFrom(follower.GetResourceValues());
18371839
}
@@ -1843,16 +1845,14 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl
18431845
void THive::Handle(TEvHive::TEvRequestHiveInfo::TPtr& ev) {
18441846
const auto& record = ev->Get()->Record;
18451847
TAutoPtr<TEvHive::TEvResponseHiveInfo> response = new TEvHive::TEvResponseHiveInfo();
1846-
TInstant now = TlsActivationContext->Now();
18471848
if (record.HasTabletID()) {
18481849
TTabletId tabletId = record.GetTabletID();
18491850
NKikimrHive::TForwardRequest forwardRequest;
18501851
if (CheckForForwardTabletRequest(tabletId, forwardRequest)) {
18511852
response->Record.MutableForwardRequest()->CopyFrom(forwardRequest);
18521853
}
1853-
TLeaderTabletInfo* tablet = FindTablet(tabletId);
1854+
const TLeaderTabletInfo* tablet = FindTablet(tabletId);
18541855
if (tablet) {
1855-
tablet->ActualizeTabletStatistics(now);
18561856
FillTabletInfo(response->Record, record.GetTabletID(), tablet, record);
18571857
} else {
18581858
BLOG_W("Can't find the tablet from RequestHiveInfo(TabletID=" << tabletId << ")");
@@ -1866,7 +1866,6 @@ void THive::Handle(TEvHive::TEvRequestHiveInfo::TPtr& ev) {
18661866
if (it->second.IsDeleting()) {
18671867
continue;
18681868
}
1869-
it->second.ActualizeTabletStatistics(now);
18701869
FillTabletInfo(response->Record, it->first, &it->second, record);
18711870
}
18721871
response->Record.set_starttimetimestamp(StartTime().MilliSeconds());
@@ -1955,13 +1954,15 @@ void THive::Handle(TEvHive::TEvRequestHiveDomainStats::TPtr& ev) {
19551954

19561955
void THive::Handle(TEvHive::TEvRequestHiveNodeStats::TPtr& ev) {
19571956
const auto& request(ev->Get()->Record);
1957+
TInstant now = TActivationContext::Now();
1958+
TInstant restartsBarrierTime = now - GetNodeRestartWatchPeriod();
19581959
THolder<TEvHive::TEvResponseHiveNodeStats> response = MakeHolder<TEvHive::TEvResponseHiveNodeStats>();
19591960
auto& record = response->Record;
19601961
if (request.GetReturnExtendedTabletInfo()) {
19611962
record.SetExtendedTabletInfo(true);
19621963
}
19631964
for (auto it = Nodes.begin(); it != Nodes.end(); ++it) {
1964-
const TNodeInfo& node = it->second;
1965+
TNodeInfo& node = it->second;
19651966
if (node.IsUnknown()) {
19661967
continue;
19671968
}
@@ -2035,7 +2036,7 @@ void THive::Handle(TEvHive::TEvRequestHiveNodeStats::TPtr& ev) {
20352036
if (!node.IsAlive()) {
20362037
nodeStats.SetLastAliveTimestamp(node.Statistics.GetLastAliveTimestamp());
20372038
}
2038-
nodeStats.SetRestartsPerPeriod(node.Statistics.RestartTimestampSize());
2039+
nodeStats.SetRestartsPerPeriod(node.GetRestartsPerPeriod(restartsBarrierTime));
20392040
}
20402041
Send(ev->Sender, response.Release(), 0, ev->Cookie);
20412042
}
@@ -3299,6 +3300,11 @@ void THive::ActualizeRestartStatistics(google::protobuf::RepeatedField<google::p
32993300
array.erase(begin, it);
33003301
}
33013302

3303+
// Returns how many entries of restartTimestamps are not less than barrier,
// i.e. the count of elements from the first one >= barrier up to the end.
// The visible callers pass barrier as TInstant::MilliSeconds(), so timestamps
// and barrier are compared as raw ui64 values in the same unit.
// NOTE(review): std::lower_bound needs the range ordered with respect to
// barrier — presumably timestamps are appended in ascending order; confirm.
ui64 THive::GetRestartsPerPeriod(const google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier) {
3304+
// First element that is >= barrier (end() when every element is smaller).
auto it = std::lower_bound(restartTimestamps.begin(), restartTimestamps.end(), barrier);
3305+
// Distance from that element to the end is the number of restarts counted.
return restartTimestamps.end() - it;
3306+
}
3307+
33023308
bool THive::IsSystemTablet(TTabletTypes::EType type) {
33033309
switch (type) {
33043310
case TTabletTypes::Coordinator:

ydb/core/mind/hive/hive_impl.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
665665
const NKikimrTabletBase::TMetrics& after,
666666
NKikimr::NHive::TResourceRawValues deltaRaw,
667667
NKikimr::NHive::TResourceNormalizedValues deltaNormalized);
668-
static void FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabletId, const TLeaderTabletInfo* info, const NKikimrHive::TEvRequestHiveInfo& req);
668+
void FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabletId, const TLeaderTabletInfo* info, const NKikimrHive::TEvRequestHiveInfo& req);
669669
void ExecuteStartTablet(TFullTabletId tabletId, const TActorId& local, ui64 cookie, bool external);
670670
ui32 GetDataCenters();
671671
ui32 GetRegisteredDataCenters();
@@ -914,6 +914,7 @@ class THive : public TActor<THive>, public TTabletExecutedFlat, public THiveShar
914914
}
915915

916916
static void ActualizeRestartStatistics(google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
917+
static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField<google::protobuf::uint64>& restartTimestamps, ui64 barrier);
917918
static bool IsSystemTablet(TTabletTypes::EType type);
918919

919920
protected:

ydb/core/mind/hive/node_info.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,10 @@ void TNodeInfo::ActualizeNodeStatistics(TInstant now) {
471471
Hive.ActualizeRestartStatistics(*Statistics.MutableRestartTimestamp(), barierTime.MilliSeconds());
472472
}
473473

474+
// Returns the number of this node's recorded restart timestamps that are not
// older than barrier. Read-only: forwards Statistics.GetRestartTimestamp() and
// the barrier converted to milliseconds to Hive.GetRestartsPerPeriod().
ui64 TNodeInfo::GetRestartsPerPeriod(TInstant barrier) const {
475+
return Hive.GetRestartsPerPeriod(Statistics.GetRestartTimestamp(), barrier.MilliSeconds());
476+
}
477+
474478
// Returns the log prefix for this node by forwarding to Hive.GetLogPrefix().
TString TNodeInfo::GetLogPrefix() const {
475479
return Hive.GetLogPrefix();
476480
}

ydb/core/mind/hive/node_info.h

+1
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ struct TNodeInfo {
235235

236236
void UpdateResourceTotalUsage(const NKikimrHive::TEvTabletMetrics& metrics);
237237
void ActualizeNodeStatistics(TInstant now);
238+
ui64 GetRestartsPerPeriod(TInstant barrier) const;
238239

239240
TDataCenterId GetDataCenter() const {
240241
return Location.GetDataCenterId();

ydb/core/mind/hive/tablet_info.cpp

+2-7
Original file line numberDiff line numberDiff line change
@@ -496,13 +496,8 @@ void TTabletInfo::ActualizeTabletStatistics(TInstant now) {
496496
Hive.ActualizeRestartStatistics(*Statistics.MutableRestartTimestamp(), barierTime.MilliSeconds());
497497
}
498498

499-
ui64 TTabletInfo::GetRestartsPerPeriod(TInstant barrier) {
500-
const auto& array(Statistics.GetRestartTimestamp());
501-
ui64 restarts = 0;
502-
for (auto itRestart = array.rbegin(); (itRestart != array.rend()) && (TInstant::MilliSeconds(*itRestart) >= barrier); ++itRestart) {
503-
++restarts;
504-
}
505-
return restarts;
499+
// Returns the number of this tablet's recorded restart timestamps that are not
// older than barrier. Read-only: forwards Statistics.GetRestartTimestamp() and
// the barrier converted to milliseconds to Hive.GetRestartsPerPeriod().
ui64 TTabletInfo::GetRestartsPerPeriod(TInstant barrier) const {
500+
return Hive.GetRestartsPerPeriod(Statistics.GetRestartTimestamp(), barrier.MilliSeconds());
506501
}
507502

508503
bool TTabletInfo::RestartsOften() const {

ydb/core/mind/hive/tablet_info.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ struct TTabletInfo {
294294
}
295295

296296
void ActualizeTabletStatistics(TInstant now);
297-
ui64 GetRestartsPerPeriod(TInstant barrier);
297+
ui64 GetRestartsPerPeriod(TInstant barrier) const;
298298
bool RestartsOften() const;
299299

300300
bool HasCounter() {

0 commit comments

Comments
 (0)