@@ -35,7 +35,8 @@ CResourceMonitor::CResourceMonitor(double byteLimitMargin)
35
35
m_HasPruningStarted(false ), m_PruneThreshold(0 ), m_LastPruneTime(0 ),
36
36
m_PruneWindow(std::numeric_limits<std::size_t >::max()),
37
37
m_PruneWindowMaximum(std::numeric_limits<std::size_t >::max()),
38
- m_PruneWindowMinimum(std::numeric_limits<std::size_t >::max()), m_NoLimit(false ) {
38
+ m_PruneWindowMinimum(std::numeric_limits<std::size_t >::max()),
39
+ m_NoLimit(false ), m_CurrentBytesExceeded(0 ) {
39
40
this ->updateMemoryLimitsAndPruneThreshold (DEFAULT_MEMORY_LIMIT_MB);
40
41
}
41
42
@@ -108,18 +109,21 @@ void CResourceMonitor::refresh(CAnomalyDetector& detector) {
108
109
109
110
// Refresh the monitor for the given detector: passes the detector's address
// to memUsage() and then calls updateAllowAllocations().
//
// In this diff the assignment of the E_TSADMemoryUsage program counter and
// the "Checking allocations" trace message are removed from this function
// (the '-' lines below); equivalent statements are added to
// updateAllowAllocations(), which this function still calls last.
void CResourceMonitor::forceRefresh (CAnomalyDetector& detector) {
110
111
this ->memUsage (&detector);
111
- core::CProgramCounters::counter (counter_t ::E_TSADMemoryUsage) = this ->totalMemory ();
112
- LOG_TRACE (<< " Checking allocations: currently at " << this ->totalMemory ());
112
+
113
113
this ->updateAllowAllocations ();
114
114
}
115
115
116
116
void CResourceMonitor::updateAllowAllocations () {
117
117
std::size_t total{this ->totalMemory ()};
118
+ core::CProgramCounters::counter (counter_t ::E_TSADMemoryUsage) = total;
119
+ LOG_TRACE (<< " Checking allocations: currently at " << total);
118
120
if (m_AllowAllocations) {
119
121
if (total > this ->highLimit ()) {
120
122
LOG_INFO (<< " Over current allocation high limit. " << total
121
123
<< " bytes used, the limit is " << this ->highLimit ());
122
124
m_AllowAllocations = false ;
125
+ std::size_t bytesExceeded{total - this ->highLimit ()};
126
+ m_CurrentBytesExceeded = this ->adjustedUsage (bytesExceeded);
123
127
}
124
128
} else if (total < this ->lowLimit ()) {
125
129
LOG_INFO (<< " Below current allocation low limit. " << total
@@ -204,13 +208,6 @@ bool CResourceMonitor::areAllocationsAllowed() const {
204
208
return m_AllowAllocations;
205
209
}
206
210
207
- bool CResourceMonitor::areAllocationsAllowed (std::size_t size) const {
208
- if (m_AllowAllocations) {
209
- return this ->totalMemory () + size < this ->highLimit ();
210
- }
211
- return false ;
212
- }
213
-
214
211
std::size_t CResourceMonitor::allocationLimit () const {
215
212
return this ->highLimit () - std::min (this ->highLimit (), this ->totalMemory ());
216
213
}
@@ -268,6 +265,9 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
268
265
res.s_OverFields = 0 ;
269
266
res.s_PartitionFields = 0 ;
270
267
res.s_Usage = this ->totalMemory ();
268
+ res.s_AdjustedUsage = this ->adjustedUsage (res.s_Usage );
269
+ res.s_BytesMemoryLimit = 2 * m_ByteLimitHigh;
270
+ res.s_BytesExceeded = m_CurrentBytesExceeded;
271
271
res.s_AllocationFailures = 0 ;
272
272
res.s_MemoryStatus = m_MemoryStatus;
273
273
res.s_BucketStartTime = bucketStartTime;
@@ -281,6 +281,25 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
281
281
return res;
282
282
}
283
283
284
+ std::size_t CResourceMonitor::adjustedUsage (std::size_t usage) const {
285
+ // Background persist causes the memory size to double due to copying
286
+ // the models. On top of that, after the persist is done we may not
287
+ // be able to retrieve that memory back. Thus, we report twice the
288
+ // memory usage in order to allow for that.
289
+ // See https://github.com/elastic/x-pack-elasticsearch/issues/1020.
290
+ // Issue https://github.com/elastic/x-pack-elasticsearch/issues/857
291
+ // discusses adding an option to perform only foreground persist.
292
+ // If that gets implemented, we should only double when background
293
+ // persist is configured.
294
+
295
+ // We also scale the reported memory usage by the inverse of the byte limit margin.
296
+ // This gives the user a fairer indication of how close the job is to hitting
297
+ // the model memory limit in a concise manner (as the limit is scaled down by
298
+ // the margin during the beginning period of the job's existence).
299
+ size_t adjustedUsage = static_cast <std::size_t >(2 * usage / m_ByteLimitMargin);
300
+ return adjustedUsage;
301
+ }
302
+
284
303
void CResourceMonitor::acceptAllocationFailureResult (core_t ::TTime time) {
285
304
m_MemoryStatus = model_t ::E_MemoryStatusHardLimit;
286
305
++m_AllocationFailures[time ];
0 commit comments