From a7b1ce2b2320b4c2eca98db06ae2dc4503931490 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:39:45 +1300 Subject: [PATCH 01/11] [ML] Report the "actual" memory usage of the autodetect process Determine the actual memory usage of the autodetect process as reported by the OS, e.g. on Linux this would be the value of the maximum resident set size returned by a call to `getrusage`. Add this value to the model size stats record returned to the ES Java process so it can be included in the `job counts` tab for anomaly detection jobs. --- bin/autodetect/Main.cc | 6 +- include/core/CProgramCounters.h | 7 ++- include/model/CResourceMonitor.h | 3 + include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 4 ++ lib/api/unittest/CAnomalyJobLimitTest.cc | 31 +++++++++- lib/api/unittest/CJsonOutputWriterTest.cc | 59 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 1 + lib/core/CProcessStats_Linux.cc | 7 ++- lib/core/CProcessStats_MacOSX.cc | 9 ++- lib/core/CProcessStats_Windows.cc | 12 +++- lib/model/CResourceMonitor.cc | 6 ++ lib/model/ModelTypes.cc | 2 + lib/model/unittest/CResourceMonitorTest.cc | 14 ++++- 14 files changed, 126 insertions(+), 39 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 904920e3db..bbb90c706a 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures, ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, - ml::counter_t::E_TSADOutputMemoryAllocatorUsage}; + ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -151,6 +153,8 @@ int main(int argc, char** argv) { } cancellerThread.stop(); + LOG_DEBUG(<< "Max Resident Set Size: " << 
ml::core::CProcessStats::maxResidentSetSize()); + // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 3c4d10269f..34d5cdbb26 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,6 +112,9 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The maximum resident set size of the process, in bytes. + E_TSADMaxResidentSetSize = 31, + // Data Frame Outlier Detection //! The estimated peak memory usage for outlier detection in bytes @@ -146,7 +149,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 31 + E_LastEnumCounter = 32 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -355,6 +358,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 5c7583888b..c9c887281f 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,6 +54,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t 
s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; + std::size_t s_ActualMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -180,6 +181,8 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; + std::size_t actualMemoryUsage() const; + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index acbcc14c04..aeffe27e83 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,9 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size + E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size + E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. 
diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 43fef49602..75604c7f6a 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,6 +25,7 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; +const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -60,6 +61,9 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); + writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); + writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index b003e90a53..938892589c 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -9,6 +9,7 @@ * limitation. 
*/ #include +#include #include #include @@ -92,6 +93,10 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; + std::size_t actualUsage{0}; + std::size_t baseline{0}; + std::size_t nonLimitedAdjustedActualUsage{0}; + std::size_t limitedAdjustedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -105,6 +110,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + //limits.resourceMonitor().m_ByteLimitHigh = 100000; //limits.resourceMonitor().m_ByteLimitLow = 90000; @@ -127,8 +134,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedAdjustedActualUsage = actualUsage - baseline; } } + LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -138,6 +152,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -166,11 +182,18 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + 
limitedAdjustedActualUsage = actualUsage - baseline; } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); } } @@ -375,6 +398,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -384,6 +408,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -423,11 +448,12 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor(). 
createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -437,6 +463,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -479,6 +506,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -486,6 +514,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ba44163e7c..b4b9db851a 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,21 
+1728,22 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ByFields = 5; - resourceUsage.s_PartitionFields = 6; - resourceUsage.s_OverFields = 7; - resourceUsage.s_AllocationFailures = 8; + resourceUsage.s_ActualMemoryUsage = 5; + resourceUsage.s_ByFields = 6; + resourceUsage.s_PartitionFields = 7; + resourceUsage.s_OverFields = 8; + resourceUsage.s_AllocationFailures = 9; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes; - resourceUsage.s_BucketStartTime = 9; - resourceUsage.s_BytesExceeded = 10; - resourceUsage.s_BytesMemoryLimit = 11; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; + resourceUsage.s_BucketStartTime = 10; + resourceUsage.s_BytesExceeded = 11; + resourceUsage.s_BytesMemoryLimit = 12; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1770,44 +1771,46 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { 
BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("total_partition_field_count").to_number()); + 7, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 8, sizeStats.at("bucket_allocation_failures_count").to_number()); + 9, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("current_model_bytes", + BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); 
BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 11, sizeStats.at("model_bytes_memory_limit").to_number()); + 12, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 14, sizeStats.at("frequent_category_count").to_number()); + 15, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(15, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index 
ff404b7f91..eb4d382b9a 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,6 +36,7 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) + 409600, // Actual memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e5ab8cdfd0..b0c02425e9 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -87,7 +88,11 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - return static_cast(rusage.ru_maxrss * 1024L); + std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + + return maxRSS; } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 9aa1e969c9..f1c55e2aae 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -8,9 +8,11 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. 
*/ -#include #include +#include +#include + #include #include #include @@ -31,9 +33,10 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - + std::size_t maxRSS = static_cast(rusage.ru_maxrss); + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss); + return maxRSS; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 7ca2d7e6c0..78cb418d93 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -8,8 +8,10 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include + +#include +#include #include #include @@ -36,7 +38,13 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError()); return 0; } - return static_cast(stats.PeakWorkingSetSize); + + std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + + return peakWorkingSetSize; } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index d93b3b8bd8..a0b74ed6f3 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -382,6 +383,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -491,5 +493,9 @@ std::size_t 
CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } +std::size_t CResourceMonitor::actualMemoryUsage() const { + return core::CProcessStats::maxResidentSetSize(); +} + } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 2fab1d1c2a..a9140a66b3 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,6 +1733,8 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; + case E_AssignmentBasisActualMemoryUsageBytes: + return "actual_memory_usage_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f69dccc384..f8fe8fcc89 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) { } BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { - // Clear the counter so that other test cases do not interfere. + // Clear the counters so that other test cases do not interfere. 
core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0; CLimits limits; @@ -549,6 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -556,6 +559,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -563,12 +569,18 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); + + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= 
m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From cb957cacfaa15484196a48c42467a8a5ab7c8040 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:53:50 +1300 Subject: [PATCH 02/11] Update changelog --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 75117ca066..0b118eb82b 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846]) === Bug Fixes From 8f73f02d7486068b1d43f4a337b0593fcdd1ef12 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 16:28:53 +1300 Subject: [PATCH 03/11] Formatting --- include/model/ModelTypes.h | 2 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 2 +- lib/api/unittest/CJsonOutputWriterTest.cc | 3 ++- lib/core/CProcessStats_Windows.cc | 1 - lib/model/unittest/CResourceMonitorTest.cc | 12 ++++++++---- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index aeffe27e83..ef09536551 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -721,7 +721,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. 
diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 938892589c..f1531f46ec 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -448,7 +448,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor(). createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index b4b9db851a..95c5e319b8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,7 +1772,8 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 78cb418d93..d6e6e4649c 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -41,7 +41,6 @@ std::size_t CProcessStats::maxResidentSetSize() { std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - 
CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; return peakWorkingSetSize; diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f8fe8fcc89..193746addc 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -550,7 +550,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -560,7 +561,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(-50); @@ -570,7 +572,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -580,7 +583,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); 
BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From d3a39aedc57c7de332d81e3ce8797df5b142e409 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:43:50 +1200 Subject: [PATCH 04/11] Appease SonarQube --- lib/core/CProcessStats_Linux.cc | 2 +- lib/core/CProcessStats_MacOSX.cc | 2 +- lib/core/CProcessStats_Windows.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index b0c02425e9..c858b4e589 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -88,7 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index f1c55e2aae..c3edc85424 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -33,7 +33,7 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - std::size_t maxRSS = static_cast(rusage.ru_maxrss); + auto maxRSS = static_cast(rusage.ru_maxrss); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes return maxRSS; diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d6e6e4649c..d91db5e3c9 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -39,7 +39,7 @@ std::size_t 
CProcessStats::maxResidentSetSize() { return 0; } - std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; From e2d1bf5f0e2a04bd772fa77e7ea96a8582cf6f00 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:46:27 +1200 Subject: [PATCH 05/11] Tweak unit test for platform portability --- lib/api/unittest/CAnomalyJobLimitTest.cc | 32 +++++++----------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index f1531f46ec..348b4162db 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,10 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t actualUsage{0}; - std::size_t baseline{0}; - std::size_t nonLimitedAdjustedActualUsage{0}; - std::size_t limitedAdjustedActualUsage{0}; + std::size_t nonLimitedActualUsage{0}; + std::size_t limitedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -110,10 +108,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - - //limits.resourceMonitor().m_ByteLimitHigh = 100000; - //limits.resourceMonitor().m_ByteLimitLow = 90000; { LOG_TRACE(<< "Setting up job"); @@ -134,15 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - nonLimitedAdjustedActualUsage = actualUsage - baseline; + nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } } LOG_DEBUG(<< "nonLimitedUsage: " 
<< nonLimitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -152,8 +143,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -182,18 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - limitedAdjustedActualUsage = actualUsage - baseline; + limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); + LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); + LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); } } From 1a9a99ae5e69956976caea9d0963c70658efb25d Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 
9 Apr 2025 16:01:06 +1200 Subject: [PATCH 06/11] Attend to review comments * ActualMemory -> SystemMemory * Report current resident set size as well as max --- bin/autodetect/Main.cc | 1 + include/core/CProgramCounters.h | 9 ++- include/model/CResourceMonitor.h | 9 ++- include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 10 ++- lib/api/unittest/CAnomalyJobLimitTest.cc | 33 +++++----- lib/api/unittest/CJsonOutputWriterTest.cc | 66 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 7 +- lib/core/CProcessStats_Linux.cc | 2 + lib/core/CProcessStats_MacOSX.cc | 1 + lib/core/CProcessStats_Windows.cc | 6 +- lib/model/CResourceMonitor.cc | 10 ++- lib/model/ModelTypes.cc | 6 +- lib/model/unittest/CResourceMonitorTest.cc | 16 ++--- 14 files changed, 110 insertions(+), 70 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index bbb90c706a..11f7674634 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,6 +85,7 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADResidentSetSize, ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 34d5cdbb26..9bad82389a 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,8 +112,11 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The resident set size of the process, in bytes. + E_TSADResidentSetSize = 31, + //! The maximum resident set size of the process, in bytes. - E_TSADMaxResidentSetSize = 31, + E_TSADMaxResidentSetSize = 32, // Data Frame Outlier Detection @@ -149,7 +152,7 @@ enum ECounterTypes { // Add any new values here //! 
This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 32 + E_LastEnumCounter = 33 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -358,6 +361,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + "The resident set size of the process, in bytes"}, {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index c9c887281f..8cb9d5f3ac 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; - std::size_t s_ActualMemoryUsage{0}; + std::size_t s_SystemMemoryUsage{0}; + std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -181,7 +182,11 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - std::size_t actualMemoryUsage() const; + //! Returns the current physical memory of the process as reported by the system + std::size_t systemMemory() const; + + //! 
Returns the maximum physical memory of the processs as reported by the system + std::size_t maxSystemMemory() const; private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index ef09536551..66674ed39d 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -720,8 +720,8 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size + E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 75604c7f6a..12858914af 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,7 +25,8 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; -const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; +const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"}; +const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -61,8 +62,11 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); - 
writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_SystemMemoryUsage); + + writer.onKey(MAX_SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_MaxSystemMemoryUsage); writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 348b4162db..cc85baaf99 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,8 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t nonLimitedActualUsage{0}; - std::size_t limitedActualUsage{0}; + std::size_t nonLimitedMaxSystemUsage{0}; + std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -128,12 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); + BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -171,15 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + limitedMaxSystemUsage = 
limits.resourceMonitor().maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); - LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); + LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); + LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -384,7 +384,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -394,7 +395,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -439,7 +440,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << 
used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -449,7 +451,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -492,7 +494,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -500,7 +503,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git 
a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 95c5e319b8..82c11c31f2 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,22 +1728,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ActualMemoryUsage = 5; - resourceUsage.s_ByFields = 6; - resourceUsage.s_PartitionFields = 7; - resourceUsage.s_OverFields = 8; - resourceUsage.s_AllocationFailures = 9; + resourceUsage.s_SystemMemoryUsage = 5; + resourceUsage.s_MaxSystemMemoryUsage = 6; + resourceUsage.s_ByFields = 7; + resourceUsage.s_PartitionFields = 8; + resourceUsage.s_OverFields = 9; + resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; - resourceUsage.s_BucketStartTime = 10; - resourceUsage.s_BytesExceeded = 11; - resourceUsage.s_BytesMemoryLimit = 12; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_BucketStartTime = 11; + resourceUsage.s_BytesExceeded = 12; + resourceUsage.s_BytesMemoryLimit = 13; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; + 
resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1771,47 +1772,50 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("actual_memory_usage_bytes").to_number()); + 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); + BOOST_REQUIRE_EQUAL( + 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 7, sizeStats.at("total_partition_field_count").to_number()); + 8, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 9, sizeStats.at("bucket_allocation_failures_count").to_number()); + 10, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(11000, 
sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", + BOOST_REQUIRE_EQUAL("system_memory_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 12, sizeStats.at("model_bytes_memory_limit").to_number()); + 13, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 15, sizeStats.at("frequent_category_count").to_number()); + 16, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); + 
BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index eb4d382b9a..c3c32aa022 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,7 +36,8 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) - 409600, // Actual memory used (max rss) + 409600, // System memory used (rss) + 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields @@ -117,6 +118,10 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 409600, modelSizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index c858b4e589..6511a209f1 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,6 +76,8 @@ std::size_t CProcessStats::residentSetSize() { } } + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + return rss; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 
c3edc85424..855b0bdca9 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,6 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; return 0; } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d91db5e3c9..e4e2baa163 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,7 +29,11 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - return static_cast(stats.WorkingSetSize); + auto workingSetSize = static_cast(stats.WorkingSetSize); + + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + + return workingSetSize; } std::size_t CProcessStats::maxResidentSetSize() { diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index a0b74ed6f3..69354f5629 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -383,7 +383,8 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); + res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); + res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -493,9 +494,12 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::actualMemoryUsage() const { - return core::CProcessStats::maxResidentSetSize(); +std::size_t CResourceMonitor::systemMemory() const { + return core::CProcessStats::residentSetSize(); } 
+std::size_t CResourceMonitor::maxSystemMemory() const { + return core::CProcessStats::maxResidentSetSize(); +} } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index a9140a66b3..1bddf8bae8 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,8 +1733,10 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisActualMemoryUsageBytes: - return "actual_memory_usage_bytes"; + case E_AssignmentBasisSystemMemoryBytes: + return "system_memory_bytes"; + case E_AssignmentBasisMaxSystemMemoryBytes: + return "max_system_memory_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index 193746addc..c17c644c80 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -549,9 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_SystemMemoryUsage); monitor.addExtraMemory(100); @@ -560,9 +560,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); 
BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(-50); @@ -571,9 +571,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(100); @@ -582,9 +582,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From 5ae22cb50c89f8176df8eca696684d294f16cf69 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:27 +1200 Subject: [PATCH 07/11] Update bin/autodetect/Main.cc Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- bin/autodetect/Main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..93cb77e5d7 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -155,7 +155,7 @@ int main(int argc, char** argv) { 
cancellerThread.stop(); LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); - +LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. From fe6f1fac7df9daf4b6a7a8f42c0ea3ce0f668a32 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:49 +1200 Subject: [PATCH 08/11] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..d1815efa81 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; //! 
Returns the maximum physical memory of the processs as reported by the system From 582430ea4fbfbd0d5cf486af35fc4f469e0c50d0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:09:09 +1200 Subject: [PATCH 09/11] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index d1815efa81..ff156d007d 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -185,7 +185,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; - //! Returns the maximum physical memory of the processs as reported by the system + //! Returns the maximum physical memory of the process (max rss) as reported by the system std::size_t maxSystemMemory() const; private: From 9476edee78422a088b9c94d104680203d1c21bf2 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 14:59:43 +1200 Subject: [PATCH 10/11] Attend to review comments --- bin/autodetect/Main.cc | 6 +++--- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 2 +- include/model/ModelTypes.h | 4 +--- lib/api/unittest/CJsonOutputWriterTest.cc | 6 +++--- lib/core/CProcessStats_Linux.cc | 4 ++-- lib/core/CProcessStats_MacOSX.cc | 4 ++-- lib/core/CProcessStats_Windows.cc | 4 ++-- lib/model/ModelTypes.cc | 4 ---- 9 files changed, 18 insertions(+), 24 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..6037d4ba43 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,8 +85,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, - 
ml::counter_t::E_TSADResidentSetSize, - ml::counter_t::E_TSADMaxResidentSetSize}; + ml::counter_t::E_TSADSystemMemoryUsage, + ml::counter_t::E_TSADMaxSystemMemoryUsage}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -154,7 +154,7 @@ int main(int argc, char** argv) { } cancellerThread.stop(); - LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 9bad82389a..fd9c949a5d 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -113,10 +113,10 @@ enum ECounterTypes { E_TSADOutputMemoryAllocatorUsage = 30, //! The resident set size of the process, in bytes. - E_TSADResidentSetSize = 31, + E_TSADSystemMemoryUsage = 31, //! The maximum resident set size of the process, in bytes. 
- E_TSADMaxResidentSetSize = 32, + E_TSADMaxSystemMemoryUsage = 32, // Data Frame Outlier Detection @@ -361,9 +361,9 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..cd8ea1ad58 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory (rss) of the process as reported by the system std::size_t systemMemory() const; //! 
Returns the maximum physical memory of the processs as reported by the system diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 66674ed39d..9494935b92 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,9 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size - E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size + E_AssignmentBasisPeakModelBytes = 3 }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 82c11c31f2..495fd77a52 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { +BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1735,7 +1735,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_OverFields = 9; resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; resourceUsage.s_BucketStartTime = 11; resourceUsage.s_BytesExceeded = 12; resourceUsage.s_BytesMemoryLimit = 13; @@ -1793,7 +1793,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { 
BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("system_memory_bytes", + BOOST_REQUIRE_EQUAL("peak_model_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index 6511a209f1..ecaaf7f272 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,7 +76,7 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; return rss; } @@ -92,7 +92,7 @@ std::size_t CProcessStats::maxResidentSetSize() { // ru_maxrss is in kilobytes auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; return maxRSS; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 855b0bdca9..d308463c4e 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; // ru_maxrss is in bytes return maxRSS; } diff --git a/lib/core/CProcessStats_Windows.cc 
b/lib/core/CProcessStats_Windows.cc index e4e2baa163..cc88daad75 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -31,7 +31,7 @@ std::size_t CProcessStats::residentSetSize() { auto workingSetSize = static_cast(stats.WorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; return workingSetSize; } @@ -45,7 +45,7 @@ std::size_t CProcessStats::maxResidentSetSize() { auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; return peakWorkingSetSize; } diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 1bddf8bae8..2fab1d1c2a 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,10 +1733,6 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisSystemMemoryBytes: - return "system_memory_bytes"; - case E_AssignmentBasisMaxSystemMemoryBytes: - return "max_system_memory_bytes"; } return "-"; } From 475fef1aec8111be815bae561bdc1b82d7bb37fb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 15:12:47 +1200 Subject: [PATCH 11/11] Formatting --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 495fd77a52..ea16f9a19d 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { 
+BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1773,8 +1773,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); BOOST_REQUIRE_EQUAL( 6, sizeStats.at("max_system_memory_bytes").to_number());