diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 102d5e6ae8..1c84c191de 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -49,6 +49,8 @@ * Improve robustness of anomaly detection to bad input data. (See {ml-pull}1114[#1114].) * Adds new `num_matches` and `preferred_to_categories` fields to category output. (See {ml-pull}1062[#1062].) +* Reduce peak memory usage and memory estimates for classification and regression. + (See {ml-pull}1125[#1125].) * Reduce variability of classification and regression results across our target operating systems. (See {ml-pull}1127[#1127].) * Switched data frame analytics model memory estimates from kilobytes to megabytes. diff --git a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc index 5c3c15fb11..4a69411b91 100644 --- a/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc +++ b/lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc @@ -321,8 +321,8 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeRegressionTraining) { << "ms"); BOOST_TEST_REQUIRE(core::CProgramCounters::counter( - counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 6000000); - BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1500000); + counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 4500000); + BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1600000); BOOST_TEST_REQUIRE( core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage)); @@ -643,8 +643,8 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeClassifierTraining) { << "ms"); BOOST_TEST_REQUIRE(core::CProgramCounters::counter( - counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 6000000); - BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1500000); + counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 4500000); + 
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1600000); BOOST_TEST_REQUIRE( core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage)); diff --git a/lib/api/unittest/CDataFrameMockAnalysisRunner.h b/lib/api/unittest/CDataFrameMockAnalysisRunner.h index b102d96569..849179aa37 100644 --- a/lib/api/unittest/CDataFrameMockAnalysisRunner.h +++ b/lib/api/unittest/CDataFrameMockAnalysisRunner.h @@ -13,13 +13,14 @@ #include -#include +#include +#include class CDataFrameMockAnalysisState final : public ml::api::CDataFrameAnalysisInstrumentation { public: CDataFrameMockAnalysisState(const std::string& jobId) : ml::api::CDataFrameAnalysisInstrumentation(jobId) {} - void writeAnalysisStats(std::int64_t /* timestamp */) override{}; + void writeAnalysisStats(std::int64_t /* timestamp */) override {} protected: ml::counter_t::ECounterTypes memoryCounterType() override; diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index e049a11a20..7724e3e575 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -4,7 +4,6 @@ * you may not use this file except in compliance with the Elastic License. */ -#include "maths/CBoostedTreeUtils.h" #include #include @@ -20,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +27,10 @@ #include #include +#include + +#include + namespace ml { namespace maths { using namespace boosted_tree; @@ -313,10 +317,14 @@ std::size_t CBoostedTreeImpl::estimateMemoryUsage(std::size_t numberRows, std::size_t foldRoundLossMemoryUsage{m_NumberFolds * m_NumberRounds * sizeof(TOptionalDouble)}; std::size_t hyperparametersMemoryUsage{numberColumns * sizeof(double)}; + // We only maintain statistics for leaves we know we may possibly split; this + // halves the peak number of statistics we maintain. 
std::size_t leafNodeStatisticsMemoryUsage{ - maximumNumberLeaves * CBoostedTreeLeafNodeStatistics::estimateMemoryUsage( - numberRows, maximumNumberFeatures, m_NumberSplitsPerFeature, - m_Loss->numberParameters())}; + maximumNumberLeaves * + CBoostedTreeLeafNodeStatistics::estimateMemoryUsage( + numberRows, maximumNumberFeatures, m_NumberSplitsPerFeature, + m_Loss->numberParameters()) / + 2}; std::size_t dataTypeMemoryUsage{maximumNumberFeatures * sizeof(CDataFrameUtils::SDataType)}; std::size_t featureSampleProbabilities{maximumNumberFeatures * sizeof(double)}; std::size_t missingFeatureMaskMemoryUsage{ @@ -724,14 +732,13 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, LOG_TRACE(<< "Training one tree..."); using TLeafNodeStatisticsPtr = CBoostedTreeLeafNodeStatistics::TPtr; - using TLeafNodeStatisticsPtrQueue = - std::priority_queue, COrderings::SLess>; + using TLeafNodeStatisticsPtrQueue = boost::circular_buffer; TNodeVec tree(1); tree.reserve(2 * maximumTreeSize + 1); - TLeafNodeStatisticsPtrQueue leaves; - leaves.push(std::make_shared( + TLeafNodeStatisticsPtrQueue leaves(maximumTreeSize / 2 + 3); + leaves.push_back(std::make_shared( 0 /*root*/, m_NumberInputColumns, m_Loss->numberParameters(), m_NumberThreads, frame, *m_Encoder, m_Regularization, candidateSplits, this->featureBag(), 0 /*depth*/, trainingRowMask)); @@ -755,10 +762,16 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, double totalGain{0.0}; + COrderings::SLess less; + for (std::size_t i = 0; i < maximumTreeSize; ++i) { - auto leaf = leaves.top(); - leaves.pop(); + if (leaves.empty()) { + break; + } + + auto leaf = leaves.back(); + leaves.pop_back(); scopeMemoryUsage.remove(leaf); @@ -767,7 +780,8 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, } totalGain += leaf->gain(); - LOG_TRACE(<< "splitting " << leaf->id() << " total gain = " << totalGain); + LOG_TRACE(<< "splitting " << leaf->id() << " leaf gain = " << leaf->gain() + << " total gain = " << totalGain); std::size_t 
splitFeature; double splitValue; @@ -786,11 +800,26 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, leftChildId, rightChildId, m_NumberThreads, frame, *m_Encoder, m_Regularization, candidateSplits, this->featureBag(), tree[leaf->id()]); - scopeMemoryUsage.add(leftChild); - scopeMemoryUsage.add(rightChild); + if (less(rightChild, leftChild)) { + std::swap(leftChild, rightChild); + } - leaves.push(std::move(leftChild)); - leaves.push(std::move(rightChild)); + std::size_t n{leaves.size()}; + if (leftChild->gain() >= MINIMUM_RELATIVE_GAIN_PER_SPLIT * totalGain) { + scopeMemoryUsage.add(leftChild); + leaves.push_back(std::move(leftChild)); + } + if (rightChild->gain() >= MINIMUM_RELATIVE_GAIN_PER_SPLIT * totalGain) { + scopeMemoryUsage.add(rightChild); + leaves.push_back(std::move(rightChild)); + } + std::inplace_merge(leaves.begin(), leaves.begin() + n, leaves.end(), less); + + // Drop any leaves which can't possibly be split. + while (leaves.size() + i + 1 > maximumTreeSize) { + scopeMemoryUsage.remove(leaves.front()); + leaves.pop_front(); + } } tree.shrink_to_fit();