Skip to content

Commit d12416f

Browse files
committed
[ML] Improvements to upfront memory estimation for data frame analyses (elastic#1003)
1 parent a45db7d commit d12416f

12 files changed

+157
-120
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ and classification. (See {ml-pull}948[#948].)
4343
(See {ml-pull}991[#991].)
4444
* Add new model_size_stats fields to instrument categorization. (See {ml-pull}948[#948]
4545
and {pull}51879[#51879], issue: {issue}50794[#50749].)
46+
* Improve upfront memory estimation for all data frame analyses, which were higher than
47+
necessary. This will improve the allocation of data frame analyses to cluster nodes.
48+
(See {ml-pull}1003[#1003].)
4649

4750
=== Bug Fixes
4851

include/api/CDataFrameAnalysisInstrumentation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class API_EXPORT CDataFrameAnalysisInstrumentation
7474
private:
7575
void writeProgress(std::uint32_t step);
7676
void writeMemory(std::uint32_t step);
77-
void writeState(uint32_t step);
77+
void writeState(std::uint32_t step);
7878

7979
private:
8080
std::atomic_bool m_Finished;

include/maths/CDataFrameAnalysisInstrumentationInterface.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class MATHS_EXPORT CDataFrameAnalysisInstrumentationInterface {
2020
public:
2121
using TProgressCallback = std::function<void(double)>;
2222
using TMemoryUsageCallback = std::function<void(std::int64_t)>;
23+
using TStepCallback = std::function<void(std::uint32_t)>;
2324

2425
public:
2526
virtual ~CDataFrameAnalysisInstrumentationInterface() = default;
@@ -47,6 +48,10 @@ class MATHS_EXPORT CDataFrameAnalysisInstrumentationInterface {
4748
TMemoryUsageCallback memoryUsageCallback() {
4849
return [this](std::int64_t delta) { this->updateMemoryUsage(delta); };
4950
}
51+
//! Factory for the nextStep() callback function object.
52+
TStepCallback stepCallback() {
53+
return [this](std::uint32_t step) { this->nextStep(step); };
54+
}
5055
};
5156

5257
//! \brief Dummies out all instrumentation.

lib/api/CDataFrameAnalysisRunner.cc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ TBoolVec CDataFrameAnalysisRunner::columnsForWhichEmptyIsMissing(const TStrVec&
4848

4949
void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJsonWriter& writer) const {
5050
std::size_t numberRows{m_Spec.numberRows()};
51-
std::size_t numberColumns{m_Spec.numberColumns() + this->numberExtraColumns()};
51+
std::size_t numberColumns{m_Spec.numberColumns()};
5252
std::size_t maxNumberPartitions{maximumNumberPartitions(m_Spec)};
5353
if (maxNumberPartitions == 0) {
5454
writer.write("0", "0");
@@ -68,7 +68,7 @@ void CDataFrameAnalysisRunner::estimateMemoryUsage(CMemoryUsageEstimationResultJ
6868
void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() {
6969

7070
std::size_t numberRows{m_Spec.numberRows()};
71-
std::size_t numberColumns{m_Spec.numberColumns() + this->numberExtraColumns()};
71+
std::size_t numberColumns{m_Spec.numberColumns()};
7272
std::size_t memoryLimit{m_Spec.memoryLimit()};
7373

7474
LOG_TRACE(<< "memory limit = " << memoryLimit);
@@ -163,8 +163,9 @@ const CDataFrameAnalysisSpecification& CDataFrameAnalysisRunner::spec() const {
163163
std::size_t CDataFrameAnalysisRunner::estimateMemoryUsage(std::size_t totalNumberRows,
164164
std::size_t partitionNumberRows,
165165
std::size_t numberColumns) const {
166-
return core::CDataFrame::estimateMemoryUsage(this->storeDataFrameInMainMemory(),
167-
totalNumberRows, numberColumns) +
166+
return core::CDataFrame::estimateMemoryUsage(
167+
this->storeDataFrameInMainMemory(), totalNumberRows,
168+
numberColumns + this->numberExtraColumns()) +
168169
this->estimateBookkeepingMemoryUsage(m_NumberPartitions, totalNumberRows,
169170
partitionNumberRows, numberColumns);
170171
}

lib/api/unittest/CDataFrameAnalysisRunnerTest.cc

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ BOOST_AUTO_TEST_CASE(testComputeAndSaveExecutionStrategyDiskUsageFlag) {
115115
}
116116
}
117117

118+
namespace {
118119
void testEstimateMemoryUsage(std::int64_t numberRows,
119120
const std::string& expectedExpectedMemoryWithoutDisk,
120121
const std::string& expectedExpectedMemoryWithDisk,
@@ -152,32 +153,33 @@ void testEstimateMemoryUsage(std::int64_t numberRows,
152153

153154
BOOST_TEST_REQUIRE(result.HasMember("expected_memory_without_disk"));
154155
BOOST_REQUIRE_EQUAL(expectedExpectedMemoryWithoutDisk,
155-
std::string(result["expected_memory_without_disk"].GetString()));
156+
result["expected_memory_without_disk"].GetString());
156157
BOOST_TEST_REQUIRE(result.HasMember("expected_memory_with_disk"));
157158
BOOST_REQUIRE_EQUAL(expectedExpectedMemoryWithDisk,
158-
std::string(result["expected_memory_with_disk"].GetString()));
159+
result["expected_memory_with_disk"].GetString());
159160

160161
BOOST_REQUIRE_EQUAL(expectedNumberErrors, static_cast<int>(errors.size()));
161162
}
163+
}
162164

163165
BOOST_AUTO_TEST_CASE(testEstimateMemoryUsageFor0Rows) {
164166
testEstimateMemoryUsage(0, "0", "0", 1);
165167
}
166168

167169
BOOST_AUTO_TEST_CASE(testEstimateMemoryUsageFor1Row) {
168-
testEstimateMemoryUsage(1, "6kB", "6kB", 0);
170+
testEstimateMemoryUsage(1, "4kB", "4kB", 0);
169171
}
170172

171173
BOOST_AUTO_TEST_CASE(testEstimateMemoryUsageFor10Rows) {
172-
testEstimateMemoryUsage(10, "15kB", "13kB", 0);
174+
testEstimateMemoryUsage(10, "12kB", "10kB", 0);
173175
}
174176

175177
BOOST_AUTO_TEST_CASE(testEstimateMemoryUsageFor100Rows) {
176-
testEstimateMemoryUsage(100, "62kB", "35kB", 0);
178+
testEstimateMemoryUsage(100, "57kB", "35kB", 0);
177179
}
178180

179181
BOOST_AUTO_TEST_CASE(testEstimateMemoryUsageFor1000Rows) {
180-
testEstimateMemoryUsage(1000, "450kB", "143kB", 0);
182+
testEstimateMemoryUsage(1000, "403kB", "142kB", 0);
181183
}
182184

183185
void testColumnsForWhichEmptyIsMissing(const std::string& analysis,

lib/api/unittest/CDataFrameAnalyzerFeatureImportanceTest.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,17 @@ struct SFixture {
156156

157157
analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "$"});
158158

159+
LOG_DEBUG(<< "estimated memory usage = "
160+
<< core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
161+
LOG_DEBUG(<< "peak memory = "
162+
<< core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage));
163+
LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain)
164+
<< "ms");
165+
166+
BOOST_TEST_REQUIRE(
167+
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
168+
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
169+
159170
rapidjson::Document results;
160171
rapidjson::ParseResult ok(results.Parse(s_Output.str()));
161172
BOOST_TEST_REQUIRE(static_cast<bool>(ok) == true);
@@ -184,6 +195,17 @@ struct SFixture {
184195

185196
analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "$"});
186197

198+
LOG_DEBUG(<< "estimated memory usage = "
199+
<< core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
200+
LOG_DEBUG(<< "peak memory = "
201+
<< core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage));
202+
LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain)
203+
<< "ms");
204+
205+
BOOST_TEST_REQUIRE(
206+
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
207+
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
208+
187209
rapidjson::Document results;
188210
rapidjson::ParseResult ok(results.Parse(s_Output.str()));
189211
BOOST_TEST_REQUIRE(static_cast<bool>(ok) == true);
@@ -208,6 +230,17 @@ struct SFixture {
208230

209231
analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "$"});
210232

233+
LOG_DEBUG(<< "estimated memory usage = "
234+
<< core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
235+
LOG_DEBUG(<< "peak memory = "
236+
<< core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage));
237+
LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain)
238+
<< "ms");
239+
240+
BOOST_TEST_REQUIRE(
241+
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
242+
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
243+
211244
rapidjson::Document results;
212245
rapidjson::ParseResult ok(results.Parse(s_Output.str()));
213246
BOOST_TEST_REQUIRE(static_cast<bool>(ok) == true);

lib/api/unittest/CDataFrameAnalyzerOutlierTest.cc

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
* you may not use this file except in compliance with the Elastic License.
55
*/
66

7-
#include "CDataFrameMockAnalysisRunner.h"
8-
97
#include <core/CContainerPrinter.h>
108
#include <core/CJsonOutputStreamWrapper.h>
119
#include <core/CProgramCounters.h>
@@ -17,11 +15,10 @@
1715
#include <api/CDataFrameAnalysisSpecification.h>
1816
#include <api/CDataFrameAnalyzer.h>
1917

18+
#include <test/BoostTestCloseAbsolute.h>
2019
#include <test/CDataFrameAnalysisSpecificationFactory.h>
2120
#include <test/CRandomNumbers.h>
2221

23-
#include <test/BoostTestCloseAbsolute.h>
24-
2522
#include <boost/test/unit_test.hpp>
2623

2724
#include <memory>
@@ -87,9 +84,10 @@ void addOutlierTestData(TStrVec fieldNames,
8784
}
8885

8986
frame->finishWritingRows();
90-
CDataFrameMockAnalysisState state;
87+
maths::CDataFrameAnalysisInstrumentationStub instrumentation;
9188
maths::COutliers::compute(
92-
{1, 1, true, method, numberNeighbours, computeFeatureInfluence, 0.05}, *frame, state);
89+
{1, 1, true, method, numberNeighbours, computeFeatureInfluence, 0.05},
90+
*frame, instrumentation);
9391

9492
expectedScores.resize(numberInliers + numberOutliers);
9593
expectedFeatureInfluences.resize(numberInliers + numberOutliers, TDoubleVec(5));
@@ -202,10 +200,17 @@ BOOST_AUTO_TEST_CASE(testRunOutlierDetection) {
202200

203201
LOG_DEBUG(<< "number partitions = "
204202
<< core::CProgramCounters::counter(counter_t::E_DFONumberPartitions));
203+
LOG_DEBUG(<< "estimated memory usage = "
204+
<< core::CProgramCounters::counter(counter_t::E_DFOEstimatedPeakMemoryUsage));
205205
LOG_DEBUG(<< "peak memory = "
206206
<< core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage));
207+
207208
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFONumberPartitions) == 1);
208209
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) < 100000);
210+
// Allow a 20% margin
211+
BOOST_TEST_REQUIRE(
212+
core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) <
213+
(120 * core::CProgramCounters::counter(counter_t::E_DFOEstimatedPeakMemoryUsage)) / 100);
209214
}
210215

211216
BOOST_AUTO_TEST_CASE(testRunOutlierDetectionPartitioned) {
@@ -249,11 +254,17 @@ BOOST_AUTO_TEST_CASE(testRunOutlierDetectionPartitioned) {
249254

250255
LOG_DEBUG(<< "number partitions = "
251256
<< core::CProgramCounters::counter(counter_t::E_DFONumberPartitions));
257+
LOG_DEBUG(<< "estimated memory usage = "
258+
<< core::CProgramCounters::counter(counter_t::E_DFOEstimatedPeakMemoryUsage));
252259
LOG_DEBUG(<< "peak memory = "
253260
<< core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage));
261+
254262
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFONumberPartitions) > 1);
255-
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) <
256-
300000); // + 16%
263+
// Allow a 20% margin
264+
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) < 120000);
265+
BOOST_TEST_REQUIRE(
266+
core::CProgramCounters::counter(counter_t::E_DFOPeakMemoryUsage) <
267+
(120 * core::CProgramCounters::counter(counter_t::E_DFOEstimatedPeakMemoryUsage)) / 100);
257268
}
258269

259270
BOOST_AUTO_TEST_CASE(testRunOutlierFeatureInfluences) {

lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,9 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeRegressionTraining) {
427427
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(
428428
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 6000000);
429429
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1500000);
430+
BOOST_TEST_REQUIRE(
431+
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
432+
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
430433
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0);
431434
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration);
432435
}
@@ -722,9 +725,13 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeClassifierTraining) {
722725
<< core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage));
723726
LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain)
724727
<< "ms");
728+
725729
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(
726730
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 6000000);
727731
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1500000);
732+
BOOST_TEST_REQUIRE(
733+
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
734+
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
728735
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0);
729736
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration);
730737
}

lib/api/unittest/CDataFrameMockAnalysisRunner.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ std::size_t CDataFrameMockAnalysisRunner::estimateBookkeepingMemoryUsage(std::si
4040

4141
const ml::api::CDataFrameAnalysisInstrumentation&
4242
CDataFrameMockAnalysisRunner::instrumentation() const {
43-
return m_State;
43+
return m_Instrumentation;
4444
}
4545

4646
ml::api::CDataFrameAnalysisInstrumentation& CDataFrameMockAnalysisRunner::instrumentation() {
47-
return m_State;
47+
return m_Instrumentation;
4848
}
4949

5050
ml::test::CRandomNumbers CDataFrameMockAnalysisRunner::ms_Rng;

lib/api/unittest/CDataFrameMockAnalysisRunner.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class CDataFrameMockAnalysisRunner final : public ml::api::CDataFrameAnalysisRun
3030
ml::core::CRapidJsonConcurrentLineWriter&) const override;
3131

3232
const ml::api::CDataFrameAnalysisInstrumentation& instrumentation() const override;
33-
3433
ml::api::CDataFrameAnalysisInstrumentation& instrumentation() override;
3534

3635
private:
@@ -42,7 +41,7 @@ class CDataFrameMockAnalysisRunner final : public ml::api::CDataFrameAnalysisRun
4241

4342
private:
4443
static ml::test::CRandomNumbers ms_Rng;
45-
CDataFrameMockAnalysisState m_State;
44+
CDataFrameMockAnalysisState m_Instrumentation;
4645
};
4746

4847
class CDataFrameMockAnalysisRunnerFactory final : public ml::api::CDataFrameAnalysisRunnerFactory {

lib/maths/CBoostedTreeImpl.cc

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,6 @@ std::size_t CBoostedTreeImpl::estimateMemoryUsage(std::size_t numberRows,
330330
std::size_t forestMemoryUsage{
331331
m_MaximumNumberTrees *
332332
(sizeof(TNodeVec) + maximumNumberNodes * sizeof(CBoostedTreeNode))};
333-
std::size_t extraColumnsMemoryUsage{numberExtraColumnsForTrain(m_Loss->numberParameters()) *
334-
numberRows * sizeof(CFloatStorage)};
335333
std::size_t foldRoundLossMemoryUsage{m_NumberFolds * m_NumberRounds *
336334
sizeof(TOptionalDouble)};
337335
std::size_t hyperparametersMemoryUsage{numberColumns * sizeof(double)};
@@ -349,8 +347,8 @@ std::size_t CBoostedTreeImpl::estimateMemoryUsage(std::size_t numberRows,
349347
this->numberHyperparametersToTune(), m_NumberRounds)};
350348
std::size_t shapMemoryUsage{
351349
m_TopShapValues > 0 ? numberRows * numberColumns * sizeof(CFloatStorage) : 0};
352-
return sizeof(*this) + forestMemoryUsage + extraColumnsMemoryUsage +
353-
foldRoundLossMemoryUsage + hyperparametersMemoryUsage +
350+
return sizeof(*this) + forestMemoryUsage + foldRoundLossMemoryUsage +
351+
hyperparametersMemoryUsage +
354352
std::max(leafNodeStatisticsMemoryUsage, shapMemoryUsage) + // not concurrent
355353
dataTypeMemoryUsage + featureSampleProbabilities + missingFeatureMaskMemoryUsage +
356354
trainTestMaskMemoryUsage + bayesianOptimisationMemoryUsage;

0 commit comments

Comments
 (0)