Skip to content

Commit def6d14

Browse files
authored
[7.8][ML] Fix memory usage estimation for vectors with a custom allocator (#1162)
Backport #1156.
1 parent 49e8719 commit def6d14

File tree

5 files changed

+42
-14
lines changed

5 files changed

+42
-14
lines changed

include/core/CMemory.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,8 +327,8 @@ class CORE_EXPORT CMemory : private CNonInstantiatable {
327327
}
328328

329329
//! Overload for std::vector.
330-
template<typename T>
331-
static std::size_t dynamicSize(const std::vector<T>& t) {
330+
template<typename T, typename A>
331+
static std::size_t dynamicSize(const std::vector<T, A>& t) {
332332
std::size_t mem = 0;
333333
if (!memory_detail::SDynamicSizeAlwaysZero<T>::value()) {
334334
for (auto i = t.begin(); i != t.end(); ++i) {
@@ -781,9 +781,9 @@ class CORE_EXPORT CMemoryDebug : private CNonInstantiatable {
781781
}
782782

783783
//! Overload for std::vector.
784-
template<typename T>
784+
template<typename T, typename A>
785785
static void dynamicSize(const char* name,
786-
const std::vector<T>& t,
786+
const std::vector<T, A>& t,
787787
const CMemoryUsage::TMemoryUsagePtr& mem) {
788788
std::string componentName(name);
789789

lib/api/CDataFrameTrainBoostedTreeClassifierRunner.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,9 +162,9 @@ void CDataFrameTrainBoostedTreeClassifierRunner::writeOneRow(
162162
}
163163

164164
if (featureImportance != nullptr) {
165+
int numberClasses{static_cast<int>(classValues.size())};
165166
featureImportance->shap(
166-
row, [&writer, &classValues](
167-
const maths::CTreeShapFeatureImportance::TSizeVec& indices,
167+
row, [&](const maths::CTreeShapFeatureImportance::TSizeVec& indices,
168168
const TStrVec& featureNames,
169169
const maths::CTreeShapFeatureImportance::TVectorVec& shap) {
170170
writer.Key(FEATURE_IMPORTANCE_FIELD_NAME);
@@ -178,8 +178,7 @@ void CDataFrameTrainBoostedTreeClassifierRunner::writeOneRow(
178178
writer.Key(IMPORTANCE_FIELD_NAME);
179179
writer.Double(shap[i](0));
180180
} else {
181-
for (int j = 0;
182-
j < shap[i].size() && j < classValues.size(); ++j) {
181+
for (int j = 0; j < shap[i].size() && j < numberClasses; ++j) {
183182
writer.Key(classValues[j]);
184183
writer.Double(shap[i](j));
185184
}

lib/api/unittest/CDataFrameAnalyzerTrainingTest.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeRegressionTrainingMse) {
331331

332332
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(
333333
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 4500000);
334-
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1600000);
334+
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1800000);
335335
BOOST_TEST_REQUIRE(
336336
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
337337
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));
@@ -721,7 +721,7 @@ BOOST_AUTO_TEST_CASE(testRunBoostedTreeClassifierTraining) {
721721

722722
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(
723723
counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 4500000);
724-
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1600000);
724+
BOOST_TEST_REQUIRE(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1800000);
725725
BOOST_TEST_REQUIRE(
726726
core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) <
727727
core::CProgramCounters::counter(counter_t::E_DFTPMEstimatedPeakMemoryUsage));

lib/core/unittest/CDataFrameTest.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -427,9 +427,10 @@ BOOST_FIXTURE_TEST_CASE(testMemoryUsage, CTestFixture) {
427427

428428
// Memory usage should be less than:
429429
// 1) 1075 + 4 times the root directory length bytes for on disk, and
430-
// 2) data size + doc ids size + 900 byte overhead in main memory.
430+
// 2) data size + doc ids size + 900 byte overhead in main memory. Note that
431+
// we round up the number of columns to the next multiple of the alignment.
431432
std::size_t maximumMemory[]{1075 + 4 * rootDirectory.length(),
432-
rows * (cols + 1) * 4 + 900};
433+
rows * (4 * ((cols + 3) / 4) + 1) * 4 + 900};
433434

434435
std::string type[]{"on disk", "main memory"};
435436
std::size_t t{0};

lib/core/unittest/CMemoryUsageTest.cc

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* you may not use this file except in compliance with the Elastic License.
55
*/
66

7+
#include <core/CAlignment.h>
78
#include <core/CContainerPrinter.h>
89
#include <core/CHashing.h>
910
#include <core/CLogger.h>
@@ -167,8 +168,10 @@ class CBase {
167168

168169
virtual std::size_t staticSize() const { return sizeof(*this); }
169170

171+
const std::uint64_t* fixed() const { return m_Fixed; } // suppress warning
172+
170173
private:
171-
uint64_t m_Fixed[5];
174+
std::uint64_t m_Fixed[5];
172175
TIntVec m_Vec;
173176
};
174177

@@ -193,8 +196,10 @@ class CDerived : public CBase {
193196

194197
virtual std::size_t staticSize() const { return sizeof(*this); }
195198

199+
const std::uint64_t* fixed() const { return m_Fixed; } // suppress warning
200+
196201
private:
197-
uint64_t m_Fixed[50];
202+
std::uint64_t m_Fixed[50];
198203
TStrVec m_Strings;
199204
};
200205

@@ -1204,4 +1209,27 @@ BOOST_AUTO_TEST_CASE(testSmallVector) {
12041209
BOOST_TEST_REQUIRE(extraMem > 0);
12051210
}
12061211

1212+
//! Check that the memory usage reported for a std::vector with a custom
//! (aligned) allocator matches that of a std::vector with the default
//! allocator, for both core::CMemory and core::CMemoryDebug.
BOOST_AUTO_TEST_CASE(testAlignedVector) {
    using TDoubleVec = std::vector<double>;
    using TAlignedDoubleVec = std::vector<double, core::CAlignedAllocator<double>>;

    TDoubleVec vector{10.0, 11.0, 12.0, 13.0, 14.0,
                      15.0, 16.0, 17.0, 18.0, 19.0};
    TAlignedDoubleVec alignedVector{10.0, 11.0, 12.0, 13.0, 14.0,
                                    15.0, 16.0, 17.0, 18.0, 19.0};

    // The core::CMemory overload must report the same usage for both vectors.
    LOG_DEBUG(<< "TDoubleVec usage = " << core::CMemory::dynamicSize(vector));
    LOG_DEBUG(<< "TAlignedDoubleVec usage = " << core::CMemory::dynamicSize(alignedVector));
    BOOST_REQUIRE_EQUAL(core::CMemory::dynamicSize(vector),
                        core::CMemory::dynamicSize(alignedVector));

    // Exercise the core::CMemoryDebug overload with the aligned vector itself.
    // Passing the default allocator vector here would leave the custom
    // allocator overload untested.
    core::CMemoryUsage memoryUsage;
    memoryUsage.setName("test", 0);
    core::CMemoryDebug::dynamicSize("TAlignedDoubleVec", alignedVector,
                                    memoryUsage.addChild());
    std::ostringstream ss;
    memoryUsage.print(ss);
    LOG_DEBUG(<< "TAlignedDoubleVec usage debug = " << ss.str());
    BOOST_REQUIRE_EQUAL(core::CMemory::dynamicSize(alignedVector), memoryUsage.usage());
}
1234+
12071235
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)