Skip to content

Commit 0e44e14

Browse files
committed
[ML] Boosted tree tidy ups (elastic#1155)
1 parent 36515fd commit 0e44e14

File tree

4 files changed

+16
-26
lines changed

4 files changed

+16
-26
lines changed

include/maths/CBoostedTreeLeafNodeStatistics.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -506,7 +506,6 @@ class MATHS_EXPORT CBoostedTreeLeafNodeStatistics final {
506506
};
507507

508508
private:
509-
void maybeRecoverMemory();
510509
void computeAggregateLossDerivatives(std::size_t numberThreads,
511510
const core::CDataFrame& frame,
512511
const CDataFrameCategoryEncoder& encoder);

lib/maths/CBoostedTreeImpl.cc

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1005,12 +1005,13 @@ void CBoostedTreeImpl::refreshPredictionsAndLossDerivatives(core::CDataFrame& fr
10051005
core::bindRetrievableState(
10061006
[&](TArgMinLossVec& leafValues_, TRowItr beginRows, TRowItr endRows) {
10071007
std::size_t numberLossParameters{m_Loss->numberParameters()};
1008-
for (auto row = beginRows; row != endRows; ++row) {
1009-
auto prediction = readPrediction(*row, m_ExtraColumns,
1008+
for (auto row_ = beginRows; row_ != endRows; ++row_) {
1009+
auto row = *row_;
1010+
auto prediction = readPrediction(row, m_ExtraColumns,
10101011
numberLossParameters);
1011-
double actual{readActual(*row, m_DependentVariable)};
1012-
double weight{readExampleWeight(*row, m_ExtraColumns)};
1013-
leafValues_[root(tree).leafIndex(m_Encoder->encode(*row), tree)]
1012+
double actual{readActual(row, m_DependentVariable)};
1013+
double weight{readExampleWeight(row, m_ExtraColumns)};
1014+
leafValues_[root(tree).leafIndex(m_Encoder->encode(row), tree)]
10141015
.add(prediction, actual, weight);
10151016
}
10161017
},
@@ -1038,13 +1039,14 @@ void CBoostedTreeImpl::refreshPredictionsAndLossDerivatives(core::CDataFrame& fr
10381039
m_NumberThreads, 0, frame.numberRows(),
10391040
[&](TRowItr beginRows, TRowItr endRows) {
10401041
std::size_t numberLossParameters{m_Loss->numberParameters()};
1041-
for (auto row = beginRows; row != endRows; ++row) {
1042-
auto prediction = readPrediction(*row, m_ExtraColumns, numberLossParameters);
1043-
double actual{readActual(*row, m_DependentVariable)};
1044-
double weight{readExampleWeight(*row, m_ExtraColumns)};
1045-
prediction += root(tree).value(m_Encoder->encode(*row), tree);
1046-
writeLossGradient(*row, m_ExtraColumns, *m_Loss, prediction, actual, weight);
1047-
writeLossCurvature(*row, m_ExtraColumns, *m_Loss, prediction, actual, weight);
1042+
for (auto row_ = beginRows; row_ != endRows; ++row_) {
1043+
auto row = *row_;
1044+
auto prediction = readPrediction(row, m_ExtraColumns, numberLossParameters);
1045+
double actual{readActual(row, m_DependentVariable)};
1046+
double weight{readExampleWeight(row, m_ExtraColumns)};
1047+
prediction += root(tree).value(m_Encoder->encode(row), tree);
1048+
writeLossGradient(row, m_ExtraColumns, *m_Loss, prediction, actual, weight);
1049+
writeLossCurvature(row, m_ExtraColumns, *m_Loss, prediction, actual, weight);
10481050
}
10491051
},
10501052
&updateRowMask);

lib/maths/CBoostedTreeLeafNodeStatistics.cc

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -104,8 +104,6 @@ CBoostedTreeLeafNodeStatistics::split(std::size_t leftChildId,
104104
auto rightChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
105105
rightChildId, std::move(*this), *leftChild, regularization,
106106
featureBag, std::move(rightChildRowMask));
107-
leftChild->maybeRecoverMemory();
108-
rightChild->maybeRecoverMemory();
109107

110108
return std::make_pair(leftChild, rightChild);
111109
}
@@ -119,8 +117,6 @@ CBoostedTreeLeafNodeStatistics::split(std::size_t leftChildId,
119117
auto leftChild = std::make_shared<CBoostedTreeLeafNodeStatistics>(
120118
leftChildId, std::move(*this), *rightChild, regularization, featureBag,
121119
std::move(leftChildRowMask));
122-
leftChild->maybeRecoverMemory();
123-
rightChild->maybeRecoverMemory();
124120

125121
return std::make_pair(leftChild, rightChild);
126122
}
@@ -176,13 +172,6 @@ CBoostedTreeLeafNodeStatistics::estimateMemoryUsage(std::size_t numberRows,
176172
return sizeof(CBoostedTreeLeafNodeStatistics) + rowMaskSize + splitsDerivativesSize;
177173
}
178174

179-
void CBoostedTreeLeafNodeStatistics::maybeRecoverMemory() {
180-
if (this->gain() <= 0.0) {
181-
m_RowMask = core::CPackedBitVector{};
182-
m_Derivatives = CSplitsDerivatives{};
183-
}
184-
}
185-
186175
void CBoostedTreeLeafNodeStatistics::computeAggregateLossDerivatives(
187176
std::size_t numberThreads,
188177
const core::CDataFrame& frame,

lib/maths/CBoostedTreeLoss.cc

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -482,8 +482,6 @@ CArgMinMsleImpl::TObjective CArgMinMsleImpl::objective() const {
482482
return [this](double logWeight) {
483483

484484
double weight{std::exp(logWeight)};
485-
double loss{0.0};
486-
double totalCount{0.0};
487485
if (this->bucketWidth().first == 0.0) {
488486
// prediction is constant
489487
double expPrediction{m_ExpPredictionMinMax.max()};
@@ -495,6 +493,8 @@ CArgMinMsleImpl::TObjective CArgMinMsleImpl::objective() const {
495493
CTools::pow2(logPrediction)};
496494
return loss + this->lambda() * CTools::pow2(weight);
497495
} else {
496+
double loss{0.0};
497+
double totalCount{0.0};
498498
for (const auto& bucketPrediction : m_Buckets) {
499499
for (const auto& bucketActual : bucketPrediction) {
500500
double count{CBasicStatistics::count(bucketActual)};

0 commit comments

Comments
 (0)