Skip to content

[ML] Reduce variability in regression and classification results across our target platforms #1127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 9, 2020
Merged
2 changes: 2 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
* Improve robustness of anomaly detection to bad input data. (See {ml-pull}1114[#1114].)
* Adds new `num_matches` and `preferred_to_categories` fields to category output.
(See {ml-pull}1062[#1062].)
* Ensure classification and regression results agree for all our target operating systems.
(See {ml-pull}1127[#1127].)

== {es} version 7.7.0

Expand Down
3 changes: 2 additions & 1 deletion include/maths/CBoostedTreeLeafNodeStatistics.h
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,8 @@ class MATHS_EXPORT CBoostedTreeLeafNodeStatistics final {

bool operator<(const SSplitStatistics& rhs) const {
    // Compare on every field (including s_SplitAt) so the ordering is a
    // deterministic total order whenever any field differs. The rendered
    // diff had left both the old and new argument lists in place, which
    // is not a valid call; this is the resolved form.
    return COrderings::lexicographical_compare(
        s_Gain, s_Curvature, s_Feature, s_SplitAt,
        rhs.s_Gain, rhs.s_Curvature, rhs.s_Feature, rhs.s_SplitAt);
}

std::string print() const {
Expand Down
18 changes: 16 additions & 2 deletions include/maths/CTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,17 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
//! Compute the square \f$x^2\f$ of \p x.
static double pow2(double x) {
    double squared{x * x};
    return squared;
}

//! Compute a value from \p x which will be stable across platforms.
//!
//! Snaps \p x to a multiple of STABLE_EPS (rounding halves up) so that
//! tiny platform-dependent differences in the low-order bits of \p x
//! are discarded.
static double stable(double x) {
    double nearestMultiple{std::floor(x / STABLE_EPS + 0.5)};
    return STABLE_EPS * nearestMultiple;
}

//! A version of std::log which is stable across platforms.
static double stableLog(double x) {
    double logX{std::log(x)};
    return stable(logX);
}

//! A version of std::exp which is stable across platforms.
static double stableExp(double x) { return stable(std::exp(x)); }

//! Sigmoid function of \p p.
//!
//! Computes \f$1 / (1 + 1 / p)\f$; for \f$p = e^x\f$ this equals the
//! standard logistic function \f$1 / (1 + e^{-x})\f$.
static double sigmoid(double p) { return 1.0 / (1.0 + 1.0 / p); }

Expand All @@ -681,7 +692,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
//! \param[in] x The point at which to evaluate the function.
//! \param[in] width The scale of the step.
//! \param[in] x0 The centre of the step.
//! \param[in] sign Determines whether it's a step up or down.
static double
logisticFunction(double x, double width = 1.0, double x0 = 0.0, double sign = 1.0) {
    // Use stableExp (rather than std::exp) so results agree across platforms.
    // The rendered diff had left both the old and new return statements in
    // place; this is the resolved form.
    return sigmoid(stableExp(std::copysign(1.0, sign) * (x - x0) / width));
}

//! Compute the softmax from the multinomial logit values \p logit.
Expand All @@ -695,7 +706,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
double Z{0.0};
double zmax{*std::max_element(z.begin(), z.end())};
for (auto& zi : z) {
zi = std::exp(zi - zmax);
zi = stableExp(zi - zmax);
Z += zi;
}
for (auto& zi : z) {
Expand Down Expand Up @@ -726,6 +737,9 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {

//! A wrapper around lgamma which handles corner cases if requested
static bool lgamma(double value, double& result, bool checkForFinite = true);

private:
static constexpr double STABLE_EPS{10.0 * std::numeric_limits<double>::epsilon()};
};
}
}
Expand Down
15 changes: 8 additions & 7 deletions lib/maths/CBayesianOptimisation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -325,8 +325,8 @@ const CBayesianOptimisation::TVector& CBayesianOptimisation::maximumLikelihoodKe
// We restart optimization with initial guess on different scales for global probing.
TDoubleVec scales;
scales.reserve((m_Restarts - 1) * n);
CSampling::uniformSample(m_Rng, std::log(0.1), std::log(4.0),
(m_Restarts - 1) * n, scales);
CSampling::uniformSample(m_Rng, CTools::stableLog(0.1),
CTools::stableLog(4.0), (m_Restarts - 1) * n, scales);

TLikelihoodFunc l;
TLikelihoodGradientFunc g;
Expand Down Expand Up @@ -446,11 +446,12 @@ CBayesianOptimisation::kernelCovariates(const TVector& a, const TVector& x, doub
}

double CBayesianOptimisation::kernel(const TVector& a, const TVector& x, const TVector& y) const {
    // Computes a(0)^2 * exp(-(x - y)' D (x - y)) where D is the diagonal
    // matrix built from the minimum kernel coordinate distance scale plus
    // the squared remaining kernel parameters. stableExp keeps the value
    // reproducible across platforms. The rendered diff had left both the
    // old and new expressions in place; this is the resolved form.
    return CTools::pow2(a(0)) *
           CTools::stableExp(-(x - y).transpose() *
                             (m_MinimumKernelCoordinateDistanceScale +
                              a.tail(a.size() - 1).cwiseAbs2().matrix())
                                 .asDiagonal() *
                             (x - y));
}

void CBayesianOptimisation::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
Expand Down
64 changes: 35 additions & 29 deletions lib/maths/CBoostedTreeFactory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -489,16 +489,16 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa
if (m_TreeImpl->m_RegularizationOverride.depthPenaltyMultiplier() == boost::none) {
if (gainPerNode90thPercentile > 0.0) {
double searchIntervalSize{2.0 * gainPerNode90thPercentile / gainPerNode1stPercentile};
double logMaxDepthPenaltyMultiplier{std::log(gainPerNode90thPercentile)};
double logMaxDepthPenaltyMultiplier{CTools::stableLog(gainPerNode90thPercentile)};
double logMinDepthPenaltyMultiplier{logMaxDepthPenaltyMultiplier -
std::log(searchIntervalSize)};
CTools::stableLog(searchIntervalSize)};
double meanLogDepthPenaltyMultiplier{
(logMinDepthPenaltyMultiplier + logMaxDepthPenaltyMultiplier) / 2.0};
double mainLoopSearchInterval{std::log(searchIntervalSize) / 2.0};
double mainLoopSearchInterval{CTools::stableLog(searchIntervalSize) / 2.0};
LOG_TRACE(<< "mean log depth penalty multiplier = " << meanLogDepthPenaltyMultiplier);

auto applyDepthPenaltyMultiplier = [](CBoostedTreeImpl& tree, double logDepthPenalty) {
tree.m_Regularization.depthPenaltyMultiplier(std::exp(logDepthPenalty));
tree.m_Regularization.depthPenaltyMultiplier(CTools::stableExp(logDepthPenalty));
return true;
};

Expand All @@ -516,7 +516,7 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa
LOG_TRACE(<< "log depth penalty multiplier search interval = ["
<< m_LogDepthPenaltyMultiplierSearchInterval.toDelimited() << "]");

m_TreeImpl->m_Regularization.depthPenaltyMultiplier(std::exp(
m_TreeImpl->m_Regularization.depthPenaltyMultiplier(CTools::stableExp(
m_LogDepthPenaltyMultiplierSearchInterval(BEST_REGULARIZER_INDEX)));
}
if (gainPerNode90thPercentile <= 0.0 ||
Expand All @@ -531,18 +531,19 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa
if (m_TreeImpl->m_RegularizationOverride.treeSizePenaltyMultiplier() == boost::none) {
if (gainPerNode90thPercentile > 0.0) {
double searchIntervalSize{2.0 * gainPerNode90thPercentile / gainPerNode1stPercentile};
double logMaxTreeSizePenaltyMultiplier{std::log(gainPerNode90thPercentile)};
double logMinTreeSizePenaltyMultiplier{logMaxTreeSizePenaltyMultiplier -
std::log(searchIntervalSize)};
double logMaxTreeSizePenaltyMultiplier{CTools::stableLog(gainPerNode90thPercentile)};
double logMinTreeSizePenaltyMultiplier{
logMaxTreeSizePenaltyMultiplier - CTools::stableLog(searchIntervalSize)};
double meanLogTreeSizePenaltyMultiplier{
(logMinTreeSizePenaltyMultiplier + logMaxTreeSizePenaltyMultiplier) / 2.0};
double mainLoopSearchInterval{0.5 * std::log(searchIntervalSize)};
double mainLoopSearchInterval{0.5 * CTools::stableLog(searchIntervalSize)};
LOG_TRACE(<< "mean log tree size penalty multiplier = "
<< meanLogTreeSizePenaltyMultiplier);

auto applyTreeSizePenaltyMultiplier = [](CBoostedTreeImpl& tree,
double logTreeSizePenalty) {
tree.m_Regularization.treeSizePenaltyMultiplier(std::exp(logTreeSizePenalty));
tree.m_Regularization.treeSizePenaltyMultiplier(
CTools::stableExp(logTreeSizePenalty));
return true;
};

Expand All @@ -561,7 +562,7 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa
LOG_TRACE(<< "log tree size penalty multiplier search interval = ["
<< m_LogTreeSizePenaltyMultiplierSearchInterval.toDelimited() << "]");

m_TreeImpl->m_Regularization.treeSizePenaltyMultiplier(std::exp(
m_TreeImpl->m_Regularization.treeSizePenaltyMultiplier(CTools::stableExp(
m_LogTreeSizePenaltyMultiplierSearchInterval(BEST_REGULARIZER_INDEX)));
}
if (gainPerNode90thPercentile <= 0.0 ||
Expand All @@ -577,18 +578,20 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa
if (totalCurvaturePerNode90thPercentile > 0.0) {
double searchIntervalSize{2.0 * totalCurvaturePerNode90thPercentile /
totalCurvaturePerNode1stPercentile};
double logMaxLeafWeightPenaltyMultiplier{std::log(totalCurvaturePerNode90thPercentile)};
double logMaxLeafWeightPenaltyMultiplier{
CTools::stableLog(totalCurvaturePerNode90thPercentile)};
double logMinLeafWeightPenaltyMultiplier{
logMaxLeafWeightPenaltyMultiplier - std::log(searchIntervalSize)};
logMaxLeafWeightPenaltyMultiplier - CTools::stableLog(searchIntervalSize)};
double meanLogLeafWeightPenaltyMultiplier{
(logMinLeafWeightPenaltyMultiplier + logMaxLeafWeightPenaltyMultiplier) / 2.0};
double mainLoopSearchInterval{0.5 * std::log(searchIntervalSize)};
double mainLoopSearchInterval{0.5 * CTools::stableLog(searchIntervalSize)};
LOG_TRACE(<< "mean log leaf weight penalty multiplier = "
<< meanLogLeafWeightPenaltyMultiplier);

auto applyLeafWeightPenaltyMultiplier = [](CBoostedTreeImpl& tree,
double logLeafWeightPenalty) {
tree.m_Regularization.leafWeightPenaltyMultiplier(std::exp(logLeafWeightPenalty));
tree.m_Regularization.leafWeightPenaltyMultiplier(
CTools::stableExp(logLeafWeightPenalty));
return true;
};

Expand All @@ -607,7 +610,7 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa
LOG_TRACE(<< "log leaf weight penalty multiplier search interval = ["
<< m_LogLeafWeightPenaltyMultiplierSearchInterval.toDelimited()
<< "]");
m_TreeImpl->m_Regularization.leafWeightPenaltyMultiplier(std::exp(
m_TreeImpl->m_Regularization.leafWeightPenaltyMultiplier(CTools::stableExp(
m_LogLeafWeightPenaltyMultiplierSearchInterval(BEST_REGULARIZER_INDEX)));
}
if (totalCurvaturePerNode90thPercentile <= 0.0 ||
Expand All @@ -631,9 +634,10 @@ void CBoostedTreeFactory::initializeUnsetDownsampleFactor(core::CDataFrame& fram
double searchIntervalSize{CTools::truncate(
m_TreeImpl->m_TrainingRowMasks[0].manhattan() / 100.0,
MIN_DOWNSAMPLE_LINE_SEARCH_RANGE, MAX_DOWNSAMPLE_LINE_SEARCH_RANGE)};
double logMaxDownsampleFactor{std::log(std::min(
double logMaxDownsampleFactor{CTools::stableLog(std::min(
std::sqrt(searchIntervalSize) * m_TreeImpl->m_DownsampleFactor, 1.0))};
double logMinDownsampleFactor{logMaxDownsampleFactor - std::log(searchIntervalSize)};
double logMinDownsampleFactor{logMaxDownsampleFactor -
CTools::stableLog(searchIntervalSize)};
double meanLogDownSampleFactor{(logMinDownsampleFactor + logMaxDownsampleFactor) / 2.0};
LOG_TRACE(<< "mean log down sample factor = " << meanLogDownSampleFactor);

Expand Down Expand Up @@ -666,7 +670,7 @@ void CBoostedTreeFactory::initializeUnsetDownsampleFactor(core::CDataFrame& fram
double numberTrainingRows{m_TreeImpl->m_TrainingRowMasks[0].manhattan()};

auto applyDownsampleFactor = [&](CBoostedTreeImpl& tree, double logDownsampleFactor) {
double downsampleFactor{std::exp(logDownsampleFactor)};
double downsampleFactor{CTools::stableExp(logDownsampleFactor)};
tree.m_DownsampleFactor = downsampleFactor;
scaleRegularizers(tree, downsampleFactor);
return tree.m_DownsampleFactor * numberTrainingRows > 10.0;
Expand All @@ -676,23 +680,24 @@ void CBoostedTreeFactory::initializeUnsetDownsampleFactor(core::CDataFrame& fram
m_LogDownsampleFactorSearchInterval =
this->testLossLineSearch(frame, applyDownsampleFactor,
logMinDownsampleFactor, logMaxDownsampleFactor,
std::log(MIN_DOWNSAMPLE_FACTOR_SCALE),
std::log(MAX_DOWNSAMPLE_FACTOR_SCALE))
CTools::stableLog(MIN_DOWNSAMPLE_FACTOR_SCALE),
CTools::stableLog(MAX_DOWNSAMPLE_FACTOR_SCALE))
.value_or(fallback);

// Truncate the log(scale) to be less than or equal to log(1.0) and the down
// sampled set contains at least ten examples on average.
m_LogDownsampleFactorSearchInterval =
min(max(m_LogDownsampleFactorSearchInterval,
TVector{std::log(10.0 / numberTrainingRows)}),
TVector{CTools::stableLog(10.0 / numberTrainingRows)}),
TVector{0.0});
LOG_TRACE(<< "log down sample factor search interval = ["
<< m_LogDownsampleFactorSearchInterval.toDelimited() << "]");

m_TreeImpl->m_DownsampleFactor =
std::exp(m_LogDownsampleFactorSearchInterval(BEST_REGULARIZER_INDEX));
m_TreeImpl->m_DownsampleFactor = CTools::stableExp(
m_LogDownsampleFactorSearchInterval(BEST_REGULARIZER_INDEX));

TVector logScale{std::log(scaleRegularizers(*m_TreeImpl, m_TreeImpl->m_DownsampleFactor))};
TVector logScale{CTools::stableLog(
scaleRegularizers(*m_TreeImpl, m_TreeImpl->m_DownsampleFactor))};
m_LogTreeSizePenaltyMultiplierSearchInterval += logScale;
m_LogLeafWeightPenaltyMultiplierSearchInterval += logScale;

Expand All @@ -706,14 +711,15 @@ void CBoostedTreeFactory::initializeUnsetEta(core::CDataFrame& frame) {

if (m_TreeImpl->m_EtaOverride == boost::none) {
double searchIntervalSize{5.0 * MAX_ETA_SCALE / MIN_ETA_SCALE};
double logMaxEta{std::log(std::sqrt(searchIntervalSize) * m_TreeImpl->m_Eta)};
double logMinEta{logMaxEta - std::log(searchIntervalSize)};
double logMaxEta{
CTools::stableLog(std::sqrt(searchIntervalSize) * m_TreeImpl->m_Eta)};
double logMinEta{logMaxEta - CTools::stableLog(searchIntervalSize)};
double meanLogEta{(logMaxEta + logMinEta) / 2.0};
double mainLoopSearchInterval{std::log(0.2 * searchIntervalSize)};
double mainLoopSearchInterval{CTools::stableLog(0.2 * searchIntervalSize)};
LOG_TRACE(<< "mean log eta = " << meanLogEta);

auto applyEta = [](CBoostedTreeImpl& tree, double eta) {
tree.m_Eta = std::exp(eta);
tree.m_Eta = CTools::stableExp(eta);
tree.m_EtaGrowthRatePerTree = 1.0 + tree.m_Eta / 2.0;
tree.m_MaximumNumberTrees = computeMaximumNumberTrees(tree.m_Eta);
return true;
Expand Down
25 changes: 14 additions & 11 deletions lib/maths/CBoostedTreeImpl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1076,16 +1076,16 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss
// Read parameters for last round.
int i{0};
if (m_DownsampleFactorOverride == boost::none) {
parameters(i++) = std::log(m_DownsampleFactor);
parameters(i++) = CTools::stableLog(m_DownsampleFactor);
}
if (m_RegularizationOverride.depthPenaltyMultiplier() == boost::none) {
parameters(i++) = std::log(m_Regularization.depthPenaltyMultiplier());
parameters(i++) = CTools::stableLog(m_Regularization.depthPenaltyMultiplier());
}
if (m_RegularizationOverride.leafWeightPenaltyMultiplier() == boost::none) {
parameters(i++) = std::log(m_Regularization.leafWeightPenaltyMultiplier());
parameters(i++) = CTools::stableLog(m_Regularization.leafWeightPenaltyMultiplier());
}
if (m_RegularizationOverride.treeSizePenaltyMultiplier() == boost::none) {
parameters(i++) = std::log(m_Regularization.treeSizePenaltyMultiplier());
parameters(i++) = CTools::stableLog(m_Regularization.treeSizePenaltyMultiplier());
}
if (m_RegularizationOverride.softTreeDepthLimit() == boost::none) {
parameters(i++) = m_Regularization.softTreeDepthLimit();
Expand All @@ -1094,7 +1094,7 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss
parameters(i++) = m_Regularization.softTreeDepthTolerance();
}
if (m_EtaOverride == boost::none) {
parameters(i++) = std::log(m_Eta);
parameters(i++) = CTools::stableLog(m_Eta);
parameters(i++) = m_EtaGrowthRatePerTree;
}
if (m_FeatureBagFractionOverride == boost::none) {
Expand Down Expand Up @@ -1131,21 +1131,24 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss
// Write parameters for next round.
i = 0;
if (m_DownsampleFactorOverride == boost::none) {
m_DownsampleFactor = std::exp(parameters(i++));
m_DownsampleFactor = CTools::stableExp(parameters(i++));
TVector minBoundary;
TVector maxBoundary;
std::tie(minBoundary, maxBoundary) = bopt.boundingBox();
scale = std::min(scale, 2.0 * m_DownsampleFactor /
(std::exp(minBoundary(0)) + std::exp(maxBoundary(0))));
(CTools::stableExp(minBoundary(0)) +
CTools::stableExp(maxBoundary(0))));
}
if (m_RegularizationOverride.depthPenaltyMultiplier() == boost::none) {
m_Regularization.depthPenaltyMultiplier(std::exp(parameters(i++)));
m_Regularization.depthPenaltyMultiplier(CTools::stableExp(parameters(i++)));
}
if (m_RegularizationOverride.leafWeightPenaltyMultiplier() == boost::none) {
m_Regularization.leafWeightPenaltyMultiplier(scale * std::exp(parameters(i++)));
m_Regularization.leafWeightPenaltyMultiplier(
scale * CTools::stableExp(parameters(i++)));
}
if (m_RegularizationOverride.treeSizePenaltyMultiplier() == boost::none) {
m_Regularization.treeSizePenaltyMultiplier(scale * std::exp(parameters(i++)));
m_Regularization.treeSizePenaltyMultiplier(
scale * CTools::stableExp(parameters(i++)));
}
if (m_RegularizationOverride.softTreeDepthLimit() == boost::none) {
m_Regularization.softTreeDepthLimit(parameters(i++));
Expand All @@ -1154,7 +1157,7 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss
m_Regularization.softTreeDepthTolerance(parameters(i++));
}
if (m_EtaOverride == boost::none) {
m_Eta = std::exp(scale * parameters(i++));
m_Eta = CTools::stableExp(scale * parameters(i++));
m_EtaGrowthRatePerTree = parameters(i++);
}
if (m_FeatureBagFractionOverride == boost::none) {
Expand Down
2 changes: 1 addition & 1 deletion lib/maths/CBoostedTreeLeafNodeStatistics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ CBoostedTreeLeafNodeStatistics::split(std::size_t leftChildId,
}

bool CBoostedTreeLeafNodeStatistics::operator<(const CBoostedTreeLeafNodeStatistics& rhs) const {
    // Break ties on the leaf id so the ordering is deterministic even when
    // two leaves have equal best-split statistics. The rendered diff had
    // left both the old and new return statements in place; this is the
    // resolved form.
    return COrderings::lexicographical_compare(m_BestSplit, m_Id, rhs.m_BestSplit, rhs.m_Id);
}

double CBoostedTreeLeafNodeStatistics::gain() const {
Expand Down
Loading