diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 695d242a98..4fc8ea09c8 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -46,6 +46,8 @@ * Improve robustness of anomaly detection to bad input data. (See {ml-pull}1114[#1114].) * Reduce peak memory usage and memory estimates for classification and regression. (See {ml-pull}1125[#1125].) +* Reduce variability of classification and regression results across our target operating systems. + (See {ml-pull}1127[#1127].) * Switched data frame analytics model memory estimates from kilobytes to megabytes. (See {ml-pull}1126[#1126], issue: {issue}54506[#54506].) * Added a {ml} native code build for Linux on AArch64. (See {ml-pull}1132[#1132] and diff --git a/include/core/CIEEE754.h b/include/core/CIEEE754.h index 2059c0f2c9..842647862b 100644 --- a/include/core/CIEEE754.h +++ b/include/core/CIEEE754.h @@ -9,8 +9,8 @@ #include -#include -#include +#include +#include namespace ml { namespace core { @@ -40,14 +40,14 @@ class CORE_EXPORT CIEEE754 { //! as an integer. //! \note The actual "exponent" is "exponent - 1022" in two's complement. struct SDoubleRep { -#ifdef __sparc // Add any other big endian architectures - uint64_t s_Sign : 1; // sign bit - uint64_t s_Exponent : 11; // exponent - uint64_t s_Mantissa : 52; // mantissa +#ifdef __sparc // Add any other big endian architectures + std::uint64_t s_Sign : 1; // sign bit + std::uint64_t s_Exponent : 11; // exponent + std::uint64_t s_Mantissa : 52; // mantissa #else - uint64_t s_Mantissa : 52; // mantissa - uint64_t s_Exponent : 11; // exponent - uint64_t s_Sign : 1; // sign bit + std::uint64_t s_Mantissa : 52; // mantissa + std::uint64_t s_Exponent : 11; // exponent + std::uint64_t s_Sign : 1; // sign bit #endif }; @@ -57,15 +57,19 @@ class CORE_EXPORT CIEEE754 { //! //! \note This is closely related to std::frexp for double but returns //! the mantissa interpreted as an integer. - static void decompose(double value, uint64_t& mantissa, int& exponent) { + static void decompose(double value, std::uint64_t& mantissa, int& exponent) { SDoubleRep parsed; static_assert(sizeof(double) == sizeof(SDoubleRep), "SDoubleRep definition unsuitable for memcpy to double"); // Use memcpy() rather than union to adhere to strict aliasing rules - ::memcpy(&parsed, &value, sizeof(double)); + std::memcpy(&parsed, &value, sizeof(double)); exponent = static_cast(parsed.s_Exponent) - 1022; mantissa = parsed.s_Mantissa; } + + //! Drop \p bits trailing bits from the mantissa of \p value in a stable way + //! for different operating systems. 
+ static double dropbits(double value, int bits); }; } } diff --git a/include/maths/CBoostedTreeLeafNodeStatistics.h b/include/maths/CBoostedTreeLeafNodeStatistics.h index d2b41146fe..4f5627699f 100644 --- a/include/maths/CBoostedTreeLeafNodeStatistics.h +++ b/include/maths/CBoostedTreeLeafNodeStatistics.h @@ -459,7 +459,8 @@ class MATHS_EXPORT CBoostedTreeLeafNodeStatistics final { bool operator<(const SSplitStatistics& rhs) const { return COrderings::lexicographical_compare( - s_Gain, s_Curvature, s_Feature, rhs.s_Gain, rhs.s_Curvature, rhs.s_Feature); + s_Gain, s_Curvature, s_Feature, s_SplitAt, // <- lhs + rhs.s_Gain, rhs.s_Curvature, rhs.s_Feature, rhs.s_SplitAt); } std::string print() const { diff --git a/include/maths/CLbfgs.h b/include/maths/CLbfgs.h index 9f26d3a722..c22b802ef8 100644 --- a/include/maths/CLbfgs.h +++ b/include/maths/CLbfgs.h @@ -8,6 +8,7 @@ #define INCLUDED_ml_maths_CLbfgs_h #include +#include #include @@ -302,8 +303,9 @@ class CLbfgs { return m_BacktrackingMinDecrease * s * las::inner(m_Gx, m_P) / las::norm(m_P); } - constexpr double minimumStepSize() const { - return std::pow(m_StepScale, static_cast(MAXIMUM_BACK_TRACKING_ITERATIONS)); + double minimumStepSize() const { + return CTools::stable(std::pow( + m_StepScale, static_cast(MAXIMUM_BACK_TRACKING_ITERATIONS))); } private: diff --git a/include/maths/CTools.h b/include/maths/CTools.h index 0a02d7ceeb..9a23c8f6bc 100644 --- a/include/maths/CTools.h +++ b/include/maths/CTools.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -456,7 +457,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable { // (interpreted as an integer) to the corresponding // double value and fastLog uses the same approach // to extract the mantissa. - uint64_t dx = 0x10000000000000ull / BINS; + std::uint64_t dx = 0x10000000000000ull / BINS; core::CIEEE754::SDoubleRep x; x.s_Sign = 0; x.s_Mantissa = (dx / 2) & core::CIEEE754::IEEE754_MANTISSA_MASK; @@ -469,12 +470,12 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable { // Use memcpy() rather than union to adhere to strict // aliasing rules std::memcpy(&value, &x, sizeof(double)); - m_Table[i] = std::log2(value); + m_Table[i] = stable(std::log2(value)); } } //! Lookup log2 for a given mantissa. - const double& operator[](uint64_t mantissa) const { + const double& operator[](std::uint64_t mantissa) const { return m_Table[mantissa >> FAST_LOG_SHIFT]; } @@ -494,7 +495,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable { //! \note This is taken from the approach given in //! http://www.icsi.berkeley.edu/pubs/techreports/TR-07-002.pdf static double fastLog(double x) { - uint64_t mantissa; + std::uint64_t mantissa; int log2; core::CIEEE754::decompose(x, mantissa, log2); return 0.693147180559945 * (FAST_LOG_TABLE[mantissa] + log2); @@ -669,6 +670,15 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable { //! Compute \f$x^2\f$. static double pow2(double x) { return x * x; } + //! Compute a value from \p x which will be stable across platforms. + static double stable(double x) { return core::CIEEE754::dropbits(x, 1); } + + //! A version of std::log which is stable across platforms. + static double stableLog(double x) { return stable(std::log(x)); } + + //! A version of std::log which is stable across platforms. + static double stableExp(double x) { return stable(std::exp(x)); } + //! Sigmoid function of \p p. 
static double sigmoid(double p) { return 1.0 / (1.0 + 1.0 / p); } @@ -682,7 +692,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable { //! \param[in] sign Determines whether it's a step up or down. static double logisticFunction(double x, double width = 1.0, double x0 = 0.0, double sign = 1.0) { - return sigmoid(std::exp(std::copysign(1.0, sign) * (x - x0) / width)); + return sigmoid(stableExp(std::copysign(1.0, sign) * (x - x0) / width)); } //! Compute the softmax for the multinomial logit values \p logit. @@ -696,7 +706,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable { double Z{0.0}; double zmax{*std::max_element(z.begin(), z.end())}; for (auto& zi : z) { - zi = std::exp(zi - zmax); + zi = stableExp(zi - zmax); Z += zi; } for (auto& zi : z) { diff --git a/include/maths/CToolsDetail.h b/include/maths/CToolsDetail.h index ac4e02e8ba..e20b0da94c 100644 --- a/include/maths/CToolsDetail.h +++ b/include/maths/CToolsDetail.h @@ -316,7 +316,7 @@ void CTools::inplaceLogSoftmax(CDenseVector& z) { double zmax{z.maxCoeff()}; z.array() -= zmax; double Z{z.array().exp().sum()}; - z.array() -= std::log(Z); + z.array() -= stableLog(Z); } } } diff --git a/lib/api/unittest/CDataFrameAnalyzerFeatureImportanceTest.cc b/lib/api/unittest/CDataFrameAnalyzerFeatureImportanceTest.cc index e30030211c..afa83fc61b 100644 --- a/lib/api/unittest/CDataFrameAnalyzerFeatureImportanceTest.cc +++ b/lib/api/unittest/CDataFrameAnalyzerFeatureImportanceTest.cc @@ -485,8 +485,8 @@ BOOST_FIXTURE_TEST_CASE(testRegressionFeatureImportanceNoImportance, SFixture) { double c1{readShapValue(result, "c1")}; double prediction{ result["row_results"]["results"]["ml"]["target_prediction"].GetDouble()}; - // c1 explains 95% of the prediction value, i.e. the difference from the prediction is less than 2%. - BOOST_REQUIRE_CLOSE(c1, prediction, 5.0); + // c1 explains 94% of the prediction value, i.e. the difference from the prediction is less than 2%. + BOOST_REQUIRE_CLOSE(c1, prediction, 6.0); for (const auto& feature : {"c2", "c3", "c4"}) { double c = readShapValue(result, feature); BOOST_REQUIRE_SMALL(c, 2.0); diff --git a/lib/core/CIEEE754.cc b/lib/core/CIEEE754.cc index 6037412f96..a8c4dbffaf 100644 --- a/lib/core/CIEEE754.cc +++ b/lib/core/CIEEE754.cc @@ -7,14 +7,14 @@ #include #include +#include namespace ml { namespace core { double CIEEE754::round(double value, EPrecision precision) { - // This first decomposes the value into the mantissa - // and exponent to avoid the problem with overflow if - // the values are close to max double. + // First decomposes the value into the mantissa and exponent to avoid the + // problem with overflow if the values are close to max double. 
int exponent; double mantissa = std::frexp(value, &exponent); @@ -39,5 +39,15 @@ double CIEEE754::round(double value, EPrecision precision) { return std::ldexp(mantissa, exponent); } + +double CIEEE754::dropbits(double value, int bits) { + SDoubleRep parsed; + static_assert(sizeof(double) == sizeof(SDoubleRep), + "SDoubleRep definition unsuitable for memcpy to double"); + std::memcpy(&parsed, &value, sizeof(double)); + parsed.s_Mantissa &= ((IEEE754_MANTISSA_MASK << bits) & IEEE754_MANTISSA_MASK); + std::memcpy(&value, &parsed, sizeof(double)); + return value; +} } } diff --git a/lib/maths/CBayesianOptimisation.cc b/lib/maths/CBayesianOptimisation.cc index 3d85090769..31fcfcc736 100644 --- a/lib/maths/CBayesianOptimisation.cc +++ b/lib/maths/CBayesianOptimisation.cc @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include @@ -42,6 +42,16 @@ const std::string RANGE_SCALE_TAG{"range_scale"}; const std::string RESTARTS_TAG{"restarts"}; const std::string RNG_TAG{"rng"}; +//! A version of the normal c.d.f. which is stable across our target platforms. +double stableNormCdf(double z) { + return (1.0 + CTools::stable(std::erf(z / boost::math::constants::root_two()))) / 2.0; +} + +//! A version of the normal p.d.f. which is stable across our target platforms. +double stableNormPdf(double z) { + return CTools::stableExp(-z * z / 2.0) / boost::math::constants::root_two_pi(); +} + // The kernel we use is v * I + a(0)^2 * O(I). We fall back to random search when // a(0)^2 < eps * v since for small eps and a reasonable number of dimensions the // expected improvement will be constant in the space we search. We don't terminate @@ -132,7 +142,7 @@ CBayesianOptimisation::maximumExpectedImprovement() { double fx{minusEI(x)}; LOG_TRACE(<< "x = " << x.transpose() << " EI(x) = " << fx); - if (-fx > fmax) { + if (COrderings::lexicographical_compare(fmax, xmax, -fx, x)) { xmax = x; fmax = -fx; } @@ -154,9 +164,8 @@ CBayesianOptimisation::maximumExpectedImprovement() { LOG_TRACE(<< "x0 = " << x0.second.transpose()); std::tie(xcand, fcand) = lbfgs.constrainedMinimize( minusEI, minusEIGradient, a, b, std::move(x0.second), rho); - LOG_TRACE(<< " xcand = " << xcand.transpose() << " EI(cand) = " << fcand); - - if (-fcand > fmax) { + LOG_TRACE(<< "xcand = " << xcand.transpose() << " EI(cand) = " << fcand); + if (COrderings::lexicographical_compare(fmax, xmax, -fcand, xcand)) { std::tie(xmax, fmax) = std::make_pair(std::move(xcand), -fcand); } } @@ -174,10 +183,10 @@ CBayesianOptimisation::maximumExpectedImprovement() { expectedImprovement = fmax / m_RangeScale; } - LOG_TRACE(<< "best = " << xmax.cwiseProduct(m_MaxBoundary - m_MinBoundary).transpose() - << " EI(best) = " << expectedImprovement); + xmax = xmax.cwiseProduct(m_MaxBoundary - m_MinBoundary); + LOG_TRACE(<< "best = " << xmax.transpose() << " EI(best) = " << expectedImprovement); - return {xmax.cwiseProduct(m_MaxBoundary - m_MinBoundary), expectedImprovement}; + return {std::move(xmax), expectedImprovement}; } std::pair @@ -185,45 +194,48 @@ CBayesianOptimisation::minusLikelihoodAndGradient() const { TVector f{this->function()}; double v{this->meanErrorVariance()}; - - auto minusLogLikelihood = [f, v, this](const TVector& a) { - Eigen::LDLT Kldl{this->kernel(a, v)}; - TVector Kinvf{Kldl.solve(f)}; + TVector ones; + TVector gradient; + TMatrix K; + TVector Kinvf; + TMatrix Kinv; + TMatrix dKdai; + + auto minusLogLikelihood = [=](const TVector& a) mutable { + K = this->kernel(a, v); + Eigen::LDLT Kldl{K}; + Kinvf = Kldl.solve(f); // We can only 
determine values up to eps * "max diagonal". If the diagonal // has a zero it blows up the determinant term. In practice, we know the // kernel can't be singular by construction so we perturb the diagonal by // the numerical error in such a way as to recover a non-singular matrix. // (Note that the solve routine deals with the zero for us.) double eps{std::numeric_limits::epsilon() * Kldl.vectorD().maxCoeff()}; - return 0.5 * - (f.transpose() * Kinvf + (Kldl.vectorD().array() + eps).log().sum()); + return 0.5 * (f.transpose() * Kinvf + + Kldl.vectorD().cwiseMax(eps).array().log().sum()); }; - auto minusLogLikelihoodGradient = [f, v, this](const TVector& a) { - TMatrix K{this->kernel(a, v)}; + auto minusLogLikelihoodGradient = [=](const TVector& a) mutable { + K = this->kernel(a, v); Eigen::LDLT Kldl{K}; - TVector Kinvf{Kldl.solve(f)}; + Kinvf = Kldl.solve(f); - TVector ones(f.size()); - ones.setOnes(); - TMatrix KInv{Kldl.solve(TMatrix{ones.asDiagonal()})}; + ones = TVector::Ones(f.size()); + Kinv = Kldl.solve(TMatrix::Identity(f.size(), f.size())); K.diagonal() -= v * ones; - TVector gradient{a.size()}; - gradient.setZero(); - + gradient = TVector::Zero(a.size()); for (int i = 0; i < Kinvf.size(); ++i) { - gradient(0) -= - 1.0 / a(0) * - double{(Kinvf(i) * Kinvf.transpose() - KInv.row(i)) * K.col(i)}; + double di{(Kinvf(i) * Kinvf.transpose() - Kinv.row(i)) * K.col(i)}; + gradient(0) -= di / a(0); } for (int i = 1; i < a.size(); ++i) { - TMatrix dKdai{this->dKerneld(a, i)}; + dKdai = this->dKerneld(a, i); for (int j = 0; j < Kinvf.size(); ++j) { - gradient(i) += 0.5 * double{(Kinvf(j) * Kinvf.transpose() - KInv.row(j)) * - dKdai.col(j)}; + double di{(Kinvf(j) * Kinvf.transpose() - Kinv.row(j)) * dKdai.col(j)}; + gradient(i) += 0.5 * di; } } @@ -238,11 +250,16 @@ CBayesianOptimisation::minusExpectedImprovementAndGradient() const { TMatrix K{this->kernel(m_KernelParameters, this->meanErrorVariance())}; Eigen::LDLT Kldl{K}; - TVector Kinvf{Kldl.solve(this->function())}; - double vx{this->meanErrorVariance()}; + TVector Kxn; + TVector KinvKxn; + TVector muGradient; + TVector sigmaGradient; + TVector dKxndx; + TVector zGradient; + double fmin{ std::min_element(m_FunctionMeanValues.begin(), m_FunctionMeanValues.end(), [](const TVectorDoublePr& lhs, const TVectorDoublePr& rhs) { @@ -250,9 +267,8 @@ CBayesianOptimisation::minusExpectedImprovementAndGradient() const { }) ->second}; - auto EI = [Kldl, Kinvf, vx, fmin, this](const TVector& x) { + auto EI = [=](const TVector& x) mutable { double Kxx; - TVector Kxn; std::tie(Kxn, Kxx) = this->kernelCovariates(m_KernelParameters, x, vx); double sigma{Kxx - Kxn.transpose() * Kldl.solve(Kxn)}; @@ -264,17 +280,17 @@ CBayesianOptimisation::minusExpectedImprovementAndGradient() const { double mu{Kxn.transpose() * Kinvf}; sigma = std::sqrt(sigma); - boost::math::normal normal{0.0, 1.0}; double z{(fmin - mu) / sigma}; - return -sigma * (z * CTools::safeCdf(normal, z) + CTools::safePdf(normal, z)); + double cdfz{stableNormCdf(z)}; + double pdfz{stableNormPdf(z)}; + return -sigma * (z * cdfz + pdfz); }; - auto EIGradient = [Kldl, Kinvf, vx, fmin, this](const TVector& x) { + auto EIGradient = [=](const TVector& x) mutable { double Kxx; - TVector Kxn; std::tie(Kxn, Kxx) = this->kernelCovariates(m_KernelParameters, x, vx); - TVector KinvKxn{Kldl.solve(Kxn)}; + KinvKxn = Kldl.solve(Kxn); double sigma{Kxx - Kxn.transpose() * KinvKxn}; if (sigma <= 0.0) { @@ -284,15 +300,14 @@ CBayesianOptimisation::minusExpectedImprovementAndGradient() const { double 
mu{Kxn.transpose() * Kinvf}; sigma = std::sqrt(sigma); - boost::math::normal normal{0.0, 1.0}; double z{(fmin - mu) / sigma}; - double cdfz{CTools::safeCdf(normal, z)}; - double pdfz{CTools::safePdf(normal, z)}; + double cdfz{stableNormCdf(z)}; + double pdfz{stableNormPdf(z)}; - TVector muGradient{x.size()}; - TVector sigmaGradient{x.size()}; + muGradient.resize(x.size()); + sigmaGradient.resize(x.size()); for (int i = 0; i < x.size(); ++i) { - TVector dKxndx{Kxn.size()}; + dKxndx.resize(Kxn.size()); for (int j = 0; j < Kxn.size(); ++j) { const TVector& xj{m_FunctionMeanValues[j].first}; dKxndx(j) = 2.0 * @@ -305,8 +320,7 @@ CBayesianOptimisation::minusExpectedImprovementAndGradient() const { } sigmaGradient /= 2.0 * sigma; - TVector zGradient{((mu - fmin) / CTools::pow2(sigma)) * sigmaGradient - - muGradient / sigma}; + zGradient = ((mu - fmin) / CTools::pow2(sigma)) * sigmaGradient - muGradient / sigma; return TVector{-(z * cdfz + pdfz) * sigmaGradient - sigma * cdfz * zGradient}; }; @@ -325,8 +339,8 @@ const CBayesianOptimisation::TVector& CBayesianOptimisation::maximumLikelihoodKe // We restart optimization with initial guess on different scales for global probing. TDoubleVec scales; scales.reserve((m_Restarts - 1) * n); - CSampling::uniformSample(m_Rng, std::log(0.1), std::log(4.0), - (m_Restarts - 1) * n, scales); + CSampling::uniformSample(m_Rng, CTools::stableLog(0.2), + CTools::stableLog(5.0), (m_Restarts - 1) * n, scales); TLikelihoodFunc l; TLikelihoodGradientFunc g; @@ -345,12 +359,12 @@ const CBayesianOptimisation::TVector& CBayesianOptimisation::maximumLikelihoodKe for (std::size_t j = 0; j < n; ++j) { scale(j) = scales[(i - 1) * n + j]; } - a.array() = scale.array().exp() * a.array(); + a.array() *= scale.array().exp(); double la; std::tie(a, la) = lbfgs.minimize(l, g, std::move(a), 1e-8, 75); - if (la < lmax) { + if (COrderings::lexicographical_compare(la, a, lmax, amax)) { lmax = la; amax = std::move(a); } @@ -360,6 +374,7 @@ const CBayesianOptimisation::TVector& CBayesianOptimisation::maximumLikelihoodKe // but improves traceability. 
m_KernelParameters = amax.cwiseAbs(); LOG_TRACE(<< "kernel parameters = " << m_KernelParameters.transpose()); + LOG_TRACE(<< "likelihood = " << -lmax); return m_KernelParameters; } @@ -446,11 +461,10 @@ CBayesianOptimisation::kernelCovariates(const TVector& a, const TVector& x, doub } double CBayesianOptimisation::kernel(const TVector& a, const TVector& x, const TVector& y) const { - return CTools::pow2(a(0)) * std::exp(-(x - y).transpose() * - (m_MinimumKernelCoordinateDistanceScale + - a.tail(a.size() - 1).cwiseAbs2().matrix()) - .asDiagonal() * - (x - y)); + return CTools::pow2(a(0)) * + CTools::stableExp(-(x - y).transpose() * (m_MinimumKernelCoordinateDistanceScale + + a.tail(a.size() - 1).cwiseAbs2()) + .cwiseProduct(x - y)); } void CBayesianOptimisation::acceptPersistInserter(core::CStatePersistInserter& inserter) const { diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index ffc43e0662..04d9c2e655 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -489,16 +489,16 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (m_TreeImpl->m_RegularizationOverride.depthPenaltyMultiplier() == boost::none) { if (gainPerNode90thPercentile > 0.0) { double searchIntervalSize{2.0 * gainPerNode90thPercentile / gainPerNode1stPercentile}; - double logMaxDepthPenaltyMultiplier{std::log(gainPerNode90thPercentile)}; + double logMaxDepthPenaltyMultiplier{CTools::stableLog(gainPerNode90thPercentile)}; double logMinDepthPenaltyMultiplier{logMaxDepthPenaltyMultiplier - - std::log(searchIntervalSize)}; + CTools::stableLog(searchIntervalSize)}; double meanLogDepthPenaltyMultiplier{ (logMinDepthPenaltyMultiplier + logMaxDepthPenaltyMultiplier) / 2.0}; - double mainLoopSearchInterval{std::log(searchIntervalSize) / 2.0}; + double mainLoopSearchInterval{CTools::stableLog(searchIntervalSize) / 2.0}; LOG_TRACE(<< "mean log depth penalty multiplier = " << meanLogDepthPenaltyMultiplier); auto applyDepthPenaltyMultiplier = [](CBoostedTreeImpl& tree, double logDepthPenalty) { - tree.m_Regularization.depthPenaltyMultiplier(std::exp(logDepthPenalty)); + tree.m_Regularization.depthPenaltyMultiplier(CTools::stableExp(logDepthPenalty)); return true; }; @@ -516,7 +516,7 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa LOG_TRACE(<< "log depth penalty multiplier search interval = [" << m_LogDepthPenaltyMultiplierSearchInterval.toDelimited() << "]"); - m_TreeImpl->m_Regularization.depthPenaltyMultiplier(std::exp( + m_TreeImpl->m_Regularization.depthPenaltyMultiplier(CTools::stableExp( m_LogDepthPenaltyMultiplierSearchInterval(BEST_REGULARIZER_INDEX))); } if (gainPerNode90thPercentile <= 0.0 || @@ -531,18 +531,19 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (m_TreeImpl->m_RegularizationOverride.treeSizePenaltyMultiplier() == boost::none) { if (gainPerNode90thPercentile > 0.0) { double searchIntervalSize{2.0 * gainPerNode90thPercentile / gainPerNode1stPercentile}; - double logMaxTreeSizePenaltyMultiplier{std::log(gainPerNode90thPercentile)}; - double logMinTreeSizePenaltyMultiplier{logMaxTreeSizePenaltyMultiplier - - std::log(searchIntervalSize)}; + double logMaxTreeSizePenaltyMultiplier{CTools::stableLog(gainPerNode90thPercentile)}; + double logMinTreeSizePenaltyMultiplier{ + logMaxTreeSizePenaltyMultiplier - CTools::stableLog(searchIntervalSize)}; double meanLogTreeSizePenaltyMultiplier{ (logMinTreeSizePenaltyMultiplier + 
logMaxTreeSizePenaltyMultiplier) / 2.0}; - double mainLoopSearchInterval{0.5 * std::log(searchIntervalSize)}; + double mainLoopSearchInterval{0.5 * CTools::stableLog(searchIntervalSize)}; LOG_TRACE(<< "mean log tree size penalty multiplier = " << meanLogTreeSizePenaltyMultiplier); auto applyTreeSizePenaltyMultiplier = [](CBoostedTreeImpl& tree, double logTreeSizePenalty) { - tree.m_Regularization.treeSizePenaltyMultiplier(std::exp(logTreeSizePenalty)); + tree.m_Regularization.treeSizePenaltyMultiplier( + CTools::stableExp(logTreeSizePenalty)); return true; }; @@ -561,7 +562,7 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa LOG_TRACE(<< "log tree size penalty multiplier search interval = [" << m_LogTreeSizePenaltyMultiplierSearchInterval.toDelimited() << "]"); - m_TreeImpl->m_Regularization.treeSizePenaltyMultiplier(std::exp( + m_TreeImpl->m_Regularization.treeSizePenaltyMultiplier(CTools::stableExp( m_LogTreeSizePenaltyMultiplierSearchInterval(BEST_REGULARIZER_INDEX))); } if (gainPerNode90thPercentile <= 0.0 || @@ -577,18 +578,20 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (totalCurvaturePerNode90thPercentile > 0.0) { double searchIntervalSize{2.0 * totalCurvaturePerNode90thPercentile / totalCurvaturePerNode1stPercentile}; - double logMaxLeafWeightPenaltyMultiplier{std::log(totalCurvaturePerNode90thPercentile)}; + double logMaxLeafWeightPenaltyMultiplier{ + CTools::stableLog(totalCurvaturePerNode90thPercentile)}; double logMinLeafWeightPenaltyMultiplier{ - logMaxLeafWeightPenaltyMultiplier - std::log(searchIntervalSize)}; + logMaxLeafWeightPenaltyMultiplier - CTools::stableLog(searchIntervalSize)}; double meanLogLeafWeightPenaltyMultiplier{ (logMinLeafWeightPenaltyMultiplier + logMaxLeafWeightPenaltyMultiplier) / 2.0}; - double mainLoopSearchInterval{0.5 * std::log(searchIntervalSize)}; + double mainLoopSearchInterval{0.5 * CTools::stableLog(searchIntervalSize)}; LOG_TRACE(<< "mean log leaf weight penalty multiplier = " << meanLogLeafWeightPenaltyMultiplier); auto applyLeafWeightPenaltyMultiplier = [](CBoostedTreeImpl& tree, double logLeafWeightPenalty) { - tree.m_Regularization.leafWeightPenaltyMultiplier(std::exp(logLeafWeightPenalty)); + tree.m_Regularization.leafWeightPenaltyMultiplier( + CTools::stableExp(logLeafWeightPenalty)); return true; }; @@ -607,7 +610,7 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa LOG_TRACE(<< "log leaf weight penalty multiplier search interval = [" << m_LogLeafWeightPenaltyMultiplierSearchInterval.toDelimited() << "]"); - m_TreeImpl->m_Regularization.leafWeightPenaltyMultiplier(std::exp( + m_TreeImpl->m_Regularization.leafWeightPenaltyMultiplier(CTools::stableExp( m_LogLeafWeightPenaltyMultiplierSearchInterval(BEST_REGULARIZER_INDEX))); } if (totalCurvaturePerNode90thPercentile <= 0.0 || @@ -631,9 +634,10 @@ void CBoostedTreeFactory::initializeUnsetDownsampleFactor(core::CDataFrame& fram double searchIntervalSize{CTools::truncate( m_TreeImpl->m_TrainingRowMasks[0].manhattan() / 100.0, MIN_DOWNSAMPLE_LINE_SEARCH_RANGE, MAX_DOWNSAMPLE_LINE_SEARCH_RANGE)}; - double logMaxDownsampleFactor{std::log(std::min( + double logMaxDownsampleFactor{CTools::stableLog(std::min( std::sqrt(searchIntervalSize) * m_TreeImpl->m_DownsampleFactor, 1.0))}; - double logMinDownsampleFactor{logMaxDownsampleFactor - std::log(searchIntervalSize)}; + double logMinDownsampleFactor{logMaxDownsampleFactor - + CTools::stableLog(searchIntervalSize)}; double 
meanLogDownSampleFactor{(logMinDownsampleFactor + logMaxDownsampleFactor) / 2.0}; LOG_TRACE(<< "mean log down sample factor = " << meanLogDownSampleFactor); @@ -666,7 +670,7 @@ void CBoostedTreeFactory::initializeUnsetDownsampleFactor(core::CDataFrame& fram double numberTrainingRows{m_TreeImpl->m_TrainingRowMasks[0].manhattan()}; auto applyDownsampleFactor = [&](CBoostedTreeImpl& tree, double logDownsampleFactor) { - double downsampleFactor{std::exp(logDownsampleFactor)}; + double downsampleFactor{CTools::stableExp(logDownsampleFactor)}; tree.m_DownsampleFactor = downsampleFactor; scaleRegularizers(tree, downsampleFactor); return tree.m_DownsampleFactor * numberTrainingRows > 10.0; @@ -676,23 +680,24 @@ void CBoostedTreeFactory::initializeUnsetDownsampleFactor(core::CDataFrame& fram m_LogDownsampleFactorSearchInterval = this->testLossLineSearch(frame, applyDownsampleFactor, logMinDownsampleFactor, logMaxDownsampleFactor, - std::log(MIN_DOWNSAMPLE_FACTOR_SCALE), - std::log(MAX_DOWNSAMPLE_FACTOR_SCALE)) + CTools::stableLog(MIN_DOWNSAMPLE_FACTOR_SCALE), + CTools::stableLog(MAX_DOWNSAMPLE_FACTOR_SCALE)) .value_or(fallback); // Truncate the log(scale) to be less than or equal to log(1.0) and the down // sampled set contains at least ten examples on average. m_LogDownsampleFactorSearchInterval = min(max(m_LogDownsampleFactorSearchInterval, - TVector{std::log(10.0 / numberTrainingRows)}), + TVector{CTools::stableLog(10.0 / numberTrainingRows)}), TVector{0.0}); LOG_TRACE(<< "log down sample factor search interval = [" << m_LogDownsampleFactorSearchInterval.toDelimited() << "]"); - m_TreeImpl->m_DownsampleFactor = - std::exp(m_LogDownsampleFactorSearchInterval(BEST_REGULARIZER_INDEX)); + m_TreeImpl->m_DownsampleFactor = CTools::stableExp( + m_LogDownsampleFactorSearchInterval(BEST_REGULARIZER_INDEX)); - TVector logScale{std::log(scaleRegularizers(*m_TreeImpl, m_TreeImpl->m_DownsampleFactor))}; + TVector logScale{CTools::stableLog( + scaleRegularizers(*m_TreeImpl, m_TreeImpl->m_DownsampleFactor))}; m_LogTreeSizePenaltyMultiplierSearchInterval += logScale; m_LogLeafWeightPenaltyMultiplierSearchInterval += logScale; @@ -706,14 +711,15 @@ void CBoostedTreeFactory::initializeUnsetEta(core::CDataFrame& frame) { if (m_TreeImpl->m_EtaOverride == boost::none) { double searchIntervalSize{5.0 * MAX_ETA_SCALE / MIN_ETA_SCALE}; - double logMaxEta{std::log(std::sqrt(searchIntervalSize) * m_TreeImpl->m_Eta)}; - double logMinEta{logMaxEta - std::log(searchIntervalSize)}; + double logMaxEta{ + CTools::stableLog(std::sqrt(searchIntervalSize) * m_TreeImpl->m_Eta)}; + double logMinEta{logMaxEta - CTools::stableLog(searchIntervalSize)}; double meanLogEta{(logMaxEta + logMinEta) / 2.0}; - double mainLoopSearchInterval{std::log(0.2 * searchIntervalSize)}; + double mainLoopSearchInterval{CTools::stableLog(0.2 * searchIntervalSize)}; LOG_TRACE(<< "mean log eta = " << meanLogEta); auto applyEta = [](CBoostedTreeImpl& tree, double eta) { - tree.m_Eta = std::exp(eta); + tree.m_Eta = CTools::stableExp(eta); tree.m_EtaGrowthRatePerTree = 1.0 + tree.m_Eta / 2.0; tree.m_MaximumNumberTrees = computeMaximumNumberTrees(tree.m_Eta); return true; diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index dc6d22582d..91c0f639df 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -1117,16 +1117,16 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss // Read parameters for last round. 
int i{0}; if (m_DownsampleFactorOverride == boost::none) { - parameters(i++) = std::log(m_DownsampleFactor); + parameters(i++) = CTools::stableLog(m_DownsampleFactor); } if (m_RegularizationOverride.depthPenaltyMultiplier() == boost::none) { - parameters(i++) = std::log(m_Regularization.depthPenaltyMultiplier()); + parameters(i++) = CTools::stableLog(m_Regularization.depthPenaltyMultiplier()); } if (m_RegularizationOverride.leafWeightPenaltyMultiplier() == boost::none) { - parameters(i++) = std::log(m_Regularization.leafWeightPenaltyMultiplier()); + parameters(i++) = CTools::stableLog(m_Regularization.leafWeightPenaltyMultiplier()); } if (m_RegularizationOverride.treeSizePenaltyMultiplier() == boost::none) { - parameters(i++) = std::log(m_Regularization.treeSizePenaltyMultiplier()); + parameters(i++) = CTools::stableLog(m_Regularization.treeSizePenaltyMultiplier()); } if (m_RegularizationOverride.softTreeDepthLimit() == boost::none) { parameters(i++) = m_Regularization.softTreeDepthLimit(); @@ -1135,7 +1135,7 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss parameters(i++) = m_Regularization.softTreeDepthTolerance(); } if (m_EtaOverride == boost::none) { - parameters(i++) = std::log(m_Eta); + parameters(i++) = CTools::stableLog(m_Eta); parameters(i++) = m_EtaGrowthRatePerTree; } if (m_FeatureBagFractionOverride == boost::none) { @@ -1172,21 +1172,24 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss // Write parameters for next round. i = 0; if (m_DownsampleFactorOverride == boost::none) { - m_DownsampleFactor = std::exp(parameters(i++)); + m_DownsampleFactor = CTools::stableExp(parameters(i++)); TVector minBoundary; TVector maxBoundary; std::tie(minBoundary, maxBoundary) = bopt.boundingBox(); scale = std::min(scale, 2.0 * m_DownsampleFactor / - (std::exp(minBoundary(0)) + std::exp(maxBoundary(0)))); + (CTools::stableExp(minBoundary(0)) + + CTools::stableExp(maxBoundary(0)))); } if (m_RegularizationOverride.depthPenaltyMultiplier() == boost::none) { - m_Regularization.depthPenaltyMultiplier(std::exp(parameters(i++))); + m_Regularization.depthPenaltyMultiplier(CTools::stableExp(parameters(i++))); } if (m_RegularizationOverride.leafWeightPenaltyMultiplier() == boost::none) { - m_Regularization.leafWeightPenaltyMultiplier(scale * std::exp(parameters(i++))); + m_Regularization.leafWeightPenaltyMultiplier( + scale * CTools::stableExp(parameters(i++))); } if (m_RegularizationOverride.treeSizePenaltyMultiplier() == boost::none) { - m_Regularization.treeSizePenaltyMultiplier(scale * std::exp(parameters(i++))); + m_Regularization.treeSizePenaltyMultiplier( + scale * CTools::stableExp(parameters(i++))); } if (m_RegularizationOverride.softTreeDepthLimit() == boost::none) { m_Regularization.softTreeDepthLimit(parameters(i++)); @@ -1195,7 +1198,7 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss m_Regularization.softTreeDepthTolerance(parameters(i++)); } if (m_EtaOverride == boost::none) { - m_Eta = std::exp(scale * parameters(i++)); + m_Eta = CTools::stableExp(scale * parameters(i++)); m_EtaGrowthRatePerTree = parameters(i++); } if (m_FeatureBagFractionOverride == boost::none) { diff --git a/lib/maths/CBoostedTreeLeafNodeStatistics.cc b/lib/maths/CBoostedTreeLeafNodeStatistics.cc index 93fa9a7751..abd7becb35 100644 --- a/lib/maths/CBoostedTreeLeafNodeStatistics.cc +++ b/lib/maths/CBoostedTreeLeafNodeStatistics.cc @@ -126,7 +126,7 @@ CBoostedTreeLeafNodeStatistics::split(std::size_t leftChildId, } bool 
CBoostedTreeLeafNodeStatistics::operator<(const CBoostedTreeLeafNodeStatistics& rhs) const { - return m_BestSplit < rhs.m_BestSplit; + return COrderings::lexicographical_compare(m_BestSplit, m_Id, rhs.m_BestSplit, rhs.m_Id); } double CBoostedTreeLeafNodeStatistics::gain() const { diff --git a/lib/maths/CBoostedTreeLoss.cc b/lib/maths/CBoostedTreeLoss.cc index dd4970e88b..e3a68e2cf4 100644 --- a/lib/maths/CBoostedTreeLoss.cc +++ b/lib/maths/CBoostedTreeLoss.cc @@ -22,7 +22,8 @@ namespace maths { using namespace boosted_tree_detail; namespace { -double LOG_EPSILON{std::log(100.0 * std::numeric_limits::epsilon())}; +const double EPSILON{100.0 * std::numeric_limits::epsilon()}; +const double LOG_EPSILON{CTools::stableLog(EPSILON)}; // MSLE CONSTANTS const std::size_t MSLE_PREDICTION_INDEX{0}; @@ -35,7 +36,7 @@ double logOneMinusLogistic(double logOdds) { if (logOdds > -LOG_EPSILON) { return -logOdds; } - return std::log(1.0 - CTools::logisticFunction(logOdds)); + return CTools::stableLog(1.0 - CTools::logisticFunction(logOdds)); } double logLogistic(double logOdds) { @@ -43,7 +44,7 @@ double logLogistic(double logOdds) { if (logOdds < LOG_EPSILON) { return logOdds; } - return std::log(CTools::logisticFunction(logOdds)); + return CTools::stableLog(CTools::logisticFunction(logOdds)); } } @@ -176,8 +177,8 @@ CArgMinBinomialLogisticLossImpl::TDoubleVector CArgMinBinomialLogisticLossImpl:: double c0{m_ClassCounts(0) + 1.0}; double c1{m_ClassCounts(1) + 1.0}; double empiricalProbabilityC1{c1 / (c0 + c1)}; - double empiricalLogOddsC1{ - std::log(empiricalProbabilityC1 / (1.0 - empiricalProbabilityC1))}; + double empiricalLogOddsC1{CTools::stableLog( + empiricalProbabilityC1 / (1.0 - empiricalProbabilityC1))}; minWeight = (empiricalProbabilityC1 < 0.5 ? empiricalLogOddsC1 : 0.0) - prediction; maxWeight = (empiricalProbabilityC1 < 0.5 ? 0.0 : empiricalLogOddsC1) - prediction; @@ -695,7 +696,7 @@ void CBinomialLogisticLoss::gradient(const TMemoryMappedFloatVector& prediction, TWriter writer, double weight) const { if (prediction(0) > -LOG_EPSILON && actual == 1.0) { - writer(0, -weight * std::exp(-prediction(0))); + writer(0, -weight * CTools::stableExp(-prediction(0))); } else { writer(0, weight * (CTools::logisticFunction(prediction(0)) - actual)); } @@ -706,7 +707,7 @@ void CBinomialLogisticLoss::curvature(const TMemoryMappedFloatVector& prediction TWriter writer, double weight) const { if (prediction(0) > -LOG_EPSILON) { - writer(0, weight * std::exp(-prediction(0))); + writer(0, weight * CTools::stableExp(-prediction(0))); } else { double probability{CTools::logisticFunction(prediction(0))}; writer(0, weight * probability * (1.0 - probability)); @@ -765,7 +766,7 @@ double CMultinomialLogisticLoss::value(const TMemoryMappedFloatVector& predictio for (int i = 0; i < predictions.size(); ++i) { logZ += std::exp(predictions(i) - zmax); } - logZ = zmax + std::log(logZ); + logZ = zmax + CTools::stableLog(logZ); // i.e. 
-log(z(actual)) return weight * (logZ - predictions(static_cast(actual))); @@ -791,11 +792,12 @@ void CMultinomialLogisticLoss::gradient(const TMemoryMappedFloatVector& predicti logZ += std::exp(predictions(i) - zmax); } } - logZ = zmax + std::log(logZ); + eps = CTools::stable(eps); + logZ = zmax + CTools::stableLog(logZ); for (int i = 0; i < predictions.size(); ++i) { if (i == static_cast(actual)) { - double probability{std::exp(predictions(i) - logZ)}; + double probability{CTools::stableExp(predictions(i) - logZ)}; if (probability == 1.0) { // We have that p = 1 / (1 + eps) and the gradient is p - 1. // Use a Taylor expansion and drop terms of O(eps^2) to get: @@ -804,7 +806,7 @@ void CMultinomialLogisticLoss::gradient(const TMemoryMappedFloatVector& predicti writer(i, weight * (probability - 1.0)); } } else { - writer(i, weight * std::exp(predictions(i) - logZ)); + writer(i, weight * CTools::stableExp(predictions(i) - logZ)); } } } @@ -831,10 +833,11 @@ void CMultinomialLogisticLoss::curvature(const TMemoryMappedFloatVector& predict logZ += std::exp(predictions(i) - zmax); } } - logZ = zmax + std::log(logZ); + eps = CTools::stable(eps); + logZ = zmax + CTools::stableLog(logZ); for (std::size_t i = 0, k = 0; i < m_NumberClasses; ++i) { - double probability{std::exp(predictions(i) - logZ)}; + double probability{CTools::stableExp(predictions(i) - logZ)}; if (probability == 1.0) { // We have that p = 1 / (1 + eps) and the curvature is p (1 - p). // Use a Taylor expansion and drop terms of O(eps^2) to get: @@ -843,8 +846,8 @@ void CMultinomialLogisticLoss::curvature(const TMemoryMappedFloatVector& predict writer(k++, weight * probability * (1.0 - probability)); } for (std::size_t j = i + 1; j < m_NumberClasses; ++j) { - double probabilities[]{std::exp(predictions(i) - logZ), - std::exp(predictions(j) - logZ)}; + double probabilities[]{CTools::stableExp(predictions(i) - logZ), + CTools::stableExp(predictions(j) - logZ)}; writer(k++, -weight * probabilities[0] * probabilities[1]); } } diff --git a/lib/maths/unittest/CBayesianOptimisationTest.cc b/lib/maths/unittest/CBayesianOptimisationTest.cc index 853e63487c..698b7d282f 100644 --- a/lib/maths/unittest/CBayesianOptimisationTest.cc +++ b/lib/maths/unittest/CBayesianOptimisationTest.cc @@ -250,14 +250,15 @@ BOOST_AUTO_TEST_CASE(testMaximumExpectedImprovement) { TVector a(vector({-10.0, -10.0, -10.0, -10.0})); TVector b(vector({10.0, 10.0, 10.0, 10.0})); - TMeanAccumulator gain; + TMeanAccumulator meanImprovementBopt; + TMeanAccumulator meanImprovementRs; for (std::size_t test = 0; test < 20; ++test) { rng.generateUniformSamples(-10.0, 10.0, 12, centreCoordinates); rng.generateUniformSamples(0.3, 4.0, 12, coordinateScales); - // Use sum of some different quadratric functions. + // Use sum of some different quadratic functions. 
TVector centres[]{TVector{4}, TVector{4}, TVector{4}}; TVector scales[]{TVector{4}, TVector{4}, TVector{4}}; for (std::size_t i = 0; i < 3; ++i) { @@ -273,7 +274,7 @@ BOOST_AUTO_TEST_CASE(testMaximumExpectedImprovement) { (x - centres[1])}; double f3{(x - centres[2]).transpose() * scales[2].asDiagonal() * (x - centres[2])}; - return f1 + f2 + f3; + return 100.0 + f1 - 0.2 * f2 + f3; }; maths::CBayesianOptimisation bopt{ @@ -292,28 +293,39 @@ BOOST_AUTO_TEST_CASE(testMaximumExpectedImprovement) { } LOG_TRACE(<< "Bayesian optimisation..."); - for (std::size_t i = 0; i < 10; ++i) { + double f0Bopt{fminBopt}; + for (std::size_t i = 0; i < 20; ++i) { TVector x; std::tie(x, std::ignore) = bopt.maximumExpectedImprovement(); LOG_TRACE(<< "x = " << x.transpose() << ", f(x) = " << f(x)); bopt.add(x, f(x), 10.0); fminBopt = std::min(fminBopt, f(x)); } + double improvementBopt{(f0Bopt - fminBopt) / f0Bopt}; LOG_TRACE(<< "random search..."); - for (std::size_t i = 0; i < 10; ++i) { + double f0Rs{fminRs}; + for (std::size_t i = 0; i < 20; ++i) { rng.generateUniformSamples(0.0, 1.0, 4, randomSearch); TVector x{a + vector(randomSearch).asDiagonal() * (b - a)}; LOG_TRACE(<< "x = " << x.transpose() << ", f(x) = " << f(x)); fminRs = std::min(fminRs, f(x)); } + double improvementRs{(f0Rs - fminRs) / f0Rs}; - LOG_TRACE(<< "gain = " << fminRs / fminBopt); - gain.add(fminRs / fminBopt); + LOG_DEBUG(<< "% improvement BO = " << 100.0 * improvementBopt + << ", % improvement RS = " << 100.0 * improvementRs); + BOOST_TEST_REQUIRE(improvementBopt > improvementRs); + meanImprovementBopt.add(improvementBopt); + meanImprovementRs.add(improvementRs); } - LOG_DEBUG(<< "mean gain = " << maths::CBasicStatistics::mean(gain)); - BOOST_TEST_REQUIRE(maths::CBasicStatistics::mean(gain) > 1.26); + LOG_DEBUG(<< "mean % improvement BO = " + << 100.0 * maths::CBasicStatistics::mean(meanImprovementBopt)); + LOG_DEBUG(<< "mean % improvement RS = " + << 100.0 * maths::CBasicStatistics::mean(meanImprovementRs)); + BOOST_TEST_REQUIRE(maths::CBasicStatistics::mean(meanImprovementBopt) > + 1.8 * maths::CBasicStatistics::mean(meanImprovementRs)); // 80% } BOOST_AUTO_TEST_CASE(testPersistRestore) { diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc index 19712ea302..2b8b87d5a1 100644 --- a/lib/maths/unittest/CBoostedTreeTest.cc +++ b/lib/maths/unittest/CBoostedTreeTest.cc @@ -294,7 +294,7 @@ BOOST_AUTO_TEST_CASE(testPiecewiseConstant) { // Unbiased... BOOST_REQUIRE_CLOSE_ABSOLUTE( 0.0, modelBias[i][0], - 9.1 * std::sqrt(noiseVariance / static_cast(trainRows))); + 5.0 * std::sqrt(noiseVariance / static_cast(trainRows))); // Good R^2... 
BOOST_TEST_REQUIRE(modelRSquared[i][0] > 0.97); diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc index d1f601cbac..a49d32af69 100644 --- a/lib/maths/unittest/CToolsTest.cc +++ b/lib/maths/unittest/CToolsTest.cc @@ -28,6 +28,7 @@ #include #include +#include #include BOOST_AUTO_TEST_SUITE(CToolsTest) @@ -342,6 +343,20 @@ class CLogPdf { private: const maths::CMixtureDistribution& m_Mixture; }; + +void printByte(const unsigned char& byte, std::ostream& o) { + o << std::bitset{byte}; +} + +template +std::string printBits(const T& t) { + std::ostringstream o; + const unsigned char* byte{reinterpret_cast(&t)}; + for (std::size_t s = 0; s < sizeof(t); ++s, ++byte) { + printByte(*byte, o); + } + return o.str(); +} } BOOST_AUTO_TEST_CASE(testProbabilityOfLessLikelySample) { @@ -1260,4 +1275,27 @@ BOOST_AUTO_TEST_CASE(testLogSoftmax) { } } +BOOST_AUTO_TEST_CASE(testStable) { + // Test the bit representation of the stable log and exp of some random values. + // This will fail if they're different on any of our target platforms. + + BOOST_REQUIRE_EQUAL("1001000010101011111010010001000011111110001100011111001110111111", + printBits(CTools::stableLog(0.301283021))); + BOOST_REQUIRE_EQUAL("1001010011010100101100111011101001100110101100010010110101000000", + printBits(CTools::stableLog(2803801.9332))); + BOOST_REQUIRE_EQUAL("1100010011111111100101000001110111100000001011010001001101000000", + printBits(CTools::stableLog(120.880233))); + BOOST_REQUIRE_EQUAL("1110111001101001111010100000001100101010100100110000011001000000", + printBits(CTools::stableLog(16.808042323))); + + BOOST_REQUIRE_EQUAL("1000000001000110101011010011001110111111101111000001000101000000", + printBits(CTools::stableExp(1.48937498342))); + BOOST_REQUIRE_EQUAL("1101111011000110011111000001010001101011111111000011000001000000", + printBits(CTools::stableExp(2.832390))); + BOOST_REQUIRE_EQUAL("0101111001100101111110101101101100100100111001101001001100111111", + printBits(CTools::stableExp(-3.94080233))); + BOOST_REQUIRE_EQUAL("1111100000011010010001001000000100011011010100100000010101000000", + printBits(CTools::stableExp(0.98023840))); +} + BOOST_AUTO_TEST_SUITE_END()
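
The mechanism behind most of the changes in this patch is CIEEE754::dropbits, which CTools::stable wraps (dropping one trailing mantissa bit) and which stableLog/stableExp apply to the results of std::log and std::exp. The sketch below is a minimal standalone restatement of that masking idea using a plain integer mask instead of the SDoubleRep bit-field from the header; the names dropTrailingBits and the main routine are illustrative only and not part of the patch.

#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>

// Zero the lowest `bits` bits of an IEEE 754 binary64 mantissa, as
// CIEEE754::dropbits does. Different libm implementations may disagree in the
// final rounding bit of log/exp; clearing that bit maps both results to the
// same value in almost all cases (values straddling a mask boundary can still
// differ).
double dropTrailingBits(double value, int bits) {
    constexpr std::uint64_t mantissaMask{(1ull << 52) - 1};
    std::uint64_t bitwise;
    std::memcpy(&bitwise, &value, sizeof(double)); // respect strict aliasing
    std::uint64_t mantissa{bitwise & mantissaMask};
    mantissa &= (mantissaMask << bits) & mantissaMask;
    bitwise = (bitwise & ~mantissaMask) | mantissa;
    std::memcpy(&value, &bitwise, sizeof(double));
    return value;
}

int main() {
    double x{std::log(120.880233)};
    double y{dropTrailingBits(x, 1)};
    std::uint64_t xb;
    std::uint64_t yb;
    std::memcpy(&xb, &x, sizeof(double));
    std::memcpy(&yb, &y, sizeof(double));
    // x and y can differ only in the lowest mantissa bit; after masking that
    // bit of y is always zero.
    std::cout << (xb & 1) << ' ' << (yb & 1) << '\n';
}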
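
In CBayesianOptimisation the boost::math::normal cdf/pdf calls are replaced by the local stableNormCdf/stableNormPdf helpers, and the quantity being maximised is the expected improvement sigma * (z * Phi(z) + phi(z)) with z = (fmin - mu) / sigma, which is exactly what the EI lambda negates. Below is a compact restatement using plain std::erf and std::exp; the patch additionally routes these through CTools::stable, the zero return for a non-positive variance is assumed here, and the function names are illustrative.

#include <cmath>

// Standard normal cdf and pdf with sqrt(2) and sqrt(2*pi) written out, so no
// boost::math dependency is needed in this sketch.
double normCdf(double z) {
    return (1.0 + std::erf(z / 1.4142135623730951)) / 2.0;
}
double normPdf(double z) {
    return std::exp(-z * z / 2.0) / 2.5066282746310002;
}

// Expected improvement of a Gaussian process posterior N(mu, sigma^2) over the
// best observed function value fmin: EI = sigma * (z * Phi(z) + phi(z)) where
// z = (fmin - mu) / sigma. The optimiser minimises -EI.
double expectedImprovement(double mu, double sigma, double fmin) {
    if (sigma <= 0.0) {
        return 0.0; // no posterior uncertainty, so no expected improvement
    }
    double z{(fmin - mu) / sigma};
    return sigma * (z * normCdf(z) + normPdf(z));
}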
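
Several orderings are also widened so that ties are broken deterministically: SSplitStatistics now compares s_SplitAt after gain, curvature and feature, the leaf statistics fall back to the node id, and the best Bayesian optimisation candidate is selected by comparing the pair (EI, x) rather than EI alone. This matters because two candidates can score exactly equally, and which one wins must not depend on platform-specific iteration order or rounding. A small sketch of the idea using std::tie in place of COrderings::lexicographical_compare; the struct is illustrative and only mirrors the SSplitStatistics fields.

#include <cstddef>
#include <tuple>

struct SSplitCandidate {
    double s_Gain;
    double s_Curvature;
    std::size_t s_Feature;
    double s_SplitAt;
};

// Order by gain first and fall back to the remaining fields, so two splits
// with identical gain still have a well defined, platform independent order.
bool operator<(const SSplitCandidate& lhs, const SSplitCandidate& rhs) {
    return std::tie(lhs.s_Gain, lhs.s_Curvature, lhs.s_Feature, lhs.s_SplitAt) <
           std::tie(rhs.s_Gain, rhs.s_Curvature, rhs.s_Feature, rhs.s_SplitAt);
}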
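
The multinomial loss and the softmax helpers all compute their normaliser through the max-shifted form logZ = zmax + log(sum_i exp(z_i - zmax)), now with the log and exp routed through stableLog/stableExp. A minimal sketch of that form with plain std calls; the function name is assumed for illustration.

#include <algorithm>
#include <cmath>
#include <vector>

// log(sum_i exp(z_i)) computed as zmax + log(sum_i exp(z_i - zmax)). Shifting
// by the maximum keeps every exponent non-positive, so the sum cannot overflow
// and the largest term is exactly 1.
double logSumExp(const std::vector<double>& z) {
    double zmax{*std::max_element(z.begin(), z.end())};
    double sum{0.0};
    for (double zi : z) {
        sum += std::exp(zi - zmax);
    }
    return zmax + std::log(sum);
}

The cross-entropy for the true class a is then logSumExp(z) - z[a], which, scaled by the example weight, is what CMultinomialLogisticLoss::value returns.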