From b742e506c28529753e7b298b19b52e86ad84bc01 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 13:32:25 +0100 Subject: [PATCH 01/23] Improve hyperparameter optimisation initialisation --- include/core/CLoopProgress.h | 27 ++- include/maths/CBoostedTreeFactory.h | 30 ++- include/maths/CBoostedTreeImpl.h | 198 +++++++++++----- lib/api/CDataFrameBoostedTreeRunner.cc | 3 +- lib/core/CLoopProgress.cc | 57 ++++- lib/core/unittest/CLoopProgressTest.cc | 50 +++- lib/core/unittest/CLoopProgressTest.h | 1 + lib/maths/CBoostedTreeFactory.cc | 306 +++++++++++++++++++------ lib/maths/CBoostedTreeImpl.cc | 249 ++++++++++---------- lib/maths/unittest/CBoostedTreeTest.cc | 2 +- 10 files changed, 663 insertions(+), 260 deletions(-) diff --git a/include/core/CLoopProgress.h b/include/core/CLoopProgress.h index a9a4d70c25..d37de8c3bb 100644 --- a/include/core/CLoopProgress.h +++ b/include/core/CLoopProgress.h @@ -14,6 +14,8 @@ namespace ml { namespace core { +class CStatePersistInserter; +class CStateRestoreTraverser; //! \brief Manages recording the progress of a loop. //! @@ -46,14 +48,35 @@ class CORE_EXPORT CLoopProgress { using TProgressCallback = std::function; public: + CLoopProgress(); template - CLoopProgress(ITR begin, ITR end, const TProgressCallback& recordProgress, double scale = 1.0) + CLoopProgress(ITR begin, ITR end, const TProgressCallback& recordProgress = noop, double scale = 1.0) : CLoopProgress(std::distance(begin, end), recordProgress, scale) {} - CLoopProgress(std::size_t size, const TProgressCallback& recordProgress, double scale = 1.0); + CLoopProgress(std::size_t size, + const TProgressCallback& recordProgress = noop, + double scale = 1.0); + + //! Attach a new progress monitor callback. + void attach(const TProgressCallback& recordProgress); //! Increment the progress by \p i. void increment(std::size_t i = 1); + //! Resume progress monitoring which was restored. + void resumeRestored(); + + //! Get a checksum for this object. + std::uint64_t checksum() const; + + //! Persist by passing information to \p inserter. + void acceptPersistInserter(CStatePersistInserter& inserter) const; + + //! Populate the object from serialized data. + bool acceptRestoreTraverser(CStateRestoreTraverser& traverser); + +private: + static void noop(double); + private: std::size_t m_Size; std::size_t m_Steps; diff --git a/include/maths/CBoostedTreeFactory.h b/include/maths/CBoostedTreeFactory.h index 11fcbffd58..3dd579a4a8 100644 --- a/include/maths/CBoostedTreeFactory.h +++ b/include/maths/CBoostedTreeFactory.h @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -93,10 +94,14 @@ class MATHS_EXPORT CBoostedTreeFactory final { TBoostedTreeUPtr buildFor(core::CDataFrame& frame, std::size_t dependentVariable); private: + using TDoubleDoublePr = std::pair; using TOptionalDouble = boost::optional; using TOptionalSize = boost::optional; + using TVector = CVectorNx1; + using TOptionalVector = boost::optional; using TPackedBitVectorVec = std::vector; using TBoostedTreeImplUPtr = std::unique_ptr; + using TScaleRegularization = std::function; private: static const double MINIMUM_ETA; @@ -121,10 +126,22 @@ class MATHS_EXPORT CBoostedTreeFactory final { //! Initialize the regressors sample distribution. bool initializeFeatureSampleDistribution() const; - //! Read overrides for hyperparameters and if necessary estimate the initial - //! values for \f$\lambda\f$ and \f$\gamma\f$ which match the gain from an - //! overfit tree. 
- void initializeHyperparameters(core::CDataFrame& frame) const; + //! Set the initial values for the various hyperparameters. + void initializeHyperparameters(core::CDataFrame& frame); + + //! Estimate a good central value for the regularisation hyperparameters + //! search bounding box. + void initializeUnsetRegularizationHyperparameters(core::CDataFrame& frame); + + //! Estimate the reduction in gain from a split and the total curvature of + //! the loss function at a split. + TDoubleDoublePr estimateTreeGainAndCurvature(core::CDataFrame& frame, + const core::CPackedBitVector& trainingRowMask) const; + + //! Get the regularizer value at the point the model starts to overfit. + TOptionalVector candidateRegularizerSearchInterval(core::CDataFrame& frame, + core::CPackedBitVector trainingRowMask, + TScaleRegularization scale) const; //! Initialize the state for hyperparameter optimisation. void initializeHyperparameterOptimisation() const; @@ -132,6 +149,9 @@ class MATHS_EXPORT CBoostedTreeFactory final { //! Get the number of hyperparameter tuning rounds to use. std::size_t numberHyperparameterTuningRounds() const; + //! Setup monitoring for training progress. + void setupTrainingProgressMonitoring(); + static void noopRecordProgress(double); static void noopRecordMemoryUsage(std::int64_t); static void noopRecordTrainingState(CDataFrameRegressionModel::TPersistFunc); @@ -140,6 +160,8 @@ class MATHS_EXPORT CBoostedTreeFactory final { TOptionalDouble m_MinimumFrequencyToOneHotEncode; TOptionalSize m_BayesianOptimisationRestarts; TBoostedTreeImplUPtr m_TreeImpl; + TVector m_GammaSearchInterval; + TVector m_LambdaSearchInterval; TProgressCallback m_RecordProgress = noopRecordProgress; TMemoryUsageCallback m_RecordMemoryUsage = noopRecordMemoryUsage; TTrainingStateCallback m_RecordTrainingState = noopRecordTrainingState; diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index 1f4793ef95..d5d8201b47 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -103,14 +103,14 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::size_t memoryUsage() const; private: - using TDoubleDoublePrVec = std::vector>; + using TSizeDoublePr = std::pair; + using TDoubleDoublePr = std::pair; + using TDoubleDoublePrVec = std::vector; using TOptionalDouble = boost::optional; using TOptionalSize = boost::optional; - using TVector = CDenseVector; using TDoubleVecVec = std::vector; using TSizeVec = std::vector; - using TSizeDoublePr = std::pair; - using TDoubleDoubleDoubleTr = std::tuple; + using TVector = CDenseVector; using TRowItr = core::CDataFrame::TRowItr; using TPackedBitVectorVec = std::vector; using TDataFrameCategoryEncoderUPtr = std::unique_ptr; @@ -120,14 +120,73 @@ class MATHS_EXPORT CBoostedTreeImpl final { using TNodeVec = std::vector; using TNodeVecVec = std::vector; + //! \brief Holds the parameters associated with the different types of regulariser + //! terms available. + template + class CRegularization final { + public: + //! Set the multiplier of the tree size regularizer. + CRegularization& gamma(double gamma) { + m_Gamma = gamma; + return *this; + } + + //! Set the multiplier of the square leaf weight regularizer. + CRegularization& lambda(double lambda) { + m_Lambda = lambda; + return *this; + } + + //! Count the number of parameters which have their default values. + std::size_t countNotSet() const { + return (m_Gamma == T{} ? 1 : 0) + (m_Lambda == T{} ? 1 : 0); + } + + //! Multiplier of the tree size regularizer. 
+ T gamma() const { return m_Gamma; } + + //! Multiplier of the square leaf weight regularizer. + T lambda() const { return m_Lambda; } + + //! Get description of the regularization parameters. + std::string print() const { + return "(gamma = " + toString(m_Gamma) + + ", lambda = " + toString(m_Lambda) + ")"; + } + + //! Persist by passing information to \p inserter. + void acceptPersistInserter(core::CStatePersistInserter& inserter) const; + + //! Populate the object from serialized data. + bool acceptRestoreTraverser(core::CStateRestoreTraverser& traverser); + + private: + static std::string toString(double x) { return std::to_string(x); } + static std::string toString(TOptionalDouble x) { + return x != boost::none ? toString(*x) : "null"; + } + + private: + T m_Gamma = T{}; + T m_Lambda = T{}; + }; + + using TRegularization = CRegularization; + using TRegularizationOverride = CRegularization; + //! \brief The algorithm parameters we'll directly optimise to improve test error. struct SHyperparameters { - double s_Lambda; - double s_Gamma; + //! The regularisation parameters. + TRegularization s_Regularization; + + //! Shrinkage. double s_Eta; + + //! Rate of growth of shrinkage in the training loop. double s_EtaGrowthRatePerTree; + + //! The fraction of features we use per bag. double s_FeatureBagFraction; - TDoubleVec s_FeatureSampleProbabilities; //! Persist by passing information to \p inserter. void acceptPersistInserter(core::CStatePersistInserter& inserter) const; @@ -182,16 +241,26 @@ class MATHS_EXPORT CBoostedTreeImpl final { //! Set the node value to \p value. void value(double value) { m_NodeValue = value; } + //! Get the gain of the split. + double gain() const { return m_Gain; } + + //! Get the total curvature at the rows below this node. + double curvature() const { return m_Curvature; } + //! Split this node and add its child nodes to \p tree. 
std::pair split(std::size_t splitFeature, double splitValue, bool assignMissingToLeft, + double gain, + double curvature, TNodeVec& tree) { m_SplitFeature = splitFeature; m_SplitValue = splitValue; m_AssignMissingToLeft = assignMissingToLeft; m_LeftChild = static_cast(tree.size()); m_RightChild = static_cast(tree.size() + 1); + m_Gain = gain; + m_Curvature = curvature; tree.resize(tree.size() + 2); return {m_LeftChild, m_RightChild}; } @@ -211,8 +280,8 @@ class MATHS_EXPORT CBoostedTreeImpl final { core::bindRetrievableState( [&](auto& state, TRowItr beginRows, TRowItr endRows) { core::CPackedBitVector& leftRowMask{std::get<0>(state)}; - std::size_t& leftCount{std::get<1>(state)}; - std::size_t& rightCount{std::get<2>(state)}; + std::size_t& leftChildNumberRows{std::get<1>(state)}; + std::size_t& rightChildNumberRows{std::get<2>(state)}; for (auto row = beginRows; row != endRows; ++row) { std::size_t index{row->index()}; double value{encoder.encode(*row)[m_SplitFeature]}; @@ -221,9 +290,9 @@ class MATHS_EXPORT CBoostedTreeImpl final { (missing == false && value < m_SplitValue)) { leftRowMask.extend(false, index - leftRowMask.size()); leftRowMask.extend(true); - ++leftCount; + ++leftChildNumberRows; } else { - ++rightCount; + ++rightChildNumberRows; } } }, @@ -237,13 +306,14 @@ class MATHS_EXPORT CBoostedTreeImpl final { } core::CPackedBitVector leftRowMask; - std::size_t leftCount; - std::size_t rightCount; - std::tie(leftRowMask, leftCount, rightCount) = std::move(masks[0].s_FunctionState); + std::size_t leftChildNumberRows; + std::size_t rightChildNumberRows; + std::tie(leftRowMask, leftChildNumberRows, rightChildNumberRows) = + std::move(masks[0].s_FunctionState); for (std::size_t i = 1; i < masks.size(); ++i) { leftRowMask |= std::get<0>(masks[i].s_FunctionState); - leftCount += std::get<1>(masks[i].s_FunctionState); - rightCount += std::get<2>(masks[i].s_FunctionState); + leftChildNumberRows += std::get<1>(masks[i].s_FunctionState); + rightChildNumberRows += std::get<2>(masks[i].s_FunctionState); } LOG_TRACE(<< "# rows in left node = " << leftRowMask.manhattan()); LOG_TRACE(<< "left row mask = " << leftRowMask); @@ -254,7 +324,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { LOG_TRACE(<< "left row mask = " << rightRowMask); return std::make_tuple(std::move(leftRowMask), std::move(rightRowMask), - leftCount < rightCount); + leftChildNumberRows < rightChildNumberRows); } //! Get a human readable description of this tree. @@ -290,6 +360,8 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::int32_t m_LeftChild = -1; std::int32_t m_RightChild = -1; double m_NodeValue = 0.0; + double m_Gain = 0.0; + double m_Curvature = 0.0; }; //! 
\brief Maintains a collection of statistics about a leaf of the regression @@ -305,12 +377,13 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::size_t numberThreads, const core::CDataFrame& frame, const CDataFrameCategoryEncoder& encoder, - double lambda, - double gamma, + const TRegularization& regularization, const TDoubleVecVec& candidateSplits, + std::size_t depth, TSizeVec featureBag, core::CPackedBitVector rowMask) - : m_Id{id}, m_Lambda{lambda}, m_Gamma{gamma}, m_CandidateSplits{candidateSplits}, + : m_Id{id}, m_Regularization{regularization}, + m_CandidateSplits{candidateSplits}, m_Depth{depth}, m_FeatureBag{std::move(featureBag)}, m_RowMask{std::move(rowMask)} { std::sort(m_FeatureBag.begin(), m_FeatureBag.end()); @@ -320,13 +393,13 @@ class MATHS_EXPORT CBoostedTreeImpl final { this->computeAggregateLossDerivatives(numberThreads, frame, encoder); } - //! This should only called by split but is public so it's accessible to make_shared. + //! This should only called by split but is public so it's accessible to std::make_shared. CLeafNodeStatistics(std::size_t id, const CLeafNodeStatistics& parent, const CLeafNodeStatistics& sibling, core::CPackedBitVector rowMask) - : m_Id{id}, m_Lambda{sibling.m_Lambda}, m_Gamma{sibling.m_Gamma}, - m_CandidateSplits{sibling.m_CandidateSplits}, + : m_Id{id}, m_Regularization{sibling.m_Regularization}, + m_CandidateSplits{sibling.m_CandidateSplits}, m_Depth{sibling.m_Depth}, m_FeatureBag{sibling.m_FeatureBag}, m_RowMask{std::move(rowMask)} { LOG_TRACE(<< "row mask = " << m_RowMask); @@ -363,10 +436,10 @@ class MATHS_EXPORT CBoostedTreeImpl final { CLeafNodeStatistics(const CLeafNodeStatistics&) = delete; - CLeafNodeStatistics& operator=(const CLeafNodeStatistics&) = delete; - CLeafNodeStatistics(CLeafNodeStatistics&&) = default; + CLeafNodeStatistics& operator=(const CLeafNodeStatistics&) = delete; + CLeafNodeStatistics& operator=(CLeafNodeStatistics&&) = default; //! Apply the split defined by (\p leftChildRowMask, \p rightChildRowMask). @@ -375,8 +448,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::size_t numberThreads, const core::CDataFrame& frame, const CDataFrameCategoryEncoder& encoder, - double lambda, - double gamma, + const TRegularization& regularization, const TDoubleVecVec& candidateSplits, TSizeVec featureBag, core::CPackedBitVector leftChildRowMask, @@ -385,8 +457,9 @@ class MATHS_EXPORT CBoostedTreeImpl final { if (leftChildHasFewerRows) { auto leftChild = std::make_shared( - leftChildId, numberThreads, frame, encoder, lambda, gamma, candidateSplits, - std::move(featureBag), std::move(leftChildRowMask)); + leftChildId, numberThreads, frame, encoder, regularization, + candidateSplits, m_Depth + 1, std::move(featureBag), + std::move(leftChildRowMask)); auto rightChild = std::make_shared( rightChildId, *this, *leftChild, std::move(rightChildRowMask)); @@ -394,8 +467,8 @@ class MATHS_EXPORT CBoostedTreeImpl final { } auto rightChild = std::make_shared( - rightChildId, numberThreads, frame, encoder, lambda, gamma, - candidateSplits, std::move(featureBag), std::move(rightChildRowMask)); + rightChildId, numberThreads, frame, encoder, regularization, candidateSplits, + m_Depth + 1, std::move(featureBag), std::move(rightChildRowMask)); auto leftChild = std::make_shared( leftChildId, *this, *rightChild, std::move(leftChildRowMask)); @@ -410,6 +483,10 @@ class MATHS_EXPORT CBoostedTreeImpl final { //! Get the gain in loss of the best split of this leaf. 
double gain() const { return this->bestSplitStatistics().s_Gain; } + double curvature() const { + return this->bestSplitStatistics().s_Curvature; + } + //! Get the best (feature, feature value) split. TSizeDoublePr bestSplit() const { const auto& split = this->bestSplitStatistics(); @@ -460,20 +537,20 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::size_t curvatureSize{gradientsSize}; std::size_t missingGradientsSize{(numberCols - 1) * sizeof(double)}; std::size_t missingCurvatureSize{missingGradientsSize}; - return featureBagSize + rowMaskSize + gradientsSize + + return sizeof(CLeafNodeStatistics) + featureBagSize + rowMaskSize + gradientsSize + curvatureSize + missingGradientsSize + missingCurvatureSize; } private: //! \brief Statistics relating to a split of the node. struct SSplitStatistics : private boost::less_than_comparable { - SSplitStatistics(double gain, std::size_t feature, double splitAt, bool assignMissingToLeft) - : s_Gain{gain}, s_Feature{feature}, s_SplitAt{splitAt}, + SSplitStatistics(double gain, double curvature, std::size_t feature, double splitAt, bool assignMissingToLeft) + : s_Gain{gain}, s_Curvature{curvature}, s_Feature{feature}, s_SplitAt{splitAt}, s_AssignMissingToLeft{assignMissingToLeft} {} bool operator<(const SSplitStatistics& rhs) const { return COrderings::lexicographical_compare( - s_Gain, s_Feature, rhs.s_Gain, rhs.s_Feature); + s_Gain, s_Curvature, s_Feature, rhs.s_Gain, rhs.s_Curvature, rhs.s_Feature); } std::string print() const { @@ -484,6 +561,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { } double s_Gain; + double s_Curvature; std::size_t s_Feature; double s_SplitAt; bool s_AssignMissingToLeft; @@ -571,10 +649,11 @@ class MATHS_EXPORT CBoostedTreeImpl final { SSplitStatistics computeBestSplitStatistics() const { - static const std::size_t ASSIGN_MISSING_TO_LEFT{0}; - static const std::size_t ASSIGN_MISSING_TO_RIGHT{1}; + // We have two possible regularisation terms we'll use: + // 1. Tree size: gamma * "node count" + // 2. 
Sum square weights: lambda * sum{"leaf weight" ^ 2)} - SSplitStatistics result{-INF, m_FeatureBag.size(), INF, true}; + SSplitStatistics result{-INF, 0.0, m_FeatureBag.size(), INF, true}; for (auto i : m_FeatureBag) { double g{std::accumulate(m_Gradients[i].begin(), m_Gradients[i].end(), 0.0) + @@ -595,14 +674,17 @@ class MATHS_EXPORT CBoostedTreeImpl final { gl[ASSIGN_MISSING_TO_RIGHT] += m_Gradients[i][j]; hl[ASSIGN_MISSING_TO_RIGHT] += m_Curvatures[i][j]; - double gain[]{CTools::pow2(gl[ASSIGN_MISSING_TO_LEFT]) / - (hl[ASSIGN_MISSING_TO_LEFT] + m_Lambda) + - CTools::pow2(g - gl[ASSIGN_MISSING_TO_LEFT]) / - (h - hl[ASSIGN_MISSING_TO_LEFT] + m_Lambda), - CTools::pow2(gl[ASSIGN_MISSING_TO_RIGHT]) / - (hl[ASSIGN_MISSING_TO_RIGHT] + m_Lambda) + - CTools::pow2(g - gl[ASSIGN_MISSING_TO_RIGHT]) / - (h - hl[ASSIGN_MISSING_TO_RIGHT] + m_Lambda)}; + double gain[]{ + CTools::pow2(gl[ASSIGN_MISSING_TO_LEFT]) / + (hl[ASSIGN_MISSING_TO_LEFT] + m_Regularization.lambda()) + + CTools::pow2(g - gl[ASSIGN_MISSING_TO_LEFT]) / + (h - hl[ASSIGN_MISSING_TO_LEFT] + + m_Regularization.lambda()), + CTools::pow2(gl[ASSIGN_MISSING_TO_RIGHT]) / + (hl[ASSIGN_MISSING_TO_RIGHT] + m_Regularization.lambda()) + + CTools::pow2(g - gl[ASSIGN_MISSING_TO_RIGHT]) / + (h - hl[ASSIGN_MISSING_TO_RIGHT] + + m_Regularization.lambda())}; if (gain[ASSIGN_MISSING_TO_LEFT] > maximumGain) { maximumGain = gain[ASSIGN_MISSING_TO_LEFT]; @@ -616,9 +698,11 @@ class MATHS_EXPORT CBoostedTreeImpl final { } } - double gain{0.5 * (maximumGain - CTools::pow2(g) / (h + m_Lambda)) - m_Gamma}; + double gain{0.5 * (maximumGain - + CTools::pow2(g) / (h + m_Regularization.lambda())) - + m_Regularization.gamma()}; - SSplitStatistics candidate{gain, i, splitAt, assignMissingToLeft}; + SSplitStatistics candidate{gain, h, i, splitAt, assignMissingToLeft}; LOG_TRACE(<< "candidate split: " << candidate.print()); if (candidate > result) { @@ -633,9 +717,9 @@ class MATHS_EXPORT CBoostedTreeImpl final { private: std::size_t m_Id; - double m_Lambda; - double m_Gamma; + const TRegularization& m_Regularization; const TDoubleVecVec& m_CandidateSplits; + std::size_t m_Depth; TSizeVec m_FeatureBag; core::CPackedBitVector m_RowMask; TDoubleVecVec m_Gradients; @@ -662,9 +746,8 @@ class MATHS_EXPORT CBoostedTreeImpl final { //! Compute the sum loss for the predictions from \p frame and the leaf //! count and squared weight sum from \p forest. - TDoubleDoubleDoubleTr regularisedLoss(const core::CDataFrame& frame, - const core::CPackedBitVector& trainingRowMask, - const TNodeVecVec& forest) const; + TDoubleDoublePr gainAndCurvatureAtPercentile(double percentile, + const TNodeVecVec& forest) const; //! Train the forest and compute loss moments on each fold. 
TMeanVarAccumulator crossValidateForest(core::CDataFrame& frame, @@ -761,20 +844,18 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::size_t m_NumberThreads; std::size_t m_DependentVariable = std::numeric_limits::max(); CBoostedTree::TLossFunctionUPtr m_Loss; - TOptionalDouble m_LambdaOverride; - TOptionalDouble m_GammaOverride; + TRegularizationOverride m_RegularizationOverride; TOptionalDouble m_EtaOverride; TOptionalSize m_MaximumNumberTreesOverride; TOptionalDouble m_FeatureBagFractionOverride; - double m_Lambda = 0.0; - double m_Gamma = 0.0; + TRegularization m_Regularization; double m_Eta = 0.1; double m_EtaGrowthRatePerTree = 1.05; std::size_t m_NumberFolds = 4; std::size_t m_MaximumNumberTrees = 20; std::size_t m_MaximumAttemptsToAddTree = 3; std::size_t m_NumberSplitsPerFeature = 75; - std::size_t m_MaximumOptimisationRoundsPerHyperparameter = 5; + std::size_t m_MaximumOptimisationRoundsPerHyperparameter = 3; std::size_t m_RowsPerFeature = 50; double m_FeatureBagFraction = 0.5; double m_MaximumTreeSizeMultiplier = 1.0; @@ -790,6 +871,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { TBayesinOptimizationUPtr m_BayesianOptimization; std::size_t m_NumberRounds = 1; std::size_t m_CurrentRound = 0; + mutable core::CLoopProgress m_TrainingProgress; friend class CBoostedTreeFactory; }; diff --git a/lib/api/CDataFrameBoostedTreeRunner.cc b/lib/api/CDataFrameBoostedTreeRunner.cc index 41ad7cb549..0023c99d04 100644 --- a/lib/api/CDataFrameBoostedTreeRunner.cc +++ b/lib/api/CDataFrameBoostedTreeRunner.cc @@ -191,11 +191,10 @@ void CDataFrameBoostedTreeRunner::runImpl(const TStrVec& featureNames, auto restoreSearcher{this->spec().restoreSearcher()}; bool treeRestored{false}; if (restoreSearcher != nullptr) { - treeRestored = restoreBoostedTree(frame, restoreSearcher); + treeRestored = this->restoreBoostedTree(frame, restoreSearcher); } if (treeRestored == false) { - m_BoostedTree = m_BoostedTreeFactory->buildFor( frame, dependentVariableColumn - featureNames.begin()); } diff --git a/lib/core/CLoopProgress.cc b/lib/core/CLoopProgress.cc index 4a167402ec..608892f754 100644 --- a/lib/core/CLoopProgress.cc +++ b/lib/core/CLoopProgress.cc @@ -6,12 +6,28 @@ #include +#include +#include +#include +#include + #include +#include namespace ml { namespace core { namespace { -const std::size_t STEPS{16}; +const std::size_t STEPS{32}; +const std::string LOOP_SIZE_TAG{"loop_size_tag"}; +const std::string PROGRESS_STEPS_TAG{"progress_steps_tag"}; +const std::string CURRENT_STEP_PROGRESS_TAG{"current_step_progress_tag"}; +const std::string LOOP_POS_TAG{"loop_pos_tag"}; +const std::hash stringHasher; +} + +CLoopProgress::CLoopProgress() + : m_Size{std::numeric_limits::max()}, m_Steps{1}, + m_StepProgress{1.0}, m_RecordProgress{noop} { } CLoopProgress::CLoopProgress(std::size_t size, const TProgressCallback& recordProgress, double scale) @@ -19,6 +35,10 @@ CLoopProgress::CLoopProgress(std::size_t size, const TProgressCallback& recordPr m_StepProgress{scale / static_cast(m_Steps)}, m_RecordProgress{recordProgress} { } +void CLoopProgress::attach(const TProgressCallback& recordProgress) { + m_RecordProgress = recordProgress; +} + void CLoopProgress::increment(std::size_t i) { m_Pos += i; @@ -30,5 +50,40 @@ void CLoopProgress::increment(std::size_t i) { m_LastProgress += stride; } } + +void CLoopProgress::resumeRestored() { + this->increment(0); +} + +std::uint64_t CLoopProgress::checksum() const { + std::uint64_t seed{core::CHashing::hashCombine( + static_cast(m_Size), static_cast(m_Steps))}; + seed = 
core::CHashing::hashCombine( + seed, stringHasher(core::CStringUtils::typeToStringPrecise( + m_StepProgress, core::CIEEE754::E_DoublePrecision))); + return core::CHashing::hashCombine(seed, static_cast(m_Pos)); +} + +void CLoopProgress::acceptPersistInserter(CStatePersistInserter& inserter) const { + inserter.insertValue(LOOP_SIZE_TAG, m_Size); + inserter.insertValue(PROGRESS_STEPS_TAG, m_Steps); + inserter.insertValue(CURRENT_STEP_PROGRESS_TAG, m_StepProgress, + core::CIEEE754::E_DoublePrecision); + inserter.insertValue(LOOP_POS_TAG, m_Pos); +} + +bool CLoopProgress::acceptRestoreTraverser(CStateRestoreTraverser& traverser) { + do { + const std::string& name{traverser.name()}; + RESTORE_BUILT_IN(LOOP_SIZE_TAG, m_Size) + RESTORE_BUILT_IN(PROGRESS_STEPS_TAG, m_Steps) + RESTORE_BUILT_IN(CURRENT_STEP_PROGRESS_TAG, m_StepProgress) + RESTORE_BUILT_IN(LOOP_POS_TAG, m_Pos) + } while (traverser.next()); + return true; +} + +void CLoopProgress::noop(double) { +} } } diff --git a/lib/core/unittest/CLoopProgressTest.cc b/lib/core/unittest/CLoopProgressTest.cc index ab790e4379..bdcae9fb76 100644 --- a/lib/core/unittest/CLoopProgressTest.cc +++ b/lib/core/unittest/CLoopProgressTest.cc @@ -6,11 +6,16 @@ #include "CLoopProgressTest.h" +#include +#include #include #include #include +#include +#include + using namespace ml; using TSizeVec = std::vector; @@ -75,7 +80,7 @@ void CLoopProgressTest::testRandom() { core::CLoopProgress loopProgress{size[0], recordProgress}; for (std::size_t i = 0; i < size[0]; ++i, loopProgress.increment()) { - CPPUNIT_ASSERT_EQUAL(static_cast(16 * i / size[0]) / 16.0, progress); + CPPUNIT_ASSERT_EQUAL(static_cast(32 * i / size[0]) / 32.0, progress); } CPPUNIT_ASSERT_EQUAL(1.0, progress); @@ -86,7 +91,7 @@ void CLoopProgressTest::testRandom() { for (std::size_t t = 0; t < 100; ++t) { TSizeVec size; - rng.generateUniformSamples(30, 100, 1, size); + rng.generateUniformSamples(33, 100, 1, size); if (t % 10 == 0) { LOG_DEBUG(<< "Loop length = " << size[0]); @@ -96,7 +101,7 @@ void CLoopProgressTest::testRandom() { core::CLoopProgress loopProgress{size[0], recordProgress}; for (std::size_t i = 0; i < size[0]; i += 20, loopProgress.increment(20)) { - CPPUNIT_ASSERT_EQUAL(static_cast(16 * i / size[0]) / 16.0, progress); + CPPUNIT_ASSERT_EQUAL(static_cast(32 * i / size[0]) / 32.0, progress); } CPPUNIT_ASSERT_EQUAL(1.0, progress); @@ -134,6 +139,43 @@ void CLoopProgressTest::testScaled() { } } +void CLoopProgressTest::testSerialization() { + + double progress{0.0}; + auto recordProgress = [&progress](double p) { progress += p; }; + + core::CLoopProgress loopProgress{50, recordProgress}; + for (std::size_t i = 0; i < 20; ++i) { + loopProgress.increment(); + } + + std::stringstream persistStream; + { + core::CJsonStatePersistInserter inserter(persistStream); + loopProgress.acceptPersistInserter(inserter); + } + + LOG_DEBUG(<< "state = " << persistStream.str()); + + core::CJsonStateRestoreTraverser traverser(persistStream); + core::CLoopProgress restoredLoopProgress; + restoredLoopProgress.acceptRestoreTraverser(traverser); + + double restoredProgress{0.0}; + auto restoredRecordProgress = [&restoredProgress](double p) { + restoredProgress += p; + }; + restoredLoopProgress.attach(restoredRecordProgress); + restoredLoopProgress.resumeRestored(); + + CPPUNIT_ASSERT_EQUAL(loopProgress.checksum(), restoredLoopProgress.checksum()); + for (std::size_t i = 20; i < 50; ++i) { + loopProgress.increment(); + restoredLoopProgress.increment(); + CPPUNIT_ASSERT_EQUAL(progress, restoredProgress); + } +} + 
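+// A note on resuming: the progress callback is not part of the persisted state,
+// so a restored CLoopProgress stays silent until a monitor is re-attached. A
+// minimal sketch of the intended resume sequence (hypothetical caller code,
+// mirroring the test above):
+//
+//   core::CLoopProgress progress;
+//   progress.acceptRestoreTraverser(traverser); // recovers size, steps and position
+//   progress.attach([](double p) { /* forward p to a progress monitor */ });
+//   progress.resumeRestored();                  // i.e. increment(0): re-emits the
+//                                               // whole steps already completed
+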
CppUnit::Test* CLoopProgressTest::suite() { CppUnit::TestSuite* suiteOfTests = new CppUnit::TestSuite("CLoopProgressTest"); @@ -143,6 +185,8 @@ CppUnit::Test* CLoopProgressTest::suite() { "CLoopProgressTest::testRandom", &CLoopProgressTest::testRandom)); suiteOfTests->addTest(new CppUnit::TestCaller( "CLoopProgressTest::testScaled", &CLoopProgressTest::testScaled)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CLoopProgressTest::testSerialization", &CLoopProgressTest::testSerialization)); return suiteOfTests; } diff --git a/lib/core/unittest/CLoopProgressTest.h b/lib/core/unittest/CLoopProgressTest.h index 20048a490d..34244f6dc7 100644 --- a/lib/core/unittest/CLoopProgressTest.h +++ b/lib/core/unittest/CLoopProgressTest.h @@ -14,6 +14,7 @@ class CLoopProgressTest : public CppUnit::TestFixture { void testShort(); void testRandom(); void testScaled(); + void testSerialization(); static CppUnit::Test* suite(); }; diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index e6f46f1b7f..ebb76ef141 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -11,8 +11,12 @@ #include #include #include +#include +#include #include +#include + namespace ml { namespace maths { using namespace boosted_tree_detail; @@ -21,6 +25,10 @@ using TSizeVec = std::vector; using TRowItr = core::CDataFrame::TRowItr; namespace { +const std::size_t MIN_REGULARIZER_INDEX{0}; +const std::size_t BEST_REGULARIZER_INDEX{1}; +const std::size_t MAX_REGULARIZER_INDEX{2}; +const std::size_t INITIAL_REGULARIZER_SEARCH_ITERATIONS{8}; const double MIN_REGULARIZER_SCALE{0.1}; const double MAX_REGULARIZER_SCALE{10.0}; const double MIN_ETA_SCALE{0.3}; @@ -37,6 +45,8 @@ CBoostedTreeFactory::buildFor(core::CDataFrame& frame, std::size_t dependentVari m_TreeImpl->m_DependentVariable = dependentVariable; + this->setupTrainingProgressMonitoring(); + this->initializeMissingFeatureMasks(frame); std::tie(m_TreeImpl->m_TrainingRowMasks, m_TreeImpl->m_TestingRowMasks) = this->crossValidationRowMasks(); @@ -78,13 +88,13 @@ void CBoostedTreeFactory::initializeHyperparameterOptimisation() const { // less than p_1, this translates to using log parameter values. 
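+    //
+    // For example (endpoints illustrative only): a lambda interval [0.001, 1.0]
+    // searched uniformly in the raw parameter would place half of the probe
+    // points within a factor of two of the upper endpoint, whereas searching
+    // log(lambda) over [log(0.001), log(1.0)] spreads them evenly across all
+    // three decades. The round trip is simply
+    //
+    //   boundingBox.emplace_back(std::log(1e-3), std::log(1.0)); // probe in log space
+    //   double lambda{std::exp(parameters(i))};                  // map a probe back
+    //
+    // which is the pattern used below and in selectNextHyperparameters.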
CBayesianOptimisation::TDoubleDoublePrVec boundingBox; - if (m_TreeImpl->m_LambdaOverride == boost::none) { - boundingBox.emplace_back(std::log(MIN_REGULARIZER_SCALE * m_TreeImpl->m_Lambda), - std::log(MAX_REGULARIZER_SCALE * m_TreeImpl->m_Lambda)); + if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { + boundingBox.emplace_back(std::log(m_LambdaSearchInterval(MIN_REGULARIZER_INDEX)), + std::log(m_LambdaSearchInterval(MAX_REGULARIZER_INDEX))); } - if (m_TreeImpl->m_GammaOverride == boost::none) { - boundingBox.emplace_back(std::log(MIN_REGULARIZER_SCALE * m_TreeImpl->m_Gamma), - std::log(MAX_REGULARIZER_SCALE * m_TreeImpl->m_Gamma)); + if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { + boundingBox.emplace_back(std::log(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)), + std::log(m_GammaSearchInterval(MAX_REGULARIZER_INDEX))); } if (m_TreeImpl->m_EtaOverride == boost::none) { double rate{m_TreeImpl->m_EtaGrowthRatePerTree - 1.0}; @@ -175,6 +185,7 @@ void CBoostedTreeFactory::selectFeaturesAndEncodeCategories(const core::CDataFra .minimumFrequencyToOneHotEncode(m_MinimumFrequencyToOneHotEncode) .rowMask(m_TreeImpl->allTrainingRowsMask()) .columnMask(std::move(regressors))); + m_TreeImpl->m_TrainingProgress.increment(1); } void CBoostedTreeFactory::determineFeatureDataTypes(const core::CDataFrame& frame) const { @@ -214,11 +225,9 @@ bool CBoostedTreeFactory::initializeFeatureSampleDistribution() const { return false; } -void CBoostedTreeFactory::initializeHyperparameters(core::CDataFrame& frame) const { +void CBoostedTreeFactory::initializeHyperparameters(core::CDataFrame& frame) { - m_TreeImpl->m_Lambda = m_TreeImpl->m_LambdaOverride.value_or(0.0); - m_TreeImpl->m_Gamma = m_TreeImpl->m_GammaOverride.value_or(0.0); - if (m_TreeImpl->m_EtaOverride) { + if (m_TreeImpl->m_EtaOverride != boost::none) { m_TreeImpl->m_Eta = *(m_TreeImpl->m_EtaOverride); } else { // Eta is the learning rate. There is a lot of empirical evidence that @@ -238,69 +247,25 @@ void CBoostedTreeFactory::initializeHyperparameters(core::CDataFrame& frame) con frame.numberColumns() - 4))); m_TreeImpl->m_EtaGrowthRatePerTree = 1.0 + m_TreeImpl->m_Eta / 2.0; } - if (m_TreeImpl->m_MaximumNumberTreesOverride) { + + if (m_TreeImpl->m_MaximumNumberTreesOverride != boost::none) { m_TreeImpl->m_MaximumNumberTrees = *(m_TreeImpl->m_MaximumNumberTreesOverride); } else { // This needs to be tied to the learn rate to avoid bias. m_TreeImpl->m_MaximumNumberTrees = static_cast(2.0 / m_TreeImpl->m_Eta + 0.5); } - if (m_TreeImpl->m_FeatureBagFractionOverride) { + + if (m_TreeImpl->m_FeatureBagFractionOverride != boost::none) { m_TreeImpl->m_FeatureBagFraction = *(m_TreeImpl->m_FeatureBagFractionOverride); } - if (m_TreeImpl->m_LambdaOverride && m_TreeImpl->m_GammaOverride) { - // Fall through. 
-    } else {
-        core::CPackedBitVector trainingRowMask{m_TreeImpl->allTrainingRowsMask()};
-
-        auto tree = m_TreeImpl->initializePredictionsAndLossDerivatives(frame, trainingRowMask);
-
-        double L[2];
-        double T[2];
-        double W[2];
-
-        std::tie(L[0], T[0], W[0]) =
-            m_TreeImpl->regularisedLoss(frame, trainingRowMask, {std::move(tree)});
-        LOG_TRACE(<< "loss = " << L[0] << ", # leaves = " << T[0]
-                  << ", sum square weights = " << W[0]);
-
-        double eta{1.0};
-        std::size_t maximumNumberOfTrees{1};
-        std::swap(eta, m_TreeImpl->m_Eta);
-        std::swap(maximumNumberOfTrees, m_TreeImpl->m_MaximumNumberTrees);
-        auto forest = m_TreeImpl->trainForest(frame, trainingRowMask, m_RecordMemoryUsage);
-        std::swap(eta, m_TreeImpl->m_Eta);
-        std::swap(maximumNumberOfTrees, m_TreeImpl->m_MaximumNumberTrees);
-
-        std::tie(L[1], T[1], W[1]) =
-            m_TreeImpl->regularisedLoss(frame, trainingRowMask, forest);
-        LOG_TRACE(<< "loss = " << L[1] << ", # leaves = " << T[1]
-                  << ", sum square weights = " << W[1]);
-
-        // If we can't improve the loss with no regularisation on the train set
-        // we're not going to be able to make much headway! In this case we just
-        // force the regularisation parameters to zero and don't try to optimise
-        // them.
-        double scale{static_cast<double>(m_TreeImpl->m_NumberFolds - 1) /
-                     static_cast<double>(m_TreeImpl->m_NumberFolds)};
-        double lambda{m_TreeImpl->m_Eta * scale *
-                      (L[0] <= L[1] ? 0.0 : (L[0] - L[1]) / (W[1] - W[0]))};
-        double gamma{m_TreeImpl->m_Eta * scale *
-                     (L[0] <= L[1] ? 0.0 : (L[0] - L[1]) / (T[1] - T[0]))};
-
-        if (lambda == 0.0) {
-            m_TreeImpl->m_LambdaOverride = lambda;
-        } else if (m_TreeImpl->m_LambdaOverride == boost::none) {
-            m_TreeImpl->m_Lambda = m_TreeImpl->m_GammaOverride ? lambda : 0.5 * lambda;
-        }
-        if (gamma == 0.0) {
-            m_TreeImpl->m_GammaOverride = gamma;
-        } else if (m_TreeImpl->m_GammaOverride == boost::none) {
-            m_TreeImpl->m_Gamma = m_TreeImpl->m_LambdaOverride ? gamma : 0.5 * gamma;
-        }
-        LOG_TRACE(<< "lambda(initial) = " << m_TreeImpl->m_Lambda
-                  << " gamma(initial) = " << m_TreeImpl->m_Gamma);
+    if (m_TreeImpl->m_RegularizationOverride.countNotSet() > 0) {
+        this->initializeUnsetRegularizationHyperparameters(frame);
     }
 
     m_TreeImpl->m_MaximumTreeSizeMultiplier = MAIN_TRAINING_LOOP_TREE_SIZE_MULTIPLIER;
@@ -315,6 +280,188 @@ void CBoostedTreeFactory::initializeHyperparameters(core::CDataFrame& frame) con
     }
 }
 
+void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDataFrame& frame) {
+
+    // The strategy here is to:
+    // 1) Get percentile estimates of the gain in, and the total curvature of,
+    //    the loss function at splits in a single tree,
+    // 2) Use these to upper bound the size of gamma and lambda, that is, find
+    //    values for which we expect to underfit the data,
+    // 3) Decrease each regularizer and look for the turning point in the test
+    //    loss, i.e. the point at which the transition to overfitting occurs.
+    // We'll search intervals in the vicinity of these values in the hyperparameter
+    // optimisation loop.
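+    //
+    // Concretely (the numbers here are illustrative, not from this change): if
+    // the 75th percentile split gain of the probe tree is 2.0, then gamma = 2.0
+    // cancels the gain of a typical split, so almost nothing splits and the
+    // model underfits. The line search below then shrinks the scale from 1
+    // towards 1/1024 and tracks the test loss; the scale at the loss minimum
+    // seeds the search interval handed to the Bayesian optimisation loop.
+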
+ + core::CPackedBitVector allTrainingRowsMask{m_TreeImpl->allTrainingRowsMask()}; + + double gainPerNode; + double totalCurvaturePerNode; + std::tie(gainPerNode, totalCurvaturePerNode) = + this->estimateTreeGainAndCurvature(frame, allTrainingRowsMask); + + if (gainPerNode > 0.0 && m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { + + TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; + fallbackInterval *= m_TreeImpl->m_Eta; + auto interval = this->candidateRegularizerSearchInterval( + frame, allTrainingRowsMask, [this, gainPerNode](double scale) { + m_TreeImpl->m_Regularization.gamma(scale * gainPerNode); + }); + m_GammaSearchInterval = interval.value_or(fallbackInterval) * gainPerNode; + LOG_TRACE(<< "gamma search interval = [" + << m_GammaSearchInterval.toDelimited() << "]"); + + } else if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { + m_TreeImpl->m_RegularizationOverride.gamma(0.0); + } + + if (totalCurvaturePerNode > 0.0 && + m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { + + TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; + m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)); + auto interval = this->candidateRegularizerSearchInterval( + frame, allTrainingRowsMask, [this, totalCurvaturePerNode](double scale) { + m_TreeImpl->m_Regularization.lambda(scale * totalCurvaturePerNode); + }); + m_LambdaSearchInterval = interval.value_or(fallbackInterval) * totalCurvaturePerNode; + LOG_TRACE(<< "lambda search interval = [" + << m_LambdaSearchInterval.toDelimited() << "]"); + + } else if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { + m_TreeImpl->m_RegularizationOverride.lambda(0.0); + } + + double scale{ + static_cast(m_TreeImpl->m_NumberFolds - 1) / + static_cast(m_TreeImpl->m_NumberFolds) / + ((m_TreeImpl->m_RegularizationOverride.gamma() != boost::none ? 0.0 : 1.0) + + (m_TreeImpl->m_RegularizationOverride.lambda() != boost::none ? 0.0 : 1.0))}; + + if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { + m_GammaSearchInterval *= scale; + m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(BEST_REGULARIZER_INDEX)); + } + if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { + m_LambdaSearchInterval *= scale; + m_TreeImpl->m_Regularization.lambda(m_LambdaSearchInterval(BEST_REGULARIZER_INDEX)); + } + LOG_TRACE(<< "regularization(initial) = " << m_TreeImpl->m_Regularization.print()); +} + +CBoostedTreeFactory::TDoubleDoublePr +CBoostedTreeFactory::estimateTreeGainAndCurvature(core::CDataFrame& frame, + const core::CPackedBitVector& trainingRowMask) const { + + std::size_t maximumNumberOfTrees{1}; + std::swap(maximumNumberOfTrees, m_TreeImpl->m_MaximumNumberTrees); + auto forest = m_TreeImpl->trainForest(frame, trainingRowMask, m_RecordMemoryUsage); + std::swap(maximumNumberOfTrees, m_TreeImpl->m_MaximumNumberTrees); + + double gain; + double curvature; + std::tie(gain, curvature) = m_TreeImpl->gainAndCurvatureAtPercentile(75.0, forest); + + LOG_TRACE(<< "gain = " << gain << ", curvature = " << curvature); + + return {gain, curvature}; +} + +CBoostedTreeFactory::TOptionalVector +CBoostedTreeFactory::candidateRegularizerSearchInterval(core::CDataFrame& frame, + core::CPackedBitVector trainingRowMask, + TScaleRegularization scaleRegularization) const { + + // This uses a quadratic approximation to the test loss function w.r.t. 
+    // the scaled regularization hyperparameter, from which it estimates the
+    // minimum error point in the interval we search here. Separately, it
+    // examines the size of the residual errors w.r.t. the variation in the
+    // best fit curve over the interval. We truncate the interval the main
+    // hyperparameter optimisation loop searches if we determine there is a
+    // low chance of missing the best solution by doing so.
+
+    using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
+
+    double pSample{1.0 / static_cast<double>(m_TreeImpl->m_NumberFolds)};
+
+    core::CPackedBitVector testRowMask;
+    for (auto row = trainingRowMask.beginOneBits();
+         row != trainingRowMask.endOneBits(); ++row) {
+        if (CSampling::uniformSample(m_TreeImpl->m_Rng, 0.0, 1.0) < pSample) {
+            testRowMask.extend(false, *row - testRowMask.size());
+            testRowMask.extend(true);
+        }
+    }
+    testRowMask.extend(false, trainingRowMask.size() - testRowMask.size());
+    trainingRowMask ^= testRowMask;
+
+    double maximumTreeSizeMultiplier{MAIN_TRAINING_LOOP_TREE_SIZE_MULTIPLIER};
+    std::swap(maximumTreeSizeMultiplier, m_TreeImpl->m_MaximumTreeSizeMultiplier);
+
+    double multiplier{std::exp(
+        -std::log(1024.0) / static_cast<double>(INITIAL_REGULARIZER_SEARCH_ITERATIONS))};
+
+    CLeastSquaresOnlineRegression<2, double> leastSquaresQuadraticTestLoss;
+    TDoubleVec testLosses(INITIAL_REGULARIZER_SEARCH_ITERATIONS);
+
+    double scale{1.0};
+    for (std::size_t i = 0; i < INITIAL_REGULARIZER_SEARCH_ITERATIONS; ++i) {
+        scaleRegularization(scale);
+        scale *= multiplier;
+        auto forest = m_TreeImpl->trainForest(frame, trainingRowMask, m_RecordMemoryUsage);
+        double testLoss{m_TreeImpl->meanLoss(frame, testRowMask, forest)};
+        leastSquaresQuadraticTestLoss.add(static_cast<double>(i), testLoss);
+        testLosses[i] = testLoss;
+        m_TreeImpl->m_TrainingProgress.increment();
+    }
+    LOG_TRACE(<< "test losses = " << core::CContainerPrinter::print(testLosses));
+
+    std::swap(maximumTreeSizeMultiplier, m_TreeImpl->m_MaximumTreeSizeMultiplier);
+
+    CLeastSquaresOnlineRegression<2, double>::TArray params;
+    bool successful{leastSquaresQuadraticTestLoss.parameters(params)};
+    double gradient{params[1]};
+    double curvature{params[2]};
+    LOG_TRACE(<< "[intercept, slope, curvature] = "
+              << core::CContainerPrinter::print(params));
+
+    // Find the scale at the minimum of the least squares quadratic fit
+    // to the test loss in the search interval.
+    double leftEndpoint{0.0};
+    double rightEndpoint{static_cast<double>(INITIAL_REGULARIZER_SEARCH_ITERATIONS - 1)};
+    double stationaryPoint{-gradient / 2.0 / curvature};
+    double distanceToLeftEndpoint{std::fabs(leftEndpoint - stationaryPoint)};
+    double distanceToRightEndpoint{std::fabs(rightEndpoint - stationaryPoint)};
+    double logBestRegularizerScale{
+        curvature < 0.0
+            ? (distanceToLeftEndpoint > distanceToRightEndpoint ? leftEndpoint : rightEndpoint)
+            : CTools::truncate(stationaryPoint, leftEndpoint, rightEndpoint)};
+    double bestRegularizerScale{std::pow(0.5, logBestRegularizerScale)};
+
+    // Find an interval with a high probability of containing the optimal
+    // regularisation parameter if the interval we searched has a minimum.
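+    //
+    // For the fitted quadratic f(x) = a + b*x + c*x^2 over the probe index x,
+    // f'(x) = b + 2*c*x vanishes at x* = -b / (2*c), the stationaryPoint above.
+    // With c > 0 this is a minimum, so we centre the returned interval on it;
+    // with c <= 0 the fit is concave and the smallest loss must sit at whichever
+    // endpoint is further from x*. For example (illustrative numbers), b = -3.0
+    // and c = 0.5 give x* = 3.0, i.e. the loss bottoms out at the fourth probe.
+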
+ TVector interval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; + if (curvature > 0.0) { + TMeanVarAccumulator residualMoments; + for (std::size_t i = 0; i < INITIAL_REGULARIZER_SEARCH_ITERATIONS; ++i) { + residualMoments.add(testLosses[i] - leastSquaresQuadraticTestLoss.predict( + static_cast(i))); + } + double margin{2.0 * std::sqrt(CBasicStatistics::variance(residualMoments)) / curvature}; + if (logBestRegularizerScale - margin >= leftEndpoint) { + interval(MIN_REGULARIZER_INDEX) = + std::max(std::pow(0.5, margin), MIN_REGULARIZER_SCALE); + } + if (logBestRegularizerScale + margin <= rightEndpoint) { + interval(MAX_REGULARIZER_INDEX) = + std::min(std::pow(2.0, margin), MAX_REGULARIZER_SCALE); + } + } + interval *= bestRegularizerScale; + + return successful ? TOptionalVector{interval} : TOptionalVector{}; +} + CBoostedTreeFactory CBoostedTreeFactory::constructFromParameters(std::size_t numberThreads, TLossFunctionUPtr loss) { return {numberThreads, std::move(loss)}; @@ -334,6 +481,8 @@ CBoostedTreeFactory::constructFromString(std::istream& jsonStringStream, if (treePtr->acceptRestoreTraverser(traverser) == false || traverser.haveBadState()) { throw std::runtime_error{"failed to restore boosted tree"}; } + treePtr->m_Impl->m_TrainingProgress.attach(recordProgress); + treePtr->m_Impl->m_TrainingProgress.resumeRestored(); frame.resizeColumns(treePtr->m_Impl->m_NumberThreads, frame.numberColumns() + treePtr->m_Impl->numberExtraColumnsForTrain()); @@ -345,7 +494,8 @@ CBoostedTreeFactory::constructFromString(std::istream& jsonStringStream, } CBoostedTreeFactory::CBoostedTreeFactory(std::size_t numberThreads, TLossFunctionUPtr loss) - : m_TreeImpl{std::make_unique(numberThreads, std::move(loss))} { + : m_TreeImpl{std::make_unique(numberThreads, std::move(loss))}, + m_GammaSearchInterval{0.0}, m_LambdaSearchInterval{0.0} { } CBoostedTreeFactory::CBoostedTreeFactory(CBoostedTreeFactory&&) = default; @@ -377,7 +527,7 @@ CBoostedTreeFactory& CBoostedTreeFactory::lambda(double lambda) { LOG_WARN(<< "Lambda must be non-negative"); lambda = 0.0; } - m_TreeImpl->m_LambdaOverride = lambda; + m_TreeImpl->m_RegularizationOverride.lambda(lambda); return *this; } @@ -386,7 +536,7 @@ CBoostedTreeFactory& CBoostedTreeFactory::gamma(double gamma) { LOG_WARN(<< "Gamma must be non-negative"); gamma = 0.0; } - m_TreeImpl->m_GammaOverride = gamma; + m_TreeImpl->m_RegularizationOverride.gamma(gamma); return *this; } @@ -485,6 +635,30 @@ std::size_t CBoostedTreeFactory::numberExtraColumnsForTrain() const { return m_TreeImpl->numberExtraColumnsForTrain(); } +void CBoostedTreeFactory::setupTrainingProgressMonitoring() { + + // The base unit is the cost of training on one fold. + // + // This comprises: + // - The cost of category encoding and feature selection which we count as + // one unit, + // - INITIAL_REGULARIZER_SEARCH_ITERATIONS units per regularization parameter + // which isn't user defined, + // - The main optimisation loop which costs number folds units per iteration, + // - The cost of the final train which we count as number folds units. 
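+    //
+    // Worked example (assuming the defaults: 4 folds, no user overrides and
+    // R = numberHyperparameterTuningRounds()): the total is
+    //
+    //   1               // encoding and feature selection
+    //   + 8 + 8         // gamma and lambda line searches
+    //   + (R + 1) * 4   // main loop rounds plus the final train
+    //
+    // so each unit of progress is 1 / (17 + 4 * (R + 1)) of the whole.
+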
+ + std::size_t totalNumberSteps{1}; + if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { + totalNumberSteps += INITIAL_REGULARIZER_SEARCH_ITERATIONS; + } + if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { + totalNumberSteps += INITIAL_REGULARIZER_SEARCH_ITERATIONS; + } + totalNumberSteps += (this->numberHyperparameterTuningRounds() + 1) * + m_TreeImpl->m_NumberFolds; + m_TreeImpl->m_TrainingProgress = core::CLoopProgress{totalNumberSteps, m_RecordProgress}; +} + void CBoostedTreeFactory::noopRecordTrainingState(std::function) { } diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index 90ef01bf27..4e618beabd 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -84,6 +84,9 @@ double readLossCurvature(const TRowRef& row) { double readActual(const TRowRef& row, std::size_t dependentVariable) { return row[dependentVariable]; } + +const std::size_t ASSIGN_MISSING_TO_LEFT{0}; +const std::size_t ASSIGN_MISSING_TO_RIGHT{1}; } void CBoostedTreeImpl::CLeafNodeStatistics::addRowDerivatives(const CEncodedDataFrameRowRef& row, @@ -111,7 +114,7 @@ void CBoostedTreeImpl::CLeafNodeStatistics::addRowDerivatives(const CEncodedData CBoostedTreeImpl::CBoostedTreeImpl(std::size_t numberThreads, CBoostedTree::TLossFunctionUPtr loss) : m_NumberThreads{numberThreads}, m_Loss{std::move(loss)}, - m_BestHyperparameters{m_Lambda, m_Gamma, m_Eta, m_EtaGrowthRatePerTree, m_FeatureBagFraction, m_FeatureSampleProbabilities} { + m_BestHyperparameters{m_Regularization, m_Eta, m_EtaGrowthRatePerTree, m_FeatureBagFraction} { } CBoostedTreeImpl::CBoostedTreeImpl() = default; @@ -133,11 +136,7 @@ void CBoostedTreeImpl::train(core::CDataFrame& frame, LOG_TRACE(<< "Main training loop..."); - // We account for cost of setup as one round. The main optimisation loop runs - // for "m_NumberRounds + 1" rounds and training on the choosen hyperparameter - // values is counted as one round. This gives a total of m_NumberRounds + 3. 
- core::CLoopProgress progress{m_NumberRounds + 3 - m_CurrentRound, recordProgress}; - progress.increment(); + m_TrainingProgress.attach(recordProgress); std::uint64_t lastMemoryUsage(this->memoryUsage()); recordMemoryUsage(lastMemoryUsage); @@ -172,8 +171,6 @@ void CBoostedTreeImpl::train(core::CDataFrame& frame, break; } - progress.increment(); - std::int64_t memoryUsage(this->memoryUsage()); recordMemoryUsage(memoryUsage - lastMemoryUsage); lastMemoryUsage = memoryUsage; @@ -280,40 +277,32 @@ core::CPackedBitVector CBoostedTreeImpl::allTrainingRowsMask() const { return ~m_MissingFeatureRowMasks[m_DependentVariable]; } -CBoostedTreeImpl::TDoubleDoubleDoubleTr -CBoostedTreeImpl::regularisedLoss(const core::CDataFrame& frame, - const core::CPackedBitVector& trainingRowMask, - const TNodeVecVec& forest) const { +CBoostedTreeImpl::TDoubleDoublePr +CBoostedTreeImpl::gainAndCurvatureAtPercentile(double percentile, + const TNodeVecVec& forest) const { - auto results = frame.readRows( - m_NumberThreads, 0, frame.numberRows(), - core::bindRetrievableState( - [&](double& loss, TRowItr beginRows, TRowItr endRows) { - for (auto row = beginRows; row != endRows; ++row) { - loss += m_Loss->value(readPrediction(*row), - readActual(*row, m_DependentVariable)); - } - }, - 0.0), - &trainingRowMask); + TDoubleVec gains; + TDoubleVec curvatures; - double loss{0.0}; - for (const auto& result : results.first) { - loss += result.s_FunctionState; - } - - double leafCount{0.0}; - double sumSquareLeafWeights{0.0}; for (const auto& tree : forest) { for (const auto& node : tree) { - if (node.isLeaf()) { - leafCount += 1.0; - sumSquareLeafWeights += CTools::pow2(node.value()); + if (node.isLeaf() == false) { + gains.push_back(node.gain()); + curvatures.push_back(node.curvature()); } } } - return {loss, leafCount, 0.5 * sumSquareLeafWeights}; + if (gains.size() == 0) { + return {0.0, 0.0}; + } + + std::size_t index{static_cast( + percentile * static_cast(gains.size()) / 100.0 + 0.5)}; + std::nth_element(gains.begin(), gains.begin() + index, gains.end()); + std::nth_element(curvatures.begin(), curvatures.begin() + index, curvatures.end()); + + return {gains[index], curvatures[index]}; } CBoostedTreeImpl::TMeanVarAccumulator @@ -326,6 +315,7 @@ CBoostedTreeImpl::crossValidateForest(core::CDataFrame& frame, lossMoments.add(loss); LOG_TRACE(<< "fold = " << i << " forest size = " << forest.size() << " test set loss = " << loss); + m_TrainingProgress.increment(); } LOG_TRACE(<< "test mean loss = " << CBasicStatistics::mean(lossMoments) << ", sigma = " << std::sqrt(CBasicStatistics::mean(lossMoments))); @@ -514,8 +504,8 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, TLeafNodeStatisticsPtrQueue leaves; leaves.push(std::make_shared( - 0 /*root*/, m_NumberThreads, frame, *m_Encoder, m_Lambda, m_Gamma, - candidateSplits, this->featureBag(), trainingRowMask)); + 0 /*root*/, m_NumberThreads, frame, *m_Encoder, m_Regularization, + candidateSplits, 0 /*depth*/, this->featureBag(), trainingRowMask)); // We update local variables because the callback can be expensive if it // requires accessing atomics. 
@@ -555,8 +545,9 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, bool assignMissingToLeft{leaf->assignMissingToLeft()}; std::size_t leftChildId, rightChildId; - std::tie(leftChildId, rightChildId) = tree[leaf->id()].split( - splitFeature, splitValue, assignMissingToLeft, tree); + std::tie(leftChildId, rightChildId) = + tree[leaf->id()].split(splitFeature, splitValue, assignMissingToLeft, + leaf->gain(), leaf->curvature(), tree); TSizeVec featureBag{this->featureBag()}; @@ -569,11 +560,10 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, TLeafNodeStatisticsPtr leftChild; TLeafNodeStatisticsPtr rightChild; - std::tie(leftChild, rightChild) = - leaf->split(leftChildId, rightChildId, m_NumberThreads, frame, - *m_Encoder, m_Lambda, m_Gamma, candidateSplits, - std::move(featureBag), std::move(leftChildRowMask), - std::move(rightChildRowMask), leftChildHasFewerRows); + std::tie(leftChild, rightChild) = leaf->split( + leftChildId, rightChildId, m_NumberThreads, frame, *m_Encoder, m_Regularization, + candidateSplits, std::move(featureBag), std::move(leftChildRowMask), + std::move(rightChildRowMask), leftChildHasFewerRows); scopeMemoryUsage.add(leftChild); scopeMemoryUsage.add(rightChild); @@ -740,11 +730,11 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss // Read parameters for last round. int i{0}; - if (m_LambdaOverride == boost::none) { - parameters(i++) = std::log(m_Lambda); + if (m_RegularizationOverride.lambda() == boost::none) { + parameters(i++) = std::log(m_Regularization.lambda()); } - if (m_GammaOverride == boost::none) { - parameters(i++) = std::log(m_Gamma); + if (m_RegularizationOverride.gamma() == boost::none) { + parameters(i++) = std::log(m_Regularization.gamma()); } if (m_EtaOverride == boost::none) { parameters(i++) = std::log(m_Eta); @@ -757,6 +747,11 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss double meanLoss{CBasicStatistics::mean(lossMoments)}; double lossVariance{CBasicStatistics::variance(lossMoments)}; + LOG_TRACE(<< "round = " << m_CurrentRound << " loss = " << meanLoss + << ": regularization = " << m_Regularization.print() << ", eta = " << m_Eta + << ", eta growth rate per tree = " << m_EtaGrowthRatePerTree + << ", feature bag fraction = " << m_FeatureBagFraction); + bopt.add(parameters, meanLoss, lossVariance); if (3 * m_CurrentRound < m_NumberRounds) { std::generate_n(parameters.data(), parameters.size(), [&]() { @@ -772,11 +767,11 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss // Write parameters for next round. 
i = 0; - if (m_LambdaOverride == boost::none) { - m_Lambda = std::exp(parameters(i++)); + if (m_RegularizationOverride.lambda() == boost::none) { + m_Regularization.lambda(std::exp(parameters(i++))); } - if (m_GammaOverride == boost::none) { - m_Gamma = std::exp(parameters(i++)); + if (m_RegularizationOverride.gamma() == boost::none) { + m_Regularization.gamma(std::exp(parameters(i++))); } if (m_EtaOverride == boost::none) { m_Eta = std::exp(parameters(i++)); @@ -786,10 +781,6 @@ bool CBoostedTreeImpl::selectNextHyperparameters(const TMeanVarAccumulator& loss m_FeatureBagFraction = parameters(i++); } - LOG_TRACE(<< "round = " << m_CurrentRound << ": lambda = " << m_Lambda - << ", gamma = " << m_Gamma << ", eta = " << m_Eta - << ", eta growth rate per tree = " << m_EtaGrowthRatePerTree - << ", feature bag fraction = " << m_FeatureBagFraction); return true; } @@ -802,25 +793,24 @@ void CBoostedTreeImpl::captureBestHyperparameters(const TMeanVarAccumulator& los if (loss < m_BestForestTestLoss) { m_BestForestTestLoss = loss; m_BestHyperparameters = SHyperparameters{ - m_Lambda, m_Gamma, m_Eta, m_EtaGrowthRatePerTree, m_FeatureBagFraction, m_FeatureSampleProbabilities}; + m_Regularization, m_Eta, m_EtaGrowthRatePerTree, m_FeatureBagFraction}; } } void CBoostedTreeImpl::restoreBestHyperparameters() { - m_Lambda = m_BestHyperparameters.s_Lambda; - m_Gamma = m_BestHyperparameters.s_Gamma; + m_Regularization = m_BestHyperparameters.s_Regularization; m_Eta = m_BestHyperparameters.s_Eta; m_EtaGrowthRatePerTree = m_BestHyperparameters.s_EtaGrowthRatePerTree; m_FeatureBagFraction = m_BestHyperparameters.s_FeatureBagFraction; - m_FeatureSampleProbabilities = m_BestHyperparameters.s_FeatureSampleProbabilities; - LOG_TRACE(<< "lambda* = " << m_Lambda << ", gamma* = " << m_Gamma - << ", eta* = " << m_Eta << ", eta growth rate per tree* = " << m_EtaGrowthRatePerTree + LOG_TRACE(<< "regularization* = " << m_Regularization.print() << ", eta* = " << m_Eta + << ", eta growth rate per tree* = " << m_EtaGrowthRatePerTree << ", feature bag fraction* = " << m_FeatureBagFraction); } std::size_t CBoostedTreeImpl::numberHyperparametersToTune() const { - return (m_LambdaOverride ? 0 : 1) + (m_GammaOverride ? 0 : 1) + - (m_EtaOverride ? 0 : 2) + (m_FeatureBagFractionOverride ? 0 : 1); + return m_RegularizationOverride.countNotSet() + + (m_EtaOverride != boost::none ? 0 : 2) + + (m_FeatureBagFractionOverride != boost::none ? 
0 : 1);
 }
 
 std::size_t CBoostedTreeImpl::maximumTreeSize(const core::CPackedBitVector& trainingRowMask) const {
@@ -835,7 +825,6 @@ std::size_t CBoostedTreeImpl::maximumTreeSize(std::size_t numberRows) const {
 const std::size_t CBoostedTreeImpl::PACKED_BIT_VECTOR_MAXIMUM_ROWS_PER_BYTE{256};
 
 namespace {
-const std::string RANDOM_NUMBER_GENERATOR_TAG{"random_number_generator"};
 const std::string BAYESIAN_OPTIMIZATION_TAG{"bayesian_optimization"};
 const std::string BEST_FOREST_TAG{"best_forest"};
 const std::string BEST_FOREST_TEST_LOSS_TAG{"best_forest_test_loss"};
@@ -851,9 +840,7 @@ const std::string FEATURE_BAG_FRACTION_TAG{"feature_bag_fraction"};
 const std::string FEATURE_DATA_TYPES_TAG{"feature_data_types"};
 const std::string FEATURE_SAMPLE_PROBABILITIES_TAG{"feature_sample_probabilities"};
 const std::string GAMMA_OVERRIDE_TAG{"gamma_override"};
-const std::string GAMMA_TAG{"gamma"};
 const std::string LAMBDA_OVERRIDE_TAG{"lambda_override"};
-const std::string LAMBDA_TAG{"lambda"};
 const std::string LOSS_TAG{"loss"};
 const std::string MAXIMUM_ATTEMPTS_TO_ADD_TREE_TAG{"maximum_attempts_to_add_tree"};
 const std::string MAXIMUM_NUMBER_TREES_OVERRIDE_TAG{"maximum_number_trees_override"};
@@ -866,9 +853,21 @@ const std::string NUMBER_FOLDS_TAG{"number_folds"};
 const std::string NUMBER_ROUNDS_TAG{"number_rounds"};
 const std::string NUMBER_SPLITS_PER_FEATURE_TAG{"number_splits_per_feature"};
 const std::string NUMBER_THREADS_TAG{"number_threads"};
+const std::string RANDOM_NUMBER_GENERATOR_TAG{"random_number_generator"};
+const std::string REGULARIZATION_TAG{"regularization"};
+const std::string REGULARIZATION_OVERRIDE_TAG{"regularization_override"};
 const std::string ROWS_PER_FEATURE_TAG{"rows_per_feature"};
 const std::string TESTING_ROW_MASKS_TAG{"testing_row_masks"};
 const std::string TRAINING_ROW_MASKS_TAG{"training_row_masks"};
+const std::string TRAINING_PROGRESS_TAG{"training_progress"};
+
+const std::string REGULARIZATION_GAMMA_TAG{"gamma"};
+const std::string REGULARIZATION_LAMBDA_TAG{"lambda"};
+
+const std::string HYPERPARAM_ETA_TAG{"hyperparam_eta"};
+const std::string HYPERPARAM_ETA_GROWTH_RATE_PER_TREE_TAG{"hyperparam_eta_growth_rate_per_tree"};
+const std::string HYPERPARAM_FEATURE_BAG_FRACTION_TAG{"hyperparam_feature_bag_fraction"};
+const std::string HYPERPARAM_REGULARIZATION_TAG{"hyperparam_regularization"};
 
 const std::string LEFT_CHILD_TAG{"left_child"};
 const std::string RIGHT_CHILD_TAG{"right_child"};
@@ -876,19 +875,35 @@ const std::string SPLIT_FEATURE_TAG{"split_feature"};
 const std::string ASSIGN_MISSING_TO_LEFT_TAG{"assign_missing_to_left "};
 const std::string NODE_VALUE_TAG{"node_value"};
 const std::string SPLIT_VALUE_TAG{"split_value"};
+}
 
-const std::string HYPERPARAM_LAMBDA_TAG{"hyperparam_lambda"};
-const std::string HYPERPARAM_GAMMA_TAG{"hyperparam_gamma"};
-const std::string HYPERPARAM_ETA_TAG{"hyperparam_eta"};
-const std::string HYPERPARAM_ETA_GROWTH_RATE_PER_TREE_TAG{"hyperparam_eta_growth_rate_per_tree"};
-const std::string HYPERPARAM_FEATURE_BAG_FRACTION_TAG{"hyperparam_feature_bag_fraction"};
-const std::string HYPERPARAM_FEATURE_SAMPLE_PROBABILITIES_TAG{"hyperparam_feature_sample_probabilities"};
+template<typename T>
+void CBoostedTreeImpl::CRegularization<T>::acceptPersistInserter(core::CStatePersistInserter& inserter) const {
+    core::CPersistUtils::persist(REGULARIZATION_GAMMA_TAG, m_Gamma, inserter);
+    core::CPersistUtils::persist(REGULARIZATION_LAMBDA_TAG, m_Lambda, inserter);
+}
+
+void
CBoostedTreeImpl::SHyperparameters::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + core::CPersistUtils::persist(HYPERPARAM_ETA_TAG, s_Eta, inserter); + core::CPersistUtils::persist(HYPERPARAM_ETA_GROWTH_RATE_PER_TREE_TAG, + s_EtaGrowthRatePerTree, inserter); + core::CPersistUtils::persist(HYPERPARAM_FEATURE_BAG_FRACTION_TAG, + s_FeatureBagFraction, inserter); + core::CPersistUtils::persist(HYPERPARAM_REGULARIZATION_TAG, s_Regularization, inserter); +} + +void CBoostedTreeImpl::CNode::acceptPersistInserter(core::CStatePersistInserter& inserter) const { + core::CPersistUtils::persist(LEFT_CHILD_TAG, m_LeftChild, inserter); + core::CPersistUtils::persist(RIGHT_CHILD_TAG, m_RightChild, inserter); + core::CPersistUtils::persist(SPLIT_FEATURE_TAG, m_SplitFeature, inserter); + core::CPersistUtils::persist(ASSIGN_MISSING_TO_LEFT_TAG, m_AssignMissingToLeft, inserter); + core::CPersistUtils::persist(NODE_VALUE_TAG, m_NodeValue, inserter); + core::CPersistUtils::persist(SPLIT_VALUE_TAG, m_SplitValue, inserter); } void CBoostedTreeImpl::acceptPersistInserter(core::CStatePersistInserter& inserter) const { core::CPersistUtils::persist(BAYESIAN_OPTIMIZATION_TAG, *m_BayesianOptimization, inserter); core::CPersistUtils::persist(BEST_FOREST_TEST_LOSS_TAG, m_BestForestTestLoss, inserter); - inserter.insertValue(RANDOM_NUMBER_GENERATOR_TAG, m_Rng.toString()); core::CPersistUtils::persist(CURRENT_ROUND_TAG, m_CurrentRound, inserter); core::CPersistUtils::persist(DEPENDENT_VARIABLE_TAG, m_DependentVariable, inserter); core::CPersistUtils::persist(ENCODER_TAG, *m_Encoder, inserter); @@ -899,8 +914,6 @@ void CBoostedTreeImpl::acceptPersistInserter(core::CStatePersistInserter& insert core::CPersistUtils::persist(FEATURE_DATA_TYPES_TAG, m_FeatureDataTypes, inserter); core::CPersistUtils::persist(FEATURE_SAMPLE_PROBABILITIES_TAG, m_FeatureSampleProbabilities, inserter); - core::CPersistUtils::persist(GAMMA_TAG, m_Gamma, inserter); - core::CPersistUtils::persist(LAMBDA_TAG, m_Lambda, inserter); core::CPersistUtils::persist(MAXIMUM_ATTEMPTS_TO_ADD_TREE_TAG, m_MaximumAttemptsToAddTree, inserter); core::CPersistUtils::persist(MAXIMUM_OPTIMISATION_ROUNDS_PER_HYPERPARAMETER_TAG, @@ -914,50 +927,40 @@ void CBoostedTreeImpl::acceptPersistInserter(core::CStatePersistInserter& insert core::CPersistUtils::persist(NUMBER_SPLITS_PER_FEATURE_TAG, m_NumberSplitsPerFeature, inserter); core::CPersistUtils::persist(NUMBER_THREADS_TAG, m_NumberThreads, inserter); + inserter.insertValue(RANDOM_NUMBER_GENERATOR_TAG, m_Rng.toString()); + core::CPersistUtils::persist(REGULARIZATION_OVERRIDE_TAG, + m_RegularizationOverride, inserter); + core::CPersistUtils::persist(REGULARIZATION_TAG, m_Regularization, inserter); core::CPersistUtils::persist(ROWS_PER_FEATURE_TAG, m_RowsPerFeature, inserter); core::CPersistUtils::persist(TESTING_ROW_MASKS_TAG, m_TestingRowMasks, inserter); core::CPersistUtils::persist(MAXIMUM_NUMBER_TREES_TAG, m_MaximumNumberTrees, inserter); core::CPersistUtils::persist(TRAINING_ROW_MASKS_TAG, m_TrainingRowMasks, inserter); + core::CPersistUtils::persist(TRAINING_PROGRESS_TAG, m_TrainingProgress, inserter); core::CPersistUtils::persist(BEST_FOREST_TAG, m_BestForest, inserter); core::CPersistUtils::persist(BEST_HYPERPARAMETERS_TAG, m_BestHyperparameters, inserter); core::CPersistUtils::persist(ETA_OVERRIDE_TAG, m_EtaOverride, inserter); core::CPersistUtils::persist(FEATURE_BAG_FRACTION_OVERRIDE_TAG, m_FeatureBagFractionOverride, inserter); - core::CPersistUtils::persist(GAMMA_OVERRIDE_TAG, 
m_GammaOverride, inserter); - core::CPersistUtils::persist(LAMBDA_OVERRIDE_TAG, m_LambdaOverride, inserter); core::CPersistUtils::persist(MAXIMUM_NUMBER_TREES_OVERRIDE_TAG, m_MaximumNumberTreesOverride, inserter); inserter.insertValue(LOSS_TAG, m_Loss->name()); } -void CBoostedTreeImpl::CNode::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - core::CPersistUtils::persist(LEFT_CHILD_TAG, m_LeftChild, inserter); - core::CPersistUtils::persist(RIGHT_CHILD_TAG, m_RightChild, inserter); - core::CPersistUtils::persist(SPLIT_FEATURE_TAG, m_SplitFeature, inserter); - core::CPersistUtils::persist(ASSIGN_MISSING_TO_LEFT_TAG, m_AssignMissingToLeft, inserter); - core::CPersistUtils::persist(NODE_VALUE_TAG, m_NodeValue, inserter); - core::CPersistUtils::persist(SPLIT_VALUE_TAG, m_SplitValue, inserter); -} - -void CBoostedTreeImpl::SHyperparameters::acceptPersistInserter(core::CStatePersistInserter& inserter) const { - core::CPersistUtils::persist(HYPERPARAM_LAMBDA_TAG, s_Lambda, inserter); - core::CPersistUtils::persist(HYPERPARAM_GAMMA_TAG, s_Gamma, inserter); - core::CPersistUtils::persist(HYPERPARAM_ETA_TAG, s_Eta, inserter); - core::CPersistUtils::persist(HYPERPARAM_ETA_GROWTH_RATE_PER_TREE_TAG, - s_EtaGrowthRatePerTree, inserter); - core::CPersistUtils::persist(HYPERPARAM_FEATURE_BAG_FRACTION_TAG, - s_FeatureBagFraction, inserter); - core::CPersistUtils::persist(HYPERPARAM_FEATURE_SAMPLE_PROBABILITIES_TAG, - s_FeatureSampleProbabilities, inserter); +template +bool CBoostedTreeImpl::CRegularization::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { + do { + const std::string& name = traverser.name(); + RESTORE(REGULARIZATION_GAMMA_TAG, + core::CPersistUtils::restore(REGULARIZATION_GAMMA_TAG, m_Gamma, traverser)) + RESTORE(REGULARIZATION_LAMBDA_TAG, + core::CPersistUtils::restore(REGULARIZATION_LAMBDA_TAG, m_Lambda, traverser)) + } while (traverser.next()); + return true; } bool CBoostedTreeImpl::SHyperparameters::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); - RESTORE(HYPERPARAM_LAMBDA_TAG, - core::CPersistUtils::restore(HYPERPARAM_LAMBDA_TAG, s_Lambda, traverser)) - RESTORE(HYPERPARAM_GAMMA_TAG, - core::CPersistUtils::restore(HYPERPARAM_GAMMA_TAG, s_Gamma, traverser)) RESTORE(HYPERPARAM_ETA_TAG, core::CPersistUtils::restore(HYPERPARAM_ETA_TAG, s_Eta, traverser)) RESTORE(HYPERPARAM_ETA_GROWTH_RATE_PER_TREE_TAG, @@ -966,9 +969,9 @@ bool CBoostedTreeImpl::SHyperparameters::acceptRestoreTraverser(core::CStateRest RESTORE(HYPERPARAM_FEATURE_BAG_FRACTION_TAG, core::CPersistUtils::restore(HYPERPARAM_FEATURE_BAG_FRACTION_TAG, s_FeatureBagFraction, traverser)) - RESTORE(HYPERPARAM_FEATURE_SAMPLE_PROBABILITIES_TAG, - core::CPersistUtils::restore(HYPERPARAM_FEATURE_SAMPLE_PROBABILITIES_TAG, - s_FeatureSampleProbabilities, traverser)) + RESTORE(HYPERPARAM_REGULARIZATION_TAG, + core::CPersistUtils::restore(HYPERPARAM_REGULARIZATION_TAG, + s_Regularization, traverser)) } while (traverser.next()); return true; } @@ -993,18 +996,6 @@ bool CBoostedTreeImpl::CNode::acceptRestoreTraverser(core::CStateRestoreTraverse return true; } -bool CBoostedTreeImpl::restoreLoss(CBoostedTree::TLossFunctionUPtr& loss, - core::CStateRestoreTraverser& traverser) { - const std::string& lossFunctionName{traverser.value()}; - if (lossFunctionName == CMse::NAME) { - loss = std::make_unique(); - return true; - } - LOG_ERROR(<< "Error restoring loss function. 
Unknown loss function type '" - << lossFunctionName << "'."); - return false; -} - bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& traverser) { do { const std::string& name = traverser.name(); @@ -1014,8 +1005,6 @@ bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& trav RESTORE(BEST_FOREST_TEST_LOSS_TAG, core::CPersistUtils::restore(BEST_FOREST_TEST_LOSS_TAG, m_BestForestTestLoss, traverser)) - RESTORE(RANDOM_NUMBER_GENERATOR_TAG, m_Rng.fromString(traverser.value())) - RESTORE(CURRENT_ROUND_TAG, core::CPersistUtils::restore(CURRENT_ROUND_TAG, m_CurrentRound, traverser)) RESTORE(DEPENDENT_VARIABLE_TAG, @@ -1036,8 +1025,6 @@ bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& trav RESTORE(FEATURE_SAMPLE_PROBABILITIES_TAG, core::CPersistUtils::restore(FEATURE_SAMPLE_PROBABILITIES_TAG, m_FeatureSampleProbabilities, traverser)) - RESTORE(GAMMA_TAG, core::CPersistUtils::restore(GAMMA_TAG, m_Gamma, traverser)) - RESTORE(LAMBDA_TAG, core::CPersistUtils::restore(LAMBDA_TAG, m_Lambda, traverser)) RESTORE(MAXIMUM_ATTEMPTS_TO_ADD_TREE_TAG, core::CPersistUtils::restore(MAXIMUM_ATTEMPTS_TO_ADD_TREE_TAG, m_MaximumAttemptsToAddTree, traverser)) @@ -1060,6 +1047,12 @@ bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& trav m_NumberSplitsPerFeature, traverser)) RESTORE(NUMBER_THREADS_TAG, core::CPersistUtils::restore(NUMBER_THREADS_TAG, m_NumberThreads, traverser)) + RESTORE(RANDOM_NUMBER_GENERATOR_TAG, m_Rng.fromString(traverser.value())) + RESTORE(REGULARIZATION_TAG, + core::CPersistUtils::restore(REGULARIZATION_TAG, m_Regularization, traverser)) + RESTORE(REGULARIZATION_OVERRIDE_TAG, + core::CPersistUtils::restore(REGULARIZATION_OVERRIDE_TAG, + m_RegularizationOverride, traverser)) RESTORE(ROWS_PER_FEATURE_TAG, core::CPersistUtils::restore(ROWS_PER_FEATURE_TAG, m_RowsPerFeature, traverser)) RESTORE(TESTING_ROW_MASKS_TAG, @@ -1069,6 +1062,8 @@ bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& trav m_MaximumNumberTrees, traverser)) RESTORE(TRAINING_ROW_MASKS_TAG, core::CPersistUtils::restore(TRAINING_ROW_MASKS_TAG, m_TrainingRowMasks, traverser)) + RESTORE(TRAINING_PROGRESS_TAG, + core::CPersistUtils::restore(TRAINING_PROGRESS_TAG, m_TrainingProgress, traverser)) RESTORE(BEST_FOREST_TAG, core::CPersistUtils::restore(BEST_FOREST_TAG, m_BestForest, traverser)) RESTORE(BEST_HYPERPARAMETERS_TAG, @@ -1079,10 +1074,6 @@ bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& trav RESTORE(FEATURE_BAG_FRACTION_OVERRIDE_TAG, core::CPersistUtils::restore(FEATURE_BAG_FRACTION_OVERRIDE_TAG, m_FeatureBagFractionOverride, traverser)) - RESTORE(GAMMA_OVERRIDE_TAG, - core::CPersistUtils::restore(GAMMA_OVERRIDE_TAG, m_GammaOverride, traverser)) - RESTORE(LAMBDA_OVERRIDE_TAG, - core::CPersistUtils::restore(LAMBDA_OVERRIDE_TAG, m_LambdaOverride, traverser)) RESTORE(MAXIMUM_NUMBER_TREES_OVERRIDE_TAG, core::CPersistUtils::restore(MAXIMUM_NUMBER_TREES_OVERRIDE_TAG, m_MaximumNumberTreesOverride, traverser)) @@ -1091,6 +1082,18 @@ bool CBoostedTreeImpl::acceptRestoreTraverser(core::CStateRestoreTraverser& trav return true; } +bool CBoostedTreeImpl::restoreLoss(CBoostedTree::TLossFunctionUPtr& loss, + core::CStateRestoreTraverser& traverser) { + const std::string& lossFunctionName{traverser.value()}; + if (lossFunctionName == CMse::NAME) { + loss = std::make_unique(); + return true; + } + LOG_ERROR(<< "Error restoring loss function. 
Unknown loss function type '" + << lossFunctionName << "'."); + return false; +} + std::size_t CBoostedTreeImpl::memoryUsage() const { std::size_t mem{core::CMemory::dynamicSize(m_Loss)}; mem += core::CMemory::dynamicSize(m_Encoder); diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc index b5178b80be..99dd04242f 100644 --- a/lib/maths/unittest/CBoostedTreeTest.cc +++ b/lib/maths/unittest/CBoostedTreeTest.cc @@ -602,7 +602,7 @@ void CBoostedTreeTest::testCategoricalRegressors() { LOG_DEBUG(<< "bias = " << modelBias); LOG_DEBUG(<< " R^2 = " << modelRSquared); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, modelBias, 0.1); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, modelBias, 0.13); CPPUNIT_ASSERT(modelRSquared > 0.9); } From 31796bdcad665d83161f1760637bbe59659064e0 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 14:06:53 +0100 Subject: [PATCH 02/23] Docs --- docs/CHANGELOG.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 3f1ca56001..629a9f01ff 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -36,6 +36,8 @@ For large data sets this change was observed to give a 10% to 20% decrease in train time. (See {ml-pull}622[#622].) * Upgrade Boost libraries to version 1.71. (See {ml-pull}638[#638].) +* Improve initialisation of boosted tree training. This generally enables us to +find lower loss models faster. (See {ml-pull}686[#686].) == {es} version 7.4.0 From d163dfd3157292049b7354f32e90e0417064c4f3 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 15:00:31 +0100 Subject: [PATCH 03/23] Typos --- lib/maths/CBoostedTreeFactory.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index ebb76ef141..037c14ff2f 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -283,13 +283,14 @@ void CBoostedTreeFactory::initializeHyperparameters(core::CDataFrame& frame) { void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDataFrame& frame) { // The strategy here is to: - // 1) Get percentile estimates of the gain in and sum curvature of the loss - // function at splits in a single tree, + // 1) Get percentile estimates of the gain in the loss function and its sum + // curvature from the splits selected in a single tree with regulizers + // zeroed, // 2) Use these to upper bound the size of gamma and lambda, that is find - // values we for which we expect to underfit the data, - // 3) Decrease each regularizer and look for turning point in the test loss, - // i.e. the point at which transition to overfit occurs. - // We'll search intervals in the vicinity of this values in the hyperparameter + // values for which we expect to underfit the data, + // 3) Decrease each regularizer and look for a turning point in the test + // loss, i.e. the point at which transition to overfit occurs. + // We'll search intervals in the vicinity of these values in the hyperparameter // optimisation loop. 
core::CPackedBitVector allTrainingRowsMask{m_TreeImpl->allTrainingRowsMask()}; From dcbe3feb3e9ac0fa36bdd07df4c1e0c0d3510cf0 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 15:04:03 +0100 Subject: [PATCH 04/23] Build fix --- include/maths/CBoostedTreeImpl.h | 133 ++++++++----------------------- lib/maths/CBoostedTreeImpl.cc | 65 +++++++++++++++ 2 files changed, 97 insertions(+), 101 deletions(-) diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index d5d8201b47..d8cf8c4703 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -398,41 +398,38 @@ class MATHS_EXPORT CBoostedTreeImpl final { const CLeafNodeStatistics& parent, const CLeafNodeStatistics& sibling, core::CPackedBitVector rowMask) - : m_Id{id}, m_Regularization{sibling.m_Regularization}, - m_CandidateSplits{sibling.m_CandidateSplits}, m_Depth{sibling.m_Depth}, - m_FeatureBag{sibling.m_FeatureBag}, m_RowMask{std::move(rowMask)} { - - LOG_TRACE(<< "row mask = " << m_RowMask); - LOG_TRACE(<< "feature bag = " << core::CContainerPrinter::print(m_FeatureBag)); - - m_Gradients.resize(m_CandidateSplits.size()); - m_Curvatures.resize(m_CandidateSplits.size()); - m_MissingGradients.resize(m_CandidateSplits.size(), 0.0); - m_MissingCurvatures.resize(m_CandidateSplits.size(), 0.0); - - for (std::size_t i = 0; i < m_CandidateSplits.size(); ++i) { - std::size_t numberSplits{m_CandidateSplits[i].size() + 1}; - m_Gradients[i].resize(numberSplits); - m_Curvatures[i].resize(numberSplits); - for (std::size_t j = 0; j < numberSplits; ++j) { - m_Gradients[i][j] = parent.m_Gradients[i][j] - - sibling.m_Gradients[i][j]; - m_Curvatures[i][j] = parent.m_Curvatures[i][j] - - sibling.m_Curvatures[i][j]; - } - m_MissingGradients[i] = parent.m_MissingGradients[i] - - sibling.m_MissingGradients[i]; - m_MissingCurvatures[i] = parent.m_MissingCurvatures[i] - - sibling.m_MissingCurvatures[i]; - } - - LOG_TRACE(<< "gradients = " << core::CContainerPrinter::print(m_Gradients)); - LOG_TRACE(<< "curvatures = " << core::CContainerPrinter::print(m_Curvatures)); - LOG_TRACE(<< "missing gradients = " - << core::CContainerPrinter::print(m_MissingGradients)); - LOG_TRACE(<< "missing curvatures = " - << core::CContainerPrinter::print(m_MissingCurvatures)); + : m_Id{id}, m_Regularization{sibling.m_Regularization}, + m_CandidateSplits{sibling.m_CandidateSplits}, m_Depth{sibling.m_Depth}, + m_FeatureBag{sibling.m_FeatureBag}, m_RowMask{std::move(rowMask)} { + + LOG_TRACE(<< "row mask = " << m_RowMask); + LOG_TRACE(<< "feature bag = " << core::CContainerPrinter::print(m_FeatureBag)); + + m_Gradients.resize(m_CandidateSplits.size()); + m_Curvatures.resize(m_CandidateSplits.size()); + m_MissingGradients.resize(m_CandidateSplits.size(), 0.0); + m_MissingCurvatures.resize(m_CandidateSplits.size(), 0.0); + + for (std::size_t i = 0; i < m_CandidateSplits.size(); ++i) { + std::size_t numberSplits{m_CandidateSplits[i].size() + 1}; + m_Gradients[i].resize(numberSplits); + m_Curvatures[i].resize(numberSplits); + for (std::size_t j = 0; j < numberSplits; ++j) { + m_Gradients[i][j] = parent.m_Gradients[i][j] - sibling.m_Gradients[i][j]; + m_Curvatures[i][j] = parent.m_Curvatures[i][j] - sibling.m_Curvatures[i][j]; } + m_MissingGradients[i] = parent.m_MissingGradients[i] - + sibling.m_MissingGradients[i]; + m_MissingCurvatures[i] = parent.m_MissingCurvatures[i] - + sibling.m_MissingCurvatures[i]; + } + + LOG_TRACE(<< "gradients = " << core::CContainerPrinter::print(m_Gradients)); + LOG_TRACE(<< "curvatures = " 
<< core::CContainerPrinter::print(m_Curvatures)); + LOG_TRACE(<< "missing gradients = " << core::CContainerPrinter::print(m_MissingGradients)); + LOG_TRACE(<< "missing curvatures = " + << core::CContainerPrinter::print(m_MissingCurvatures)); +} CLeafNodeStatistics(const CLeafNodeStatistics&) = delete; @@ -647,73 +644,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { return *m_BestSplit; } - SSplitStatistics computeBestSplitStatistics() const { - - // We have two possible regularisation terms we'll use: - // 1. Tree size: gamma * "node count" - // 2. Sum square weights: lambda * sum{"leaf weight" ^ 2)} - - SSplitStatistics result{-INF, 0.0, m_FeatureBag.size(), INF, true}; - - for (auto i : m_FeatureBag) { - double g{std::accumulate(m_Gradients[i].begin(), m_Gradients[i].end(), 0.0) + - m_MissingGradients[i]}; - double h{std::accumulate(m_Curvatures[i].begin(), - m_Curvatures[i].end(), 0.0) + - m_MissingCurvatures[i]}; - double gl[]{m_MissingGradients[i], 0.0}; - double hl[]{m_MissingCurvatures[i], 0.0}; - - double maximumGain{-INF}; - double splitAt{-INF}; - bool assignMissingToLeft{true}; - - for (std::size_t j = 0; j + 1 < m_Gradients[i].size(); ++j) { - gl[ASSIGN_MISSING_TO_LEFT] += m_Gradients[i][j]; - hl[ASSIGN_MISSING_TO_LEFT] += m_Curvatures[i][j]; - gl[ASSIGN_MISSING_TO_RIGHT] += m_Gradients[i][j]; - hl[ASSIGN_MISSING_TO_RIGHT] += m_Curvatures[i][j]; - - double gain[]{ - CTools::pow2(gl[ASSIGN_MISSING_TO_LEFT]) / - (hl[ASSIGN_MISSING_TO_LEFT] + m_Regularization.lambda()) + - CTools::pow2(g - gl[ASSIGN_MISSING_TO_LEFT]) / - (h - hl[ASSIGN_MISSING_TO_LEFT] + - m_Regularization.lambda()), - CTools::pow2(gl[ASSIGN_MISSING_TO_RIGHT]) / - (hl[ASSIGN_MISSING_TO_RIGHT] + m_Regularization.lambda()) + - CTools::pow2(g - gl[ASSIGN_MISSING_TO_RIGHT]) / - (h - hl[ASSIGN_MISSING_TO_RIGHT] + - m_Regularization.lambda())}; - - if (gain[ASSIGN_MISSING_TO_LEFT] > maximumGain) { - maximumGain = gain[ASSIGN_MISSING_TO_LEFT]; - splitAt = m_CandidateSplits[i][j]; - assignMissingToLeft = true; - } - if (gain[ASSIGN_MISSING_TO_RIGHT] > maximumGain) { - maximumGain = gain[ASSIGN_MISSING_TO_RIGHT]; - splitAt = m_CandidateSplits[i][j]; - assignMissingToLeft = false; - } - } - - double gain{0.5 * (maximumGain - - CTools::pow2(g) / (h + m_Regularization.lambda())) - - m_Regularization.gamma()}; - - SSplitStatistics candidate{gain, h, i, splitAt, assignMissingToLeft}; - LOG_TRACE(<< "candidate split: " << candidate.print()); - - if (candidate > result) { - result = candidate; - } - } - - LOG_TRACE(<< "best split: " << result.print()); - - return result; - } + SSplitStatistics computeBestSplitStatistics() const; private: std::size_t m_Id; diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index 4e618beabd..ae656a6d8b 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -112,6 +112,71 @@ void CBoostedTreeImpl::CLeafNodeStatistics::addRowDerivatives(const CEncodedData } } +CBoostedTreeImpl::CLeafNodeStatistics::SSplitStatistics +CBoostedTreeImpl::CLeafNodeStatistics::computeBestSplitStatistics() const { + + // We have two possible regularisation terms we'll use: + // 1. Tree size: gamma * "node count" + // 2. 
Sum square weights: lambda * sum{"leaf weight" ^ 2)} + + SSplitStatistics result{-INF, 0.0, m_FeatureBag.size(), INF, true}; + + for (auto i : m_FeatureBag) { + double g{std::accumulate(m_Gradients[i].begin(), m_Gradients[i].end(), 0.0) + + m_MissingGradients[i]}; + double h{std::accumulate(m_Curvatures[i].begin(), m_Curvatures[i].end(), 0.0) + + m_MissingCurvatures[i]}; + double gl[]{m_MissingGradients[i], 0.0}; + double hl[]{m_MissingCurvatures[i], 0.0}; + + double maximumGain{-INF}; + double splitAt{-INF}; + bool assignMissingToLeft{true}; + + for (std::size_t j = 0; j + 1 < m_Gradients[i].size(); ++j) { + gl[ASSIGN_MISSING_TO_LEFT] += m_Gradients[i][j]; + hl[ASSIGN_MISSING_TO_LEFT] += m_Curvatures[i][j]; + gl[ASSIGN_MISSING_TO_RIGHT] += m_Gradients[i][j]; + hl[ASSIGN_MISSING_TO_RIGHT] += m_Curvatures[i][j]; + + double gain[]{ + CTools::pow2(gl[ASSIGN_MISSING_TO_LEFT]) / + (hl[ASSIGN_MISSING_TO_LEFT] + m_Regularization.lambda()) + + CTools::pow2(g - gl[ASSIGN_MISSING_TO_LEFT]) / + (h - hl[ASSIGN_MISSING_TO_LEFT] + m_Regularization.lambda()), + CTools::pow2(gl[ASSIGN_MISSING_TO_RIGHT]) / + (hl[ASSIGN_MISSING_TO_RIGHT] + m_Regularization.lambda()) + + CTools::pow2(g - gl[ASSIGN_MISSING_TO_RIGHT]) / + (h - hl[ASSIGN_MISSING_TO_RIGHT] + m_Regularization.lambda())}; + + if (gain[ASSIGN_MISSING_TO_LEFT] > maximumGain) { + maximumGain = gain[ASSIGN_MISSING_TO_LEFT]; + splitAt = m_CandidateSplits[i][j]; + assignMissingToLeft = true; + } + if (gain[ASSIGN_MISSING_TO_RIGHT] > maximumGain) { + maximumGain = gain[ASSIGN_MISSING_TO_RIGHT]; + splitAt = m_CandidateSplits[i][j]; + assignMissingToLeft = false; + } + } + + double gain{0.5 * (maximumGain - CTools::pow2(g) / (h + m_Regularization.lambda())) - + m_Regularization.gamma()}; + + SSplitStatistics candidate{gain, h, i, splitAt, assignMissingToLeft}; + LOG_TRACE(<< "candidate split: " << candidate.print()); + + if (candidate > result) { + result = candidate; + } + } + + LOG_TRACE(<< "best split: " << result.print()); + + return result; +} + CBoostedTreeImpl::CBoostedTreeImpl(std::size_t numberThreads, CBoostedTree::TLossFunctionUPtr loss) : m_NumberThreads{numberThreads}, m_Loss{std::move(loss)}, m_BestHyperparameters{m_Regularization, m_Eta, m_EtaGrowthRatePerTree, m_FeatureBagFraction} { From 841ff03c950b224cbdbf1d34d56fabc3124b58df Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 15:04:56 +0100 Subject: [PATCH 05/23] Formatting --- include/maths/CBoostedTreeImpl.h | 65 +++++++++++++++++--------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index d8cf8c4703..c0becaf137 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -398,38 +398,41 @@ class MATHS_EXPORT CBoostedTreeImpl final { const CLeafNodeStatistics& parent, const CLeafNodeStatistics& sibling, core::CPackedBitVector rowMask) - : m_Id{id}, m_Regularization{sibling.m_Regularization}, - m_CandidateSplits{sibling.m_CandidateSplits}, m_Depth{sibling.m_Depth}, - m_FeatureBag{sibling.m_FeatureBag}, m_RowMask{std::move(rowMask)} { - - LOG_TRACE(<< "row mask = " << m_RowMask); - LOG_TRACE(<< "feature bag = " << core::CContainerPrinter::print(m_FeatureBag)); - - m_Gradients.resize(m_CandidateSplits.size()); - m_Curvatures.resize(m_CandidateSplits.size()); - m_MissingGradients.resize(m_CandidateSplits.size(), 0.0); - m_MissingCurvatures.resize(m_CandidateSplits.size(), 0.0); - - for (std::size_t i = 0; i < m_CandidateSplits.size(); ++i) { - std::size_t 
numberSplits{m_CandidateSplits[i].size() + 1}; - m_Gradients[i].resize(numberSplits); - m_Curvatures[i].resize(numberSplits); - for (std::size_t j = 0; j < numberSplits; ++j) { - m_Gradients[i][j] = parent.m_Gradients[i][j] - sibling.m_Gradients[i][j]; - m_Curvatures[i][j] = parent.m_Curvatures[i][j] - sibling.m_Curvatures[i][j]; + : m_Id{id}, m_Regularization{sibling.m_Regularization}, + m_CandidateSplits{sibling.m_CandidateSplits}, m_Depth{sibling.m_Depth}, + m_FeatureBag{sibling.m_FeatureBag}, m_RowMask{std::move(rowMask)} { + + LOG_TRACE(<< "row mask = " << m_RowMask); + LOG_TRACE(<< "feature bag = " << core::CContainerPrinter::print(m_FeatureBag)); + + m_Gradients.resize(m_CandidateSplits.size()); + m_Curvatures.resize(m_CandidateSplits.size()); + m_MissingGradients.resize(m_CandidateSplits.size(), 0.0); + m_MissingCurvatures.resize(m_CandidateSplits.size(), 0.0); + + for (std::size_t i = 0; i < m_CandidateSplits.size(); ++i) { + std::size_t numberSplits{m_CandidateSplits[i].size() + 1}; + m_Gradients[i].resize(numberSplits); + m_Curvatures[i].resize(numberSplits); + for (std::size_t j = 0; j < numberSplits; ++j) { + m_Gradients[i][j] = parent.m_Gradients[i][j] - + sibling.m_Gradients[i][j]; + m_Curvatures[i][j] = parent.m_Curvatures[i][j] - + sibling.m_Curvatures[i][j]; + } + m_MissingGradients[i] = parent.m_MissingGradients[i] - + sibling.m_MissingGradients[i]; + m_MissingCurvatures[i] = parent.m_MissingCurvatures[i] - + sibling.m_MissingCurvatures[i]; + } + + LOG_TRACE(<< "gradients = " << core::CContainerPrinter::print(m_Gradients)); + LOG_TRACE(<< "curvatures = " << core::CContainerPrinter::print(m_Curvatures)); + LOG_TRACE(<< "missing gradients = " + << core::CContainerPrinter::print(m_MissingGradients)); + LOG_TRACE(<< "missing curvatures = " + << core::CContainerPrinter::print(m_MissingCurvatures)); } - m_MissingGradients[i] = parent.m_MissingGradients[i] - - sibling.m_MissingGradients[i]; - m_MissingCurvatures[i] = parent.m_MissingCurvatures[i] - - sibling.m_MissingCurvatures[i]; - } - - LOG_TRACE(<< "gradients = " << core::CContainerPrinter::print(m_Gradients)); - LOG_TRACE(<< "curvatures = " << core::CContainerPrinter::print(m_Curvatures)); - LOG_TRACE(<< "missing gradients = " << core::CContainerPrinter::print(m_MissingGradients)); - LOG_TRACE(<< "missing curvatures = " - << core::CContainerPrinter::print(m_MissingCurvatures)); -} CLeafNodeStatistics(const CLeafNodeStatistics&) = delete; From 7a57883e05fc55dcf878201b6be5ebde4cc13a4a Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 15:10:45 +0100 Subject: [PATCH 06/23] Remove depth: this isn't needed yet --- include/maths/CBoostedTreeImpl.h | 14 +++++--------- lib/maths/CBoostedTreeImpl.cc | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index c0becaf137..0c11fd3f0b 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -379,11 +379,9 @@ class MATHS_EXPORT CBoostedTreeImpl final { const CDataFrameCategoryEncoder& encoder, const TRegularization& regularization, const TDoubleVecVec& candidateSplits, - std::size_t depth, TSizeVec featureBag, core::CPackedBitVector rowMask) - : m_Id{id}, m_Regularization{regularization}, - m_CandidateSplits{candidateSplits}, m_Depth{depth}, + : m_Id{id}, m_Regularization{regularization}, m_CandidateSplits{candidateSplits}, m_FeatureBag{std::move(featureBag)}, m_RowMask{std::move(rowMask)} { std::sort(m_FeatureBag.begin(), m_FeatureBag.end()); @@ -399,7 +397,7 
@@ class MATHS_EXPORT CBoostedTreeImpl final { const CLeafNodeStatistics& sibling, core::CPackedBitVector rowMask) : m_Id{id}, m_Regularization{sibling.m_Regularization}, - m_CandidateSplits{sibling.m_CandidateSplits}, m_Depth{sibling.m_Depth}, + m_CandidateSplits{sibling.m_CandidateSplits}, m_FeatureBag{sibling.m_FeatureBag}, m_RowMask{std::move(rowMask)} { LOG_TRACE(<< "row mask = " << m_RowMask); @@ -458,8 +456,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { if (leftChildHasFewerRows) { auto leftChild = std::make_shared( leftChildId, numberThreads, frame, encoder, regularization, - candidateSplits, m_Depth + 1, std::move(featureBag), - std::move(leftChildRowMask)); + candidateSplits, std::move(featureBag), std::move(leftChildRowMask)); auto rightChild = std::make_shared( rightChildId, *this, *leftChild, std::move(rightChildRowMask)); @@ -467,8 +464,8 @@ class MATHS_EXPORT CBoostedTreeImpl final { } auto rightChild = std::make_shared( - rightChildId, numberThreads, frame, encoder, regularization, candidateSplits, - m_Depth + 1, std::move(featureBag), std::move(rightChildRowMask)); + rightChildId, numberThreads, frame, encoder, regularization, + candidateSplits, std::move(featureBag), std::move(rightChildRowMask)); auto leftChild = std::make_shared( leftChildId, *this, *rightChild, std::move(leftChildRowMask)); @@ -653,7 +650,6 @@ class MATHS_EXPORT CBoostedTreeImpl final { std::size_t m_Id; const TRegularization& m_Regularization; const TDoubleVecVec& m_CandidateSplits; - std::size_t m_Depth; TSizeVec m_FeatureBag; core::CPackedBitVector m_RowMask; TDoubleVecVec m_Gradients; diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index ae656a6d8b..9f74d9ae05 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -570,7 +570,7 @@ CBoostedTreeImpl::trainTree(core::CDataFrame& frame, TLeafNodeStatisticsPtrQueue leaves; leaves.push(std::make_shared( 0 /*root*/, m_NumberThreads, frame, *m_Encoder, m_Regularization, - candidateSplits, 0 /*depth*/, this->featureBag(), trainingRowMask)); + candidateSplits, this->featureBag(), trainingRowMask)); // We update local variables because the callback can be expensive if it // requires accessing atomics. 
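[Editor's note] For orientation between patches: the score maximised in
CLeafNodeStatistics::computeBestSplitStatistics above is the standard gradient
boosting split gain, in which lambda damps the leaf weights and gamma charges a
flat penalty per additional node. A minimal, self-contained restatement of the
formula, assuming g and h are the node's summed loss gradient and curvature and
gl, hl the portions routed to the left child (the function name is illustrative):

    double splitGain(double gl, double hl, double g, double h,
                     double lambda, double gamma) {
        auto pow2 = [](double x) { return x * x; };
        double gr{g - gl};
        double hr{h - hl};
        // Loss reduction from splitting one leaf into two, less the flat
        // per-split size penalty.
        return 0.5 * (pow2(gl) / (hl + lambda) + pow2(gr) / (hr + lambda) -
                      pow2(g) / (h + lambda)) -
               gamma;
    }

This is also why the per-node gain and curvature returned by
estimateTreeGainAndCurvature give natural units for gamma and lambda
respectively when the factory chooses its search intervals.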
From cd71cd6a62794ea5dfd89e052b9afa658dce0df6 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Mon, 23 Sep 2019 18:28:39 +0100 Subject: [PATCH 07/23] Fix tests --- lib/api/unittest/CDataFrameAnalyzerTest.cc | 35 ++++++++-------------- lib/maths/CBoostedTreeImpl.cc | 6 ++-- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/lib/api/unittest/CDataFrameAnalyzerTest.cc b/lib/api/unittest/CDataFrameAnalyzerTest.cc index 06c133982b..7c82fdbe5f 100644 --- a/lib/api/unittest/CDataFrameAnalyzerTest.cc +++ b/lib/api/unittest/CDataFrameAnalyzerTest.cc @@ -653,7 +653,7 @@ void CDataFrameAnalyzerTest::testRunBoostedTreeTraining() { LOG_DEBUG(<< "time to train = " << core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) << "ms"); CPPUNIT_ASSERT(core::CProgramCounters::counter( - counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 2300000); + counter_t::E_DFTPMEstimatedPeakMemoryUsage) < 2600000); CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMPeakMemoryUsage) < 1050000); CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) > 0); CPPUNIT_ASSERT(core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) <= duration); @@ -1176,34 +1176,25 @@ void CDataFrameAnalyzerTest::testRunBoostedTreeTrainingWithStateRecoverySubrouti rapidjson::Document expectedResults{treeToJsonDocument(*expectedTree)}; const auto& expectedHyperparameters = expectedResults["best_hyperparameters"]; + const auto& expectedRegularizationHyperparameters = + expectedHyperparameters["hyperparam_regularization"]; rapidjson::Document actualResults{treeToJsonDocument(*actualTree)}; const auto& actualHyperparameters = actualResults["best_hyperparameters"]; + const auto& actualRegularizationHyperparameters = + actualHyperparameters["hyperparam_regularization"]; - auto assertDoublesEqual = [&expectedHyperparameters, - &actualHyperparameters](std::string key) { + for (const auto& key : {"hyperparam_eta", "hyperparam_eta_growth_rate_per_tree", + "hyperparam_feature_bag_fraction"}) { double expected{std::stod(expectedHyperparameters[key].GetString())}; double actual{std::stod(actualHyperparameters[key].GetString())}; CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-4 * expected); - }; - auto assertDoublesArrayEqual = [&expectedHyperparameters, - &actualHyperparameters](std::string key) { - TDoubleVec expectedVector; - core::CPersistUtils::fromString(expectedHyperparameters[key].GetString(), expectedVector); - TDoubleVec actualVector; - core::CPersistUtils::fromString(actualHyperparameters[key].GetString(), actualVector); - CPPUNIT_ASSERT_EQUAL(expectedVector.size(), actualVector.size()); - for (size_t i = 0; i < expectedVector.size(); i++) { - CPPUNIT_ASSERT_DOUBLES_EQUAL(expectedVector[i], actualVector[i], - 1e-4 * expectedVector[i]); - } - }; - assertDoublesEqual("hyperparam_lambda"); - assertDoublesEqual("hyperparam_gamma"); - assertDoublesEqual("hyperparam_eta"); - assertDoublesEqual("hyperparam_eta_growth_rate_per_tree"); - assertDoublesEqual("hyperparam_feature_bag_fraction"); - assertDoublesArrayEqual("hyperparam_feature_sample_probabilities"); + } + for (const auto& key : {"regularization_gamma", "regularization_lambda"}) { + double expected{std::stod(expectedRegularizationHyperparameters[key].GetString())}; + double actual{std::stod(actualRegularizationHyperparameters[key].GetString())}; + CPPUNIT_ASSERT_DOUBLES_EQUAL(expected, actual, 1e-4 * expected); + } } maths::CBoostedTreeFactory::TBoostedTreeUPtr diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc 
index 9f74d9ae05..5a46ae07b2 100644
--- a/lib/maths/CBoostedTreeImpl.cc
+++ b/lib/maths/CBoostedTreeImpl.cc
@@ -924,10 +924,10 @@ const std::string REGULARIZATION_OVERRIDE_TAG{"regularization_override"};
 const std::string ROWS_PER_FEATURE_TAG{"rows_per_feature"};
 const std::string TESTING_ROW_MASKS_TAG{"testing_row_masks"};
 const std::string TRAINING_ROW_MASKS_TAG{"training_row_masks"};
-const std::string TRAINING_PROGRESS_TAG{"training_row_masks"};
+const std::string TRAINING_PROGRESS_TAG{"training_progress"};

-const std::string REGULARIZATION_GAMMA_TAG{"gamma"};
-const std::string REGULARIZATION_LAMBDA_TAG{"lambda"};
+const std::string REGULARIZATION_GAMMA_TAG{"regularization_gamma"};
+const std::string REGULARIZATION_LAMBDA_TAG{"regularization_lambda"};

 const std::string HYPERPARAM_ETA_TAG{"hyperparam_eta"};
 const std::string HYPERPARAM_ETA_GROWTH_RATE_PER_TREE_TAG{"hyperparam_eta_growth_rate_per_tree"};

From e42208c2eb880fa90fcc29467d1afdef348cf3dc Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 24 Sep 2019 12:11:19 +0100
Subject: [PATCH 08/23] Create tree trainer in buildFor for both restore and creation from scratch

---
 include/api/CDataFrameBoostedTreeRunner.h |  4 +-
 include/maths/CBoostedTreeFactory.h       | 15 ++---
 lib/api/CDataFrameBoostedTreeRunner.cc    | 17 +++--
 lib/maths/CBoostedTreeFactory.cc          | 77 ++++++++++++-----------
 lib/maths/CBoostedTreeImpl.cc             |  2 +
 lib/maths/unittest/CBoostedTreeTest.cc    | 13 ++--
 6 files changed, 71 insertions(+), 57 deletions(-)

diff --git a/include/api/CDataFrameBoostedTreeRunner.h b/include/api/CDataFrameBoostedTreeRunner.h
index 7924ea44f6..13b0a6863f 100644
--- a/include/api/CDataFrameBoostedTreeRunner.h
+++ b/include/api/CDataFrameBoostedTreeRunner.h
@@ -47,6 +47,7 @@ class API_EXPORT CDataFrameBoostedTreeRunner final : public CDataFrameAnalysisRu
 private:
     using TBoostedTreeUPtr = std::unique_ptr;
     using TBoostedTreeFactoryUPtr = std::unique_ptr;
+    using TDataSearcherUPtr = CDataFrameAnalysisSpecification::TDataSearcherUPtr;
     using TMemoryEstimator = std::function;

 private:
@@ -58,7 +59,8 @@ class API_EXPORT CDataFrameBoostedTreeRunner final : public CDataFrameAnalysisRu
     TMemoryEstimator memoryEstimator();

     bool restoreBoostedTree(core::CDataFrame& frame,
-                            CDataFrameAnalysisSpecification::TDataSearcherUPtr& restoreSearcher);
+                            std::size_t dependentVariableColumn,
+                            TDataSearcherUPtr& restoreSearcher);

 private:
     // Note custom config is written directly to the factory object.
diff --git a/include/maths/CBoostedTreeFactory.h b/include/maths/CBoostedTreeFactory.h
index 3dd579a4a8..fa33e8324f 100644
--- a/include/maths/CBoostedTreeFactory.h
+++ b/include/maths/CBoostedTreeFactory.h
@@ -44,12 +44,9 @@ class MATHS_EXPORT CBoostedTreeFactory final {
                                                       TLossFunctionUPtr loss);

     //! Construct a boosted tree object from its serialized version.
-    static TBoostedTreeUPtr
-    constructFromString(std::istream& jsonStringStream,
-                        core::CDataFrame& frame,
-                        TProgressCallback recordProgress = noopRecordProgress,
-                        TMemoryUsageCallback recordMemoryUsage = noopRecordMemoryUsage,
-                        TTrainingStateCallback recordTrainingState = noopRecordTrainingState);
+    //!
+    //! \warning Throws a runtime error on failure to restore.
+ static CBoostedTreeFactory constructFromString(std::istream& jsonStringStream); ~CBoostedTreeFactory(); CBoostedTreeFactory(CBoostedTreeFactory&) = delete; @@ -108,7 +105,7 @@ class MATHS_EXPORT CBoostedTreeFactory final { static const std::size_t MAXIMUM_NUMBER_TREES; private: - CBoostedTreeFactory(std::size_t numberThreads, TLossFunctionUPtr loss); + CBoostedTreeFactory(bool restored, std::size_t numberThreads, TLossFunctionUPtr loss); //! Compute the row masks for the missing values for each feature. void initializeMissingFeatureMasks(const core::CDataFrame& frame) const; @@ -152,6 +149,9 @@ class MATHS_EXPORT CBoostedTreeFactory final { //! Setup monitoring for training progress. void setupTrainingProgressMonitoring(); + //! Refresh progress monitoring after restoring from saved training state. + void restoreTrainingProgressMonitoring(); + static void noopRecordProgress(double); static void noopRecordMemoryUsage(std::int64_t); static void noopRecordTrainingState(CDataFrameRegressionModel::TPersistFunc); @@ -159,6 +159,7 @@ class MATHS_EXPORT CBoostedTreeFactory final { private: TOptionalDouble m_MinimumFrequencyToOneHotEncode; TOptionalSize m_BayesianOptimisationRestarts; + bool m_Restored = false; TBoostedTreeImplUPtr m_TreeImpl; TVector m_GammaSearchInterval; TVector m_LambdaSearchInterval; diff --git a/lib/api/CDataFrameBoostedTreeRunner.cc b/lib/api/CDataFrameBoostedTreeRunner.cc index 0023c99d04..f4267519ad 100644 --- a/lib/api/CDataFrameBoostedTreeRunner.cc +++ b/lib/api/CDataFrameBoostedTreeRunner.cc @@ -191,7 +191,8 @@ void CDataFrameBoostedTreeRunner::runImpl(const TStrVec& featureNames, auto restoreSearcher{this->spec().restoreSearcher()}; bool treeRestored{false}; if (restoreSearcher != nullptr) { - treeRestored = this->restoreBoostedTree(frame, restoreSearcher); + treeRestored = this->restoreBoostedTree( + frame, dependentVariableColumn - featureNames.begin(), restoreSearcher); } if (treeRestored == false) { @@ -204,9 +205,10 @@ void CDataFrameBoostedTreeRunner::runImpl(const TStrVec& featureNames, core::CProgramCounters::counter(counter_t::E_DFTPMTimeToTrain) = watch.stop(); } -bool CDataFrameBoostedTreeRunner::restoreBoostedTree( - core::CDataFrame& frame, - CDataFrameAnalysisSpecification::TDataSearcherUPtr& restoreSearcher) { // Restore from Elasticsearch compressed data +bool CDataFrameBoostedTreeRunner::restoreBoostedTree(core::CDataFrame& frame, + std::size_t dependentVariableColumn, + TDataSearcherUPtr& restoreSearcher) { + // Restore from Elasticsearch compressed data try { core::CStateDecompressor decompressor(*restoreSearcher); decompressor.setStateRestoreSearch( @@ -228,8 +230,11 @@ bool CDataFrameBoostedTreeRunner::restoreBoostedTree( return false; } - m_BoostedTree = maths::CBoostedTreeFactory::constructFromString( - *inputStream, frame, progressRecorder(), memoryEstimator(), statePersister()); + m_BoostedTree = maths::CBoostedTreeFactory::constructFromString(*inputStream) + .progressCallback(this->progressRecorder()) + .trainingStateCallback(this->statePersister()) + .memoryUsageCallback(this->memoryEstimator()) + .buildFor(frame, dependentVariableColumn); } catch (std::exception& e) { LOG_ERROR(<< "Failed to restore state! 
" << e.what()); return false; diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 037c14ff2f..25c8cb7343 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -43,25 +43,38 @@ const double MAIN_TRAINING_LOOP_TREE_SIZE_MULTIPLIER{10.0}; CBoostedTreeFactory::TBoostedTreeUPtr CBoostedTreeFactory::buildFor(core::CDataFrame& frame, std::size_t dependentVariable) { - m_TreeImpl->m_DependentVariable = dependentVariable; + if (m_Restored) { - this->setupTrainingProgressMonitoring(); + if (dependentVariable != m_TreeImpl->m_DependentVariable) { + HANDLE_FATAL(<< "Internal error: expected dependent variable " + << m_TreeImpl->m_DependentVariable << " got " << dependentVariable); + } + + this->restoreTrainingProgressMonitoring(); + + frame.resizeColumns(m_TreeImpl->m_NumberThreads, + frame.numberColumns() + this->numberExtraColumnsForTrain()); + + } else { + + m_TreeImpl->m_DependentVariable = dependentVariable; - this->initializeMissingFeatureMasks(frame); - std::tie(m_TreeImpl->m_TrainingRowMasks, m_TreeImpl->m_TestingRowMasks) = - this->crossValidationRowMasks(); + this->setupTrainingProgressMonitoring(); - // We store the gradient and curvature of the loss function and the predicted - // value for the dependent variable of the regression. - frame.resizeColumns(m_TreeImpl->m_NumberThreads, - frame.numberColumns() + this->numberExtraColumnsForTrain()); + this->initializeMissingFeatureMasks(frame); + std::tie(m_TreeImpl->m_TrainingRowMasks, m_TreeImpl->m_TestingRowMasks) = + this->crossValidationRowMasks(); - this->selectFeaturesAndEncodeCategories(frame); - this->determineFeatureDataTypes(frame); + frame.resizeColumns(m_TreeImpl->m_NumberThreads, + frame.numberColumns() + this->numberExtraColumnsForTrain()); - if (this->initializeFeatureSampleDistribution()) { - this->initializeHyperparameters(frame); - this->initializeHyperparameterOptimisation(); + this->selectFeaturesAndEncodeCategories(frame); + this->determineFeatureDataTypes(frame); + + if (this->initializeFeatureSampleDistribution()) { + this->initializeHyperparameters(frame); + this->initializeHyperparameterOptimisation(); + } } // TODO can only use factory to create one object since this is moved. This seems trappy. 
@@ -465,37 +478,26 @@ CBoostedTreeFactory::candidateRegularizerSearchInterval(core::CDataFrame& frame, CBoostedTreeFactory CBoostedTreeFactory::constructFromParameters(std::size_t numberThreads, TLossFunctionUPtr loss) { - return {numberThreads, std::move(loss)}; + return {false, numberThreads, std::move(loss)}; } -CBoostedTreeFactory::TBoostedTreeUPtr -CBoostedTreeFactory::constructFromString(std::istream& jsonStringStream, - core::CDataFrame& frame, - TProgressCallback recordProgress, - TMemoryUsageCallback recordMemoryUsage, - TTrainingStateCallback recordTrainingState) { +CBoostedTreeFactory CBoostedTreeFactory::constructFromString(std::istream& jsonStringStream) { + CBoostedTreeFactory result{true, 1, nullptr}; try { - TBoostedTreeUPtr treePtr{new CBoostedTree{ - frame, std::move(recordProgress), std::move(recordMemoryUsage), - std::move(recordTrainingState), TBoostedTreeImplUPtr{new CBoostedTreeImpl{}}}}; core::CJsonStateRestoreTraverser traverser(jsonStringStream); - if (treePtr->acceptRestoreTraverser(traverser) == false || traverser.haveBadState()) { + if (result.m_TreeImpl->acceptRestoreTraverser(traverser) == false || + traverser.haveBadState()) { throw std::runtime_error{"failed to restore boosted tree"}; } - treePtr->m_Impl->m_TrainingProgress.attach(recordProgress); - treePtr->m_Impl->m_TrainingProgress.resumeRestored(); - frame.resizeColumns(treePtr->m_Impl->m_NumberThreads, - frame.numberColumns() + - treePtr->m_Impl->numberExtraColumnsForTrain()); - return treePtr; } catch (const std::exception& e) { - HANDLE_FATAL(<< "Input error: '" << e.what() << "'. Check logs for more details."); + throw std::runtime_error{std::string{"Input error: '"} + e.what() + "'"}; } - return nullptr; + return result; } -CBoostedTreeFactory::CBoostedTreeFactory(std::size_t numberThreads, TLossFunctionUPtr loss) - : m_TreeImpl{std::make_unique(numberThreads, std::move(loss))}, +CBoostedTreeFactory::CBoostedTreeFactory(bool restored, std::size_t numberThreads, TLossFunctionUPtr loss) + : m_Restored{restored}, m_TreeImpl{std::make_unique(numberThreads, + std::move(loss))}, m_GammaSearchInterval{0.0}, m_LambdaSearchInterval{0.0} { } @@ -660,6 +662,11 @@ void CBoostedTreeFactory::setupTrainingProgressMonitoring() { m_TreeImpl->m_TrainingProgress = core::CLoopProgress{totalNumberSteps, m_RecordProgress}; } +void CBoostedTreeFactory::restoreTrainingProgressMonitoring() { + m_TreeImpl->m_TrainingProgress.attach(m_RecordProgress); + m_TreeImpl->m_TrainingProgress.resumeRestored(); +} + void CBoostedTreeFactory::noopRecordTrainingState(std::function) { } diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index 5a46ae07b2..b564d8f613 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -302,6 +302,8 @@ std::size_t CBoostedTreeImpl::columnHoldingDependentVariable() const { } std::size_t CBoostedTreeImpl::numberExtraColumnsForTrain() { + // We store the gradient and curvature of the loss function and the predicted + // value for the dependent variable of the regression in the data frame. 
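    // [Editor's note] That is, one extra slot per row for each of the
    // prediction, the loss gradient and the loss curvature, which is what the
    // return value below counts.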
return 3; } diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc index 99dd04242f..13c9dcc543 100644 --- a/lib/maths/unittest/CBoostedTreeTest.cc +++ b/lib/maths/unittest/CBoostedTreeTest.cc @@ -908,7 +908,7 @@ void CBoostedTreeTest::testPersistRestore() { } // restore auto boostedTree = - maths::CBoostedTreeFactory::constructFromString(persistOnceSStream, *frame); + maths::CBoostedTreeFactory::constructFromString(persistOnceSStream).buildFor(*frame, cols - 1); { core::CJsonStatePersistInserter inserter(persistTwiceSStream); boostedTree->acceptPersistInserter(inserter); @@ -965,8 +965,8 @@ void CBoostedTreeTest::testRestoreErrorHandling() { bool throwsExceptions{false}; try { - auto boostedTree = maths::CBoostedTreeFactory::constructFromString( - errorInBayesianOptimisationState, *frame); + auto boostedTree = maths::CBoostedTreeFactory::constructFromString(errorInBayesianOptimisationState) + .buildFor(*frame, 2); } catch (const std::exception& e) { LOG_DEBUG(<< "got = " << e.what()); throwsExceptions = true; @@ -1004,11 +1004,8 @@ void CBoostedTreeTest::testRestoreErrorHandling() { throwsExceptions = false; try { - auto boostedTree = maths::CBoostedTreeFactory::constructFromString( - errorInBoostedTreeImplState, *frame, - ml::maths::CBoostedTreeFactory::TProgressCallback(), - ml::maths::CBoostedTreeFactory::TMemoryUsageCallback(), - ml::maths::CBoostedTreeFactory::TTrainingStateCallback()); + auto boostedTree = maths::CBoostedTreeFactory::constructFromString(errorInBoostedTreeImplState) + .buildFor(*frame, 2); } catch (const std::exception& e) { LOG_DEBUG(<< "got = " << e.what()); throwsExceptions = true; From 79f3b433cc9085a4e776a08bd8a1ca7f69bb96c9 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 12:18:15 +0100 Subject: [PATCH 09/23] Better naming plus add explanation of persist/restore strategy for loop progress --- include/core/CLoopProgress.h | 2 +- lib/core/CLoopProgress.cc | 5 ++++- lib/maths/CBoostedTreeFactory.cc | 2 +- lib/maths/CBoostedTreeImpl.cc | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/core/CLoopProgress.h b/include/core/CLoopProgress.h index d37de8c3bb..4d551e7707 100644 --- a/include/core/CLoopProgress.h +++ b/include/core/CLoopProgress.h @@ -57,7 +57,7 @@ class CORE_EXPORT CLoopProgress { double scale = 1.0); //! Attach a new progress monitor callback. - void attach(const TProgressCallback& recordProgress); + void progressCallback(const TProgressCallback& recordProgress); //! Increment the progress by \p i. void increment(std::size_t i = 1); diff --git a/lib/core/CLoopProgress.cc b/lib/core/CLoopProgress.cc index 608892f754..679dd9d6f2 100644 --- a/lib/core/CLoopProgress.cc +++ b/lib/core/CLoopProgress.cc @@ -35,7 +35,7 @@ CLoopProgress::CLoopProgress(std::size_t size, const TProgressCallback& recordPr m_StepProgress{scale / static_cast(m_Steps)}, m_RecordProgress{recordProgress} { } -void CLoopProgress::attach(const TProgressCallback& recordProgress) { +void CLoopProgress::progressCallback(const TProgressCallback& recordProgress) { m_RecordProgress = recordProgress; } @@ -52,6 +52,7 @@ void CLoopProgress::increment(std::size_t i) { } void CLoopProgress::resumeRestored() { + // This outputs progress and updates m_LastProgress to the correct value. 
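    // [Editor's note] Because m_LastProgress is deliberately not persisted (see
    // the acceptPersistInserter comment below), the restored object has recorded
    // nothing yet, so increment(0) reports all progress made before the restart
    // to the newly attached callback.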
this->increment(0);
 }

@@ -70,6 +71,8 @@ void CLoopProgress::acceptPersistInserter(CStatePersistInserter& inserter) const
     inserter.insertValue(CURRENT_STEP_PROGRESS_TAG, m_StepProgress,
                          core::CIEEE754::E_DoublePrecision);
     inserter.insertValue(LOOP_POS_TAG, m_Pos);
+    // m_LastProgress is not persisted because, immediately after restoring, we
+    // will not yet have recorded any progress.
 }

 bool CLoopProgress::acceptRestoreTraverser(CStateRestoreTraverser& traverser) {
diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc
index 25c8cb7343..c78a95bd75 100644
--- a/lib/maths/CBoostedTreeFactory.cc
+++ b/lib/maths/CBoostedTreeFactory.cc
@@ -663,7 +663,7 @@ void CBoostedTreeFactory::setupTrainingProgressMonitoring() {
 }

 void CBoostedTreeFactory::restoreTrainingProgressMonitoring() {
-    m_TreeImpl->m_TrainingProgress.attach(m_RecordProgress);
+    m_TreeImpl->m_TrainingProgress.progressCallback(m_RecordProgress);
     m_TreeImpl->m_TrainingProgress.resumeRestored();
 }

diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc
index b564d8f613..42c1be3bdb 100644
--- a/lib/maths/CBoostedTreeImpl.cc
+++ b/lib/maths/CBoostedTreeImpl.cc
@@ -201,7 +201,7 @@ void CBoostedTreeImpl::train(core::CDataFrame& frame,

     LOG_TRACE(<< "Main training loop...");

-    m_TrainingProgress.attach(recordProgress);
+    m_TrainingProgress.progressCallback(recordProgress);

     std::uint64_t lastMemoryUsage(this->memoryUsage());
     recordMemoryUsage(lastMemoryUsage);

From aeae71e0cdd8d13237f71d22d1ba24f221e71fb0 Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 24 Sep 2019 12:27:52 +0100
Subject: [PATCH 10/23] Improve progress related function names

---
 include/maths/CBoostedTreeFactory.h | 4 ++--
 lib/maths/CBoostedTreeFactory.cc    | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/maths/CBoostedTreeFactory.h b/include/maths/CBoostedTreeFactory.h
index fa33e8324f..570684ee14 100644
--- a/include/maths/CBoostedTreeFactory.h
+++ b/include/maths/CBoostedTreeFactory.h
@@ -147,10 +147,10 @@ class MATHS_EXPORT CBoostedTreeFactory final {
     std::size_t numberHyperparameterTuningRounds() const;

     //! Setup monitoring for training progress.
-    void setupTrainingProgressMonitoring();
+    void initializeTrainingProgressMonitoring();

     //! Refresh progress monitoring after restoring from saved training state.
- void restoreTrainingProgressMonitoring(); + void resumeRestoredTrainingProgressMonitoring(); static void noopRecordProgress(double); static void noopRecordMemoryUsage(std::int64_t); diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index c78a95bd75..7b4002d402 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -50,7 +50,7 @@ CBoostedTreeFactory::buildFor(core::CDataFrame& frame, std::size_t dependentVari << m_TreeImpl->m_DependentVariable << " got " << dependentVariable); } - this->restoreTrainingProgressMonitoring(); + this->resumeRestoredTrainingProgressMonitoring(); frame.resizeColumns(m_TreeImpl->m_NumberThreads, frame.numberColumns() + this->numberExtraColumnsForTrain()); @@ -59,7 +59,7 @@ CBoostedTreeFactory::buildFor(core::CDataFrame& frame, std::size_t dependentVari m_TreeImpl->m_DependentVariable = dependentVariable; - this->setupTrainingProgressMonitoring(); + this->initializeTrainingProgressMonitoring(); this->initializeMissingFeatureMasks(frame); std::tie(m_TreeImpl->m_TrainingRowMasks, m_TreeImpl->m_TestingRowMasks) = @@ -638,7 +638,7 @@ std::size_t CBoostedTreeFactory::numberExtraColumnsForTrain() const { return m_TreeImpl->numberExtraColumnsForTrain(); } -void CBoostedTreeFactory::setupTrainingProgressMonitoring() { +void CBoostedTreeFactory::initializeTrainingProgressMonitoring() { // The base unit is the cost of training on one fold. // @@ -662,7 +662,7 @@ void CBoostedTreeFactory::setupTrainingProgressMonitoring() { m_TreeImpl->m_TrainingProgress = core::CLoopProgress{totalNumberSteps, m_RecordProgress}; } -void CBoostedTreeFactory::restoreTrainingProgressMonitoring() { +void CBoostedTreeFactory::resumeRestoredTrainingProgressMonitoring() { m_TreeImpl->m_TrainingProgress.progressCallback(m_RecordProgress); m_TreeImpl->m_TrainingProgress.resumeRestored(); } From 7111620b7fb8a677758c81af6f8a889a1a671a98 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 12:30:36 +0100 Subject: [PATCH 11/23] Improve logic readability --- lib/maths/CBoostedTreeFactory.cc | 61 ++++++++++++++++---------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 7b4002d402..3936f9ff75 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -313,37 +313,36 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa std::tie(gainPerNode, totalCurvaturePerNode) = this->estimateTreeGainAndCurvature(frame, allTrainingRowsMask); - if (gainPerNode > 0.0 && m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { - - TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; - fallbackInterval *= m_TreeImpl->m_Eta; - auto interval = this->candidateRegularizerSearchInterval( - frame, allTrainingRowsMask, [this, gainPerNode](double scale) { - m_TreeImpl->m_Regularization.gamma(scale * gainPerNode); - }); - m_GammaSearchInterval = interval.value_or(fallbackInterval) * gainPerNode; - LOG_TRACE(<< "gamma search interval = [" - << m_GammaSearchInterval.toDelimited() << "]"); - - } else if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { - m_TreeImpl->m_RegularizationOverride.gamma(0.0); - } - - if (totalCurvaturePerNode > 0.0 && - m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { - - TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; - 
m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)); - auto interval = this->candidateRegularizerSearchInterval( - frame, allTrainingRowsMask, [this, totalCurvaturePerNode](double scale) { - m_TreeImpl->m_Regularization.lambda(scale * totalCurvaturePerNode); - }); - m_LambdaSearchInterval = interval.value_or(fallbackInterval) * totalCurvaturePerNode; - LOG_TRACE(<< "lambda search interval = [" - << m_LambdaSearchInterval.toDelimited() << "]"); - - } else if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { - m_TreeImpl->m_RegularizationOverride.lambda(0.0); + if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { + if (gainPerNode > 0.0) { + TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; + fallbackInterval *= m_TreeImpl->m_Eta; + auto interval = this->candidateRegularizerSearchInterval( + frame, allTrainingRowsMask, [this, gainPerNode](double scale) { + m_TreeImpl->m_Regularization.gamma(scale * gainPerNode); + }); + m_GammaSearchInterval = interval.value_or(fallbackInterval) * gainPerNode; + LOG_TRACE(<< "gamma search interval = [" + << m_GammaSearchInterval.toDelimited() << "]"); + } else { + m_TreeImpl->m_RegularizationOverride.gamma(0.0); + } + } + + if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { + if (totalCurvaturePerNode > 0.0) { + TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; + m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)); + auto interval = this->candidateRegularizerSearchInterval( + frame, allTrainingRowsMask, [this, totalCurvaturePerNode](double scale) { + m_TreeImpl->m_Regularization.lambda(scale * totalCurvaturePerNode); + }); + m_LambdaSearchInterval = interval.value_or(fallbackInterval) * totalCurvaturePerNode; + LOG_TRACE(<< "lambda search interval = [" + << m_LambdaSearchInterval.toDelimited() << "]"); + } else { + m_TreeImpl->m_RegularizationOverride.lambda(0.0); + } } double scale{ From 1f5d4c0f3135f1a432d2d4b27cfe7953889d35b4 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 12:32:30 +0100 Subject: [PATCH 12/23] Unpack long line --- lib/maths/CBoostedTreeFactory.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 3936f9ff75..0817b693cd 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -345,11 +345,11 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa } } - double scale{ - static_cast(m_TreeImpl->m_NumberFolds - 1) / - static_cast(m_TreeImpl->m_NumberFolds) / - ((m_TreeImpl->m_RegularizationOverride.gamma() != boost::none ? 0.0 : 1.0) + - (m_TreeImpl->m_RegularizationOverride.lambda() != boost::none ? 0.0 : 1.0))}; + double freeRegularizationParameters{ + (m_TreeImpl->m_RegularizationOverride.gamma() != boost::none ? 0.0 : 1.0) + + (m_TreeImpl->m_RegularizationOverride.lambda() != boost::none ? 
0.0 : 1.0)}; + double scale{static_cast(m_TreeImpl->m_NumberFolds - 1) / + static_cast(m_TreeImpl->m_NumberFolds) / freeRegularizationParameters}; if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { m_GammaSearchInterval *= scale; From f63c2289883ea053fdf643ec38e19a7885a97c56 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 13:14:42 +0100 Subject: [PATCH 13/23] Improve comments --- include/maths/CBoostedTreeImpl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index 0c11fd3f0b..f2e9c6ac49 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -120,18 +120,18 @@ class MATHS_EXPORT CBoostedTreeImpl final { using TNodeVec = std::vector; using TNodeVecVec = std::vector; - //! \brief Holds the parameters associated with the different types of regulariser + //! \brief Holds the parameters associated with the different types of regularizer //! terms available. template class CRegularization final { public: - //! Set the multiplier of the tree size regularizer. + //! Set the multiplier of the tree size penalty. CRegularization& gamma(double gamma) { m_Gamma = gamma; return *this; } - //! Set the multiplier of the square leaf weight regularizer. + //! Set the multiplier of the square leaf weight penalty. CRegularization& lambda(double lambda) { m_Lambda = lambda; return *this; @@ -142,10 +142,10 @@ class MATHS_EXPORT CBoostedTreeImpl final { return (m_Gamma == T{} ? 1 : 0) + (m_Lambda == T{} ? 1 : 0); } - //! Multiplier of the tree size regularizer. + //! Multiplier of the tree size penalty. T gamma() const { return m_Gamma; } - //! Multiplier of the square leaf weight regularizer. + //! Multiplier of the square leaf weight penalty. T lambda() const { return m_Lambda; } //! Get description of the regularization parameters. From e0dd26ede432e16cd19d7ce0961f925db920f2ef Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 13:17:21 +0100 Subject: [PATCH 14/23] Correct out-of-date comment --- include/maths/CBoostedTreeImpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index f2e9c6ac49..820ab324d2 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -674,8 +674,8 @@ class MATHS_EXPORT CBoostedTreeImpl final { //! the dependent variable. core::CPackedBitVector allTrainingRowsMask() const; - //! Compute the sum loss for the predictions from \p frame and the leaf - //! count and squared weight sum from \p forest. + //! Compute the \p percentile percentile gain per split and the sum of row + //! curvatures per internal node of \p forest. 
    TDoubleDoublePr gainAndCurvatureAtPercentile(double percentile,
                                                 const TNodeVecVec& forest) const;

From 4cee56fb0a27c62d34fe3490e140ee32d673d34e Mon Sep 17 00:00:00 2001
From: Tom Veasey
Date: Tue, 24 Sep 2019 13:50:57 +0100
Subject: [PATCH 15/23] Improve function naming and unpack expression

---
 include/maths/CBoostedTreeFactory.h | 15 +++++---
 lib/maths/CBoostedTreeFactory.cc    | 54 ++++++++++++++++++-----------
 2 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/include/maths/CBoostedTreeFactory.h b/include/maths/CBoostedTreeFactory.h
index 570684ee14..e661fcb9fa 100644
--- a/include/maths/CBoostedTreeFactory.h
+++ b/include/maths/CBoostedTreeFactory.h
@@ -98,7 +98,7 @@ class MATHS_EXPORT CBoostedTreeFactory final {
     using TOptionalVector = boost::optional<TVector>;
     using TPackedBitVectorVec = std::vector<core::CPackedBitVector>;
     using TBoostedTreeImplUPtr = std::unique_ptr<CBoostedTreeImpl>;
-    using TScaleRegularization = std::function<void(double)>;
+    using TScaleRegularization = std::function<void(CBoostedTreeImpl&, double)>;

 private:
     static const double MINIMUM_ETA;
@@ -135,10 +135,15 @@ class MATHS_EXPORT CBoostedTreeFactory final {
     TDoubleDoublePr estimateTreeGainAndCurvature(core::CDataFrame& frame,
                                                  const core::CPackedBitVector& trainingRowMask) const;

-    //! Get the regularizer value at the point the model starts to overfit.
-    TOptionalVector candidateRegularizerSearchInterval(core::CDataFrame& frame,
-                                                       core::CPackedBitVector trainingRowMask,
-                                                       TScaleRegularization scale) const;
+    //! Perform a line search with quadratic approximation for the regularizer
+    //! value at which the model starts to overfit.
+    //!
+    //! \param applyScaleToRegularizer Applies a specified scale to the
+    //! initially chosen value in the tree implementation.
+    TOptionalVector
+    lineSearchWithQuadraticApproxToTestError(core::CDataFrame& frame,
+                                             core::CPackedBitVector trainingRowMask,
+                                             const TScaleRegularization& applyScaleToRegularizer) const;

     //! Initialize the state for hyperparameter optimisation. 
void initializeHyperparameterOptimisation() const; diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 0817b693cd..1c676db3cb 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -317,11 +317,15 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (gainPerNode > 0.0) { TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; fallbackInterval *= m_TreeImpl->m_Eta; - auto interval = this->candidateRegularizerSearchInterval( - frame, allTrainingRowsMask, [this, gainPerNode](double scale) { - m_TreeImpl->m_Regularization.gamma(scale * gainPerNode); - }); - m_GammaSearchInterval = interval.value_or(fallbackInterval) * gainPerNode; + + double initialGamma{gainPerNode}; + auto gammaStep = [initialGamma](CBoostedTreeImpl& tree, double scale) { + tree.m_Regularization.gamma(scale * initialGamma); + }; + m_GammaSearchInterval = this->lineSearchWithQuadraticApproxToTestError( + frame, allTrainingRowsMask, gammaStep) + .value_or(fallbackInterval) * + gainPerNode; LOG_TRACE(<< "gamma search interval = [" << m_GammaSearchInterval.toDelimited() << "]"); } else { @@ -333,11 +337,15 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (totalCurvaturePerNode > 0.0) { TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)); - auto interval = this->candidateRegularizerSearchInterval( - frame, allTrainingRowsMask, [this, totalCurvaturePerNode](double scale) { - m_TreeImpl->m_Regularization.lambda(scale * totalCurvaturePerNode); - }); - m_LambdaSearchInterval = interval.value_or(fallbackInterval) * totalCurvaturePerNode; + + double initialLambda{totalCurvaturePerNode}; + auto lambdaStep = [initialLambda](CBoostedTreeImpl& tree, double scale) { + tree.m_Regularization.lambda(scale * initialLambda); + }; + m_LambdaSearchInterval = this->lineSearchWithQuadraticApproxToTestError( + frame, allTrainingRowsMask, lambdaStep) + .value_or(fallbackInterval) * + totalCurvaturePerNode; LOG_TRACE(<< "lambda search interval = [" << m_LambdaSearchInterval.toDelimited() << "]"); } else { @@ -380,10 +388,10 @@ CBoostedTreeFactory::estimateTreeGainAndCurvature(core::CDataFrame& frame, return {gain, curvature}; } -CBoostedTreeFactory::TOptionalVector -CBoostedTreeFactory::candidateRegularizerSearchInterval(core::CDataFrame& frame, - core::CPackedBitVector trainingRowMask, - TScaleRegularization scaleRegularization) const { +CBoostedTreeFactory::TOptionalVector CBoostedTreeFactory::lineSearchWithQuadraticApproxToTestError( + core::CDataFrame& frame, + core::CPackedBitVector trainingRowMask, + const TScaleRegularization& regularizerStep) const { // This uses a quadratic approximation to the test loss function w.r.t. 
// the scaled regularization hyperparameter from which it estimates the @@ -419,7 +427,7 @@ CBoostedTreeFactory::candidateRegularizerSearchInterval(core::CDataFrame& frame, double scale{1.0}; for (std::size_t i = 0; i < INITIAL_REGULARIZER_SEARCH_ITERATIONS; ++i) { - scaleRegularization(scale); + regularizerStep(*m_TreeImpl, scale); scale *= multiplier; auto forest = m_TreeImpl->trainForest(frame, trainingRowMask, m_RecordMemoryUsage); double testLoss{m_TreeImpl->meanLoss(frame, testRowMask, forest)}; @@ -443,12 +451,16 @@ CBoostedTreeFactory::candidateRegularizerSearchInterval(core::CDataFrame& frame, double leftEndpoint{0.0}; double rightEndpoint{static_cast(INITIAL_REGULARIZER_SEARCH_ITERATIONS - 1)}; double stationaryPoint{-gradient / 2.0 / curvature}; - double distanceToLeftEndpoint{std::fabs(leftEndpoint - stationaryPoint)}; - double distanceToRightEndpoint{std::fabs(rightEndpoint - stationaryPoint)}; - double logBestRegularizerScale{ - curvature < 0.0 - ? (distanceToLeftEndpoint > distanceToRightEndpoint ? leftEndpoint : rightEndpoint) - : CTools::truncate(stationaryPoint, leftEndpoint, rightEndpoint)}; + double logBestRegularizerScale{[&] { + double distanceToLeftEndpoint{std::fabs(leftEndpoint - stationaryPoint)}; + double distanceToRightEndpoint{std::fabs(rightEndpoint - stationaryPoint)}; + if (curvature < 0.0) { + // Stationary point is a maximum so use furthest point in interval. + return distanceToLeftEndpoint > distanceToRightEndpoint ? leftEndpoint : rightEndpoint; + } + // Stationary point is a minimum so use nearest point in the interval. + return CTools::truncate(stationaryPoint, leftEndpoint, rightEndpoint); + }()}; double bestRegularizerScale{std::pow(0.5, logBestRegularizerScale)}; // Find an interval with a high probability of containing the optimal From cbe44d64570bf1d000cca3e1714df0f29e5e0d8b Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 14:35:29 +0100 Subject: [PATCH 16/23] Corrections to search endpoint estimates --- lib/maths/CBoostedTreeFactory.cc | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 1c676db3cb..f766eb7dd8 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -463,23 +463,28 @@ CBoostedTreeFactory::TOptionalVector CBoostedTreeFactory::lineSearchWithQuadrati }()}; double bestRegularizerScale{std::pow(0.5, logBestRegularizerScale)}; - // Find an interval with a high probability of containing the optimal - // regularisation parameter if the interval we searched has a minimum. TVector interval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; if (curvature > 0.0) { + // Find a short interval with a high probability of containing the optimal + // regularisation parameter if we found a minimum. In particular, we solve + // curvature * (x - best)^2 = 3 sigma where sigma is the standard deviation + // of the test loss residuals. We don't extrapolate so don't truncate if a + // crossing point lies outside the searched interval. 
TMeanVarAccumulator residualMoments; for (std::size_t i = 0; i < INITIAL_REGULARIZER_SEARCH_ITERATIONS; ++i) { residualMoments.add(testLosses[i] - leastSquaresQuadraticTestLoss.predict( static_cast(i))); } - double margin{2.0 * std::sqrt(CBasicStatistics::variance(residualMoments)) / curvature}; - if (logBestRegularizerScale - margin >= leftEndpoint) { - interval(MIN_REGULARIZER_INDEX) = - std::max(std::pow(0.5, margin), MIN_REGULARIZER_SCALE); + double sigma{std::sqrt(CBasicStatistics::variance(residualMoments))}; + double logScaleAtThreeSigma{std::sqrt(3.0 * sigma / curvature)}; + if (logBestRegularizerScale - logScaleAtThreeSigma >= leftEndpoint) { + // These are scales > bestRegularizerScale hence 1 / multiplier. + interval(MAX_REGULARIZER_INDEX) = std::min( + std::pow(1.0 / multiplier, logScaleAtThreeSigma), MAX_REGULARIZER_SCALE); } - if (logBestRegularizerScale + margin <= rightEndpoint) { - interval(MAX_REGULARIZER_INDEX) = - std::min(std::pow(2.0, margin), MAX_REGULARIZER_SCALE); + if (logBestRegularizerScale + logScaleAtThreeSigma <= rightEndpoint) { + interval(MIN_REGULARIZER_INDEX) = std::max( + std::pow(multiplier, logScaleAtThreeSigma), MIN_REGULARIZER_SCALE); } } interval *= bestRegularizerScale; From e2c53d9008990cc7f5125f40ace72b33c769240a Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 14:40:35 +0100 Subject: [PATCH 17/23] Formatting --- include/maths/CBoostedTreeImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h index 820ab324d2..893dbc51f8 100644 --- a/include/maths/CBoostedTreeImpl.h +++ b/include/maths/CBoostedTreeImpl.h @@ -674,7 +674,7 @@ class MATHS_EXPORT CBoostedTreeImpl final { //! the dependent variable. core::CPackedBitVector allTrainingRowsMask() const; - //! Compute the \p percentile percentile gain per split and the sum of row + //! Compute the \p percentile percentile gain per split and the sum of row //! curvatures per internal node of \p forest. 
TDoubleDoublePr gainAndCurvatureAtPercentile(double percentile, const TNodeVecVec& forest) const; From 1370c18769d7b5aa0665e2487ff80b190f4c0027 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 14:42:09 +0100 Subject: [PATCH 18/23] Test fix for rename --- lib/core/unittest/CLoopProgressTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/core/unittest/CLoopProgressTest.cc b/lib/core/unittest/CLoopProgressTest.cc index bdcae9fb76..ac31deb6fd 100644 --- a/lib/core/unittest/CLoopProgressTest.cc +++ b/lib/core/unittest/CLoopProgressTest.cc @@ -165,7 +165,7 @@ void CLoopProgressTest::testSerialization() { auto restoredRecordProgress = [&restoredProgress](double p) { restoredProgress += p; }; - restoredLoopProgress.attach(restoredRecordProgress); + restoredLoopProgress.progressCallback(restoredRecordProgress); restoredLoopProgress.resumeRestored(); CPPUNIT_ASSERT_EQUAL(loopProgress.checksum(), restoredLoopProgress.checksum()); From 958f45a1d1af37c53fcd32b218f340f78b793db6 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 24 Sep 2019 16:48:50 +0100 Subject: [PATCH 19/23] Update test for refactor --- lib/api/CDataFrameAnalysisRunner.cc | 3 ++- lib/api/unittest/CDataFrameAnalyzerTest.cc | 20 +++++++++++--------- lib/api/unittest/CDataFrameAnalyzerTest.h | 4 +++- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/lib/api/CDataFrameAnalysisRunner.cc b/lib/api/CDataFrameAnalysisRunner.cc index 4dc3872da7..2acf7264ab 100644 --- a/lib/api/CDataFrameAnalysisRunner.cc +++ b/lib/api/CDataFrameAnalysisRunner.cc @@ -84,7 +84,8 @@ void CDataFrameAnalysisRunner::computeAndSaveExecutionStrategy() { if (memoryUsage <= memoryLimit) { break; } - // if we are not allowed to spill over to disk then only one partition is possible + // If we are not allowed to spill over to disk then only one partition + // is possible. 
if (m_Spec.diskUsageAllowed() == false) { LOG_TRACE(<< "stop partition number computation since disk usage is turned off"); break; diff --git a/lib/api/unittest/CDataFrameAnalyzerTest.cc b/lib/api/unittest/CDataFrameAnalyzerTest.cc index d833f03bef..0f9e161082 100644 --- a/lib/api/unittest/CDataFrameAnalyzerTest.cc +++ b/lib/api/unittest/CDataFrameAnalyzerTest.cc @@ -83,7 +83,6 @@ class CTestDataAdder : public core::CDataAdder { private: TOStreamP m_Stream; }; -} std::vector streamToStringVector(std::stringstream&& tokenStream) { std::vector results; @@ -362,6 +361,7 @@ void addRegressionTestData(const TStrVec& fieldNames, } }); } +} void CDataFrameAnalyzerTest::testWithoutControlMessages() { @@ -1130,8 +1130,7 @@ void CDataFrameAnalyzerTest::testRunBoostedTreeTrainingWithStateRecoverySubrouti rng.generateUniformSamples(-10.0, 10.0, weights.size() * numberExamples, values); auto persistenceStream{std::make_shared()}; - CDataFrameAnalyzerTest::TPersisterSupplier persisterSupplier = - [&persistenceStream]() -> TDataAdderUPtr { + TPersisterSupplier persisterSupplier = [&persistenceStream]() -> TDataAdderUPtr { return std::make_unique(persistenceStream); }; @@ -1142,20 +1141,21 @@ void CDataFrameAnalyzerTest::testRunBoostedTreeTrainingWithStateRecoverySubrouti numberRoundsPerHyperparameter, 12, {}, lambda, gamma, eta, maximumNumberTrees, featureBagFraction, &persisterSupplier), outputWriterFactory}; + std::size_t dependentVariable( + std::find(fieldNames.begin(), fieldNames.end(), "c5") - fieldNames.begin()); auto frame{passDataToAnalyzer(fieldNames, fieldValues, analyzer, weights, values)}; analyzer.handleRecord(fieldNames, {"", "", "", "", "", "", "$"}); TStrVec persistedStatesString{ streamToStringVector(std::stringstream(persistenceStream->str()))}; - auto expectedTree{getFinalTree(persistedStatesString, frame)}; + auto expectedTree{this->getFinalTree(persistedStatesString, frame, dependentVariable)}; // Compute actual tree persistenceStream->str(""); std::istringstream intermediateStateStream{persistedStatesString[iterationToRestartFrom]}; - CDataFrameAnalyzerTest::TRestoreSearcherSupplier restoreSearcherSupplier = - [&intermediateStateStream]() -> TDataSearcherUPtr { + TRestoreSearcherSupplier restoreSearcherSupplier = [&intermediateStateStream]() -> TDataSearcherUPtr { return std::make_unique(intermediateStateStream.str()); }; @@ -1170,7 +1170,7 @@ void CDataFrameAnalyzerTest::testRunBoostedTreeTrainingWithStateRecoverySubrouti persistedStatesString = streamToStringVector(std::stringstream(persistenceStream->str())); - auto actualTree{getFinalTree(persistedStatesString, frame)}; + auto actualTree{this->getFinalTree(persistedStatesString, frame, dependentVariable)}; // compare hyperparameter @@ -1199,11 +1199,13 @@ void CDataFrameAnalyzerTest::testRunBoostedTreeTrainingWithStateRecoverySubrouti maths::CBoostedTreeFactory::TBoostedTreeUPtr CDataFrameAnalyzerTest::getFinalTree(const TStrVec& persistedStates, - std::unique_ptr& frame) const { + std::unique_ptr& frame, + std::size_t dependentVariable) const { CTestDataSearcher dataSearcher(persistedStates.back()); auto decompressor{std::make_unique(dataSearcher)}; decompressor->setStateRestoreSearch(api::ML_STATE_INDEX, api::getRegressionStateId("testJob")); auto stream{decompressor->search(1, 1)}; - return maths::CBoostedTreeFactory::constructFromString(*stream, *frame); + return maths::CBoostedTreeFactory::constructFromString(*stream).buildFor( + *frame, dependentVariable); } diff --git a/lib/api/unittest/CDataFrameAnalyzerTest.h 
b/lib/api/unittest/CDataFrameAnalyzerTest.h index 7b307a0f80..7943653c08 100644 --- a/lib/api/unittest/CDataFrameAnalyzerTest.h +++ b/lib/api/unittest/CDataFrameAnalyzerTest.h @@ -57,7 +57,9 @@ class CDataFrameAnalyzerTest : public CppUnit::TestFixture { std::size_t iterationToRestartFrom) const; ml::maths::CBoostedTreeFactory::TBoostedTreeUPtr - getFinalTree(const TStrVec& persistedStates, TDataFrameUPtr& frame) const; + getFinalTree(const TStrVec& persistedStates, + TDataFrameUPtr& frame, + std::size_t dependentVariable) const; }; #endif // INCLUDED_CDataFrameAnalyzerTest_h From 937df235947e1c0302eb346225aa545b26903d32 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 25 Sep 2019 16:52:11 +0100 Subject: [PATCH 20/23] Rejig line search to hide the fact it's working on exponential scale --- include/maths/CBoostedTreeFactory.h | 23 ++-- lib/maths/CBoostedTree.cc | 2 +- lib/maths/CBoostedTreeFactory.cc | 148 ++++++++++++++----------- lib/maths/unittest/CBoostedTreeTest.cc | 8 +- 4 files changed, 100 insertions(+), 81 deletions(-) diff --git a/include/maths/CBoostedTreeFactory.h b/include/maths/CBoostedTreeFactory.h index e661fcb9fa..1f90683ba0 100644 --- a/include/maths/CBoostedTreeFactory.h +++ b/include/maths/CBoostedTreeFactory.h @@ -98,7 +98,8 @@ class MATHS_EXPORT CBoostedTreeFactory final { using TOptionalVector = boost::optional; using TPackedBitVectorVec = std::vector; using TBoostedTreeImplUPtr = std::unique_ptr; - using TScaleRegularization = std::function; + using TApplyRegularizerStep = + std::function; private: static const double MINIMUM_ETA; @@ -135,15 +136,17 @@ class MATHS_EXPORT CBoostedTreeFactory final { TDoubleDoublePr estimateTreeGainAndCurvature(core::CDataFrame& frame, const core::CPackedBitVector& trainingRowMask) const; - //! Perform a line search with quadratic approximation for the regularizer - //! value at the model starts to overfit. + //! Perform a line search for the test loss w.r.t. a single regularization + //! hyperparameter and apply Newton's method to find the minimum. The plan + //! is to find a value near where the model starts to overfit. //! - //! \note applyScaleToRegularizer Applies a specified scale to the initial - //! choosen value for tree implemenation. - TOptionalVector - lineSearchWithQuadraticApproxToTestError(core::CDataFrame& frame, + //! \return The interval to search during the main hyperparameter optimisation + //! loop or null if this couldn't be found. + TOptionalVector testLossNewtonLineSearch(core::CDataFrame& frame, core::CPackedBitVector trainingRowMask, - const TScaleRegularization& applyScaleToRegularizer) const; + const TApplyRegularizerStep& applyRegularizerStep, + double returnedIntervalLeftEndOffset, + double returnedIntervalRightEndOffset) const; //! Initialize the state for hyperparameter optimisation. 
void initializeHyperparameterOptimisation() const; @@ -166,8 +169,8 @@ class MATHS_EXPORT CBoostedTreeFactory final { TOptionalSize m_BayesianOptimisationRestarts; bool m_Restored = false; TBoostedTreeImplUPtr m_TreeImpl; - TVector m_GammaSearchInterval; - TVector m_LambdaSearchInterval; + TVector m_LogGammaSearchInterval; + TVector m_LogLambdaSearchInterval; TProgressCallback m_RecordProgress = noopRecordProgress; TMemoryUsageCallback m_RecordMemoryUsage = noopRecordMemoryUsage; TTrainingStateCallback m_RecordTrainingState = noopRecordTrainingState; diff --git a/lib/maths/CBoostedTree.cc b/lib/maths/CBoostedTree.cc index 0df48cd91b..a8ad65c078 100644 --- a/lib/maths/CBoostedTree.cc +++ b/lib/maths/CBoostedTree.cc @@ -76,7 +76,7 @@ double CMse::value(double prediction, double actual) const { } double CMse::gradient(double prediction, double actual) const { - return prediction - actual; + return 2.0 * (prediction - actual); } double CMse::curvature(double /*prediction*/, double /*actual*/) const { diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index f766eb7dd8..73c6d02b38 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -102,12 +102,12 @@ void CBoostedTreeFactory::initializeHyperparameterOptimisation() const { CBayesianOptimisation::TDoubleDoublePrVec boundingBox; if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { - boundingBox.emplace_back(std::log(m_LambdaSearchInterval(MIN_REGULARIZER_INDEX)), - std::log(m_LambdaSearchInterval(MAX_REGULARIZER_INDEX))); + boundingBox.emplace_back(m_LogLambdaSearchInterval(MIN_REGULARIZER_INDEX), + m_LogLambdaSearchInterval(MAX_REGULARIZER_INDEX)); } if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { - boundingBox.emplace_back(std::log(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)), - std::log(m_GammaSearchInterval(MAX_REGULARIZER_INDEX))); + boundingBox.emplace_back(m_LogGammaSearchInterval(MIN_REGULARIZER_INDEX), + m_LogGammaSearchInterval(MAX_REGULARIZER_INDEX)); } if (m_TreeImpl->m_EtaOverride == boost::none) { double rate{m_TreeImpl->m_EtaGrowthRatePerTree - 1.0}; @@ -315,19 +315,24 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { if (gainPerNode > 0.0) { - TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; - fallbackInterval *= m_TreeImpl->m_Eta; - - double initialGamma{gainPerNode}; - auto gammaStep = [initialGamma](CBoostedTreeImpl& tree, double scale) { - tree.m_Regularization.gamma(scale * initialGamma); + TVector fallbackInterval{{std::log(MIN_REGULARIZER_SCALE), 0.0, + std::log(MAX_REGULARIZER_SCALE)}}; + fallbackInterval += TVector{std::log(m_TreeImpl->m_Eta)}; + + double logInitialGamma{std::log(gainPerNode)}; + auto applyGammaStep = [logInitialGamma](CBoostedTreeImpl& tree, + double stepSize, std::size_t step) { + tree.m_Regularization.gamma( + std::exp(logInitialGamma + static_cast(step) * stepSize)); }; - m_GammaSearchInterval = this->lineSearchWithQuadraticApproxToTestError( - frame, allTrainingRowsMask, gammaStep) - .value_or(fallbackInterval) * - gainPerNode; - LOG_TRACE(<< "gamma search interval = [" - << m_GammaSearchInterval.toDelimited() << "]"); + m_LogGammaSearchInterval = + TVector{std::log(gainPerNode)} + + this->testLossNewtonLineSearch(frame, allTrainingRowsMask, applyGammaStep, + std::log(MIN_REGULARIZER_SCALE), + std::log(MAX_REGULARIZER_SCALE)) + .value_or(fallbackInterval); + LOG_TRACE(<< 
"log gamma search interval = [" + << m_LogGammaSearchInterval.toDelimited() << "]"); } else { m_TreeImpl->m_RegularizationOverride.gamma(0.0); } @@ -335,19 +340,25 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { if (totalCurvaturePerNode > 0.0) { - TVector fallbackInterval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; - m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(MIN_REGULARIZER_INDEX)); - - double initialLambda{totalCurvaturePerNode}; - auto lambdaStep = [initialLambda](CBoostedTreeImpl& tree, double scale) { - tree.m_Regularization.lambda(scale * initialLambda); + TVector fallbackInterval{{std::log(MIN_REGULARIZER_SCALE), 0.0, + std::log(MAX_REGULARIZER_SCALE)}}; + m_TreeImpl->m_Regularization.gamma( + std::exp(m_LogGammaSearchInterval(MIN_REGULARIZER_INDEX))); + + double logInitialLambda{std::log(totalCurvaturePerNode)}; + auto applyLambdaStep = [logInitialLambda](CBoostedTreeImpl& tree, + double stepSize, std::size_t step) { + tree.m_Regularization.lambda(std::exp( + logInitialLambda + static_cast(step) * stepSize)); }; - m_LambdaSearchInterval = this->lineSearchWithQuadraticApproxToTestError( - frame, allTrainingRowsMask, lambdaStep) - .value_or(fallbackInterval) * - totalCurvaturePerNode; - LOG_TRACE(<< "lambda search interval = [" - << m_LambdaSearchInterval.toDelimited() << "]"); + m_LogLambdaSearchInterval = + TVector{std::log(totalCurvaturePerNode)} + + this->testLossNewtonLineSearch(frame, allTrainingRowsMask, applyLambdaStep, + std::log(MIN_REGULARIZER_SCALE), + std::log(MAX_REGULARIZER_SCALE)) + .value_or(fallbackInterval); + LOG_TRACE(<< "log lambda search interval = [" + << m_LogLambdaSearchInterval.toDelimited() << "]"); } else { m_TreeImpl->m_RegularizationOverride.lambda(0.0); } @@ -360,12 +371,14 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa static_cast(m_TreeImpl->m_NumberFolds) / freeRegularizationParameters}; if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { - m_GammaSearchInterval *= scale; - m_TreeImpl->m_Regularization.gamma(m_GammaSearchInterval(BEST_REGULARIZER_INDEX)); + m_LogGammaSearchInterval += TVector{std::log(scale)}; + m_TreeImpl->m_Regularization.gamma( + std::exp(m_LogGammaSearchInterval(BEST_REGULARIZER_INDEX))); } if (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none) { - m_LambdaSearchInterval *= scale; - m_TreeImpl->m_Regularization.lambda(m_LambdaSearchInterval(BEST_REGULARIZER_INDEX)); + m_LogLambdaSearchInterval += TVector{std::log(scale)}; + m_TreeImpl->m_Regularization.lambda( + std::exp(m_LogLambdaSearchInterval(BEST_REGULARIZER_INDEX))); } LOG_TRACE(<< "regularization(initial) = " << m_TreeImpl->m_Regularization.print()); } @@ -388,10 +401,12 @@ CBoostedTreeFactory::estimateTreeGainAndCurvature(core::CDataFrame& frame, return {gain, curvature}; } -CBoostedTreeFactory::TOptionalVector CBoostedTreeFactory::lineSearchWithQuadraticApproxToTestError( - core::CDataFrame& frame, - core::CPackedBitVector trainingRowMask, - const TScaleRegularization& regularizerStep) const { +CBoostedTreeFactory::TOptionalVector +CBoostedTreeFactory::testLossNewtonLineSearch(core::CDataFrame& frame, + core::CPackedBitVector trainingRowMask, + const TApplyRegularizerStep& applyRegularizerStep, + double returnedIntervalLeftEndOffset, + double returnedIntervalRightEndOffset) const { // This uses a quadratic approximation to the test loss function w.r.t. 
// the scaled regularization hyperparameter from which it estimates the @@ -419,19 +434,17 @@ CBoostedTreeFactory::TOptionalVector CBoostedTreeFactory::lineSearchWithQuadrati double maximumTreeSizeMultiplier{MAIN_TRAINING_LOOP_TREE_SIZE_MULTIPLIER}; std::swap(maximumTreeSizeMultiplier, m_TreeImpl->m_MaximumTreeSizeMultiplier); - double multiplier{std::exp( - -std::log(1024.0) / static_cast(INITIAL_REGULARIZER_SEARCH_ITERATIONS))}; + double stepSize{-std::log(1024.0) / + static_cast(INITIAL_REGULARIZER_SEARCH_ITERATIONS)}; CLeastSquaresOnlineRegression<2, double> leastSquaresQuadraticTestLoss; TDoubleVec testLosses(INITIAL_REGULARIZER_SEARCH_ITERATIONS); - double scale{1.0}; for (std::size_t i = 0; i < INITIAL_REGULARIZER_SEARCH_ITERATIONS; ++i) { - regularizerStep(*m_TreeImpl, scale); - scale *= multiplier; + applyRegularizerStep(*m_TreeImpl, stepSize, i); auto forest = m_TreeImpl->trainForest(frame, trainingRowMask, m_RecordMemoryUsage); double testLoss{m_TreeImpl->meanLoss(frame, testRowMask, forest)}; - leastSquaresQuadraticTestLoss.add(static_cast(i), testLoss); + leastSquaresQuadraticTestLoss.add(static_cast(i) * stepSize, testLoss); testLosses[i] = testLoss; m_TreeImpl->m_TrainingProgress.increment(); } @@ -440,20 +453,23 @@ CBoostedTreeFactory::TOptionalVector CBoostedTreeFactory::lineSearchWithQuadrati std::swap(maximumTreeSizeMultiplier, m_TreeImpl->m_MaximumTreeSizeMultiplier); CLeastSquaresOnlineRegression<2, double>::TArray params; - bool successful{leastSquaresQuadraticTestLoss.parameters(params)}; + if (leastSquaresQuadraticTestLoss.parameters(params) == false) { + return TOptionalVector{}; + } + double gradient{params[1]}; double curvature{params[2]}; LOG_TRACE(<< "[intercept, slope, curvature] = " << core::CContainerPrinter::print(params)); - // Find the scale at the minimum of the least squares quadratic fit - // to the test loss in the search interval. - double leftEndpoint{0.0}; - double rightEndpoint{static_cast(INITIAL_REGULARIZER_SEARCH_ITERATIONS - 1)}; + // Find the scale at the minimum of the least squares quadratic fit to + // the test loss in the search interval. Note step size is negative. + double leftEndpoint{static_cast(INITIAL_REGULARIZER_SEARCH_ITERATIONS - 1) * stepSize}; + double rightEndpoint{0.0}; double stationaryPoint{-gradient / 2.0 / curvature}; - double logBestRegularizerScale{[&] { - double distanceToLeftEndpoint{std::fabs(leftEndpoint - stationaryPoint)}; - double distanceToRightEndpoint{std::fabs(rightEndpoint - stationaryPoint)}; + double bestRegularizer{[&] { + double distanceToLeftEndpoint{std::fabs(rightEndpoint - stationaryPoint)}; + double distanceToRightEndpoint{std::fabs(leftEndpoint - stationaryPoint)}; if (curvature < 0.0) { // Stationary point is a maximum so use furthest point in interval. return distanceToLeftEndpoint > distanceToRightEndpoint ? leftEndpoint : rightEndpoint; @@ -461,35 +477,35 @@ CBoostedTreeFactory::TOptionalVector CBoostedTreeFactory::lineSearchWithQuadrati // Stationary point is a minimum so use nearest point in the interval. 
return CTools::truncate(stationaryPoint, leftEndpoint, rightEndpoint); }()}; - double bestRegularizerScale{std::pow(0.5, logBestRegularizerScale)}; + LOG_TRACE(<< "best regularizer = " << bestRegularizer); - TVector interval{{MIN_REGULARIZER_SCALE, 1.0, MAX_REGULARIZER_SCALE}}; + TVector interval{{returnedIntervalLeftEndOffset, 0.0, returnedIntervalRightEndOffset}}; if (curvature > 0.0) { // Find a short interval with a high probability of containing the optimal // regularisation parameter if we found a minimum. In particular, we solve // curvature * (x - best)^2 = 3 sigma where sigma is the standard deviation - // of the test loss residuals. We don't extrapolate so don't truncate if a - // crossing point lies outside the searched interval. + // of the test loss residuals to get the interval endpoints. We don't + // extrapolate the loss function outside the line segment we searched so + // don't truncate if an endpoint lies outside the searched interval. TMeanVarAccumulator residualMoments; for (std::size_t i = 0; i < INITIAL_REGULARIZER_SEARCH_ITERATIONS; ++i) { residualMoments.add(testLosses[i] - leastSquaresQuadraticTestLoss.predict( - static_cast(i))); + static_cast(i) * stepSize)); } double sigma{std::sqrt(CBasicStatistics::variance(residualMoments))}; - double logScaleAtThreeSigma{std::sqrt(3.0 * sigma / curvature)}; - if (logBestRegularizerScale - logScaleAtThreeSigma >= leftEndpoint) { - // These are scales > bestRegularizerScale hence 1 / multiplier. - interval(MAX_REGULARIZER_INDEX) = std::min( - std::pow(1.0 / multiplier, logScaleAtThreeSigma), MAX_REGULARIZER_SCALE); + double threeSigmaInterval{std::sqrt(3.0 * sigma / curvature)}; + if (bestRegularizer - threeSigmaInterval >= leftEndpoint) { + interval(MIN_REGULARIZER_INDEX) = + std::max(-threeSigmaInterval, returnedIntervalLeftEndOffset); } - if (logBestRegularizerScale + logScaleAtThreeSigma <= rightEndpoint) { - interval(MIN_REGULARIZER_INDEX) = std::max( - std::pow(multiplier, logScaleAtThreeSigma), MIN_REGULARIZER_SCALE); + if (bestRegularizer + threeSigmaInterval <= rightEndpoint) { + interval(MAX_REGULARIZER_INDEX) = + std::min(threeSigmaInterval, returnedIntervalRightEndOffset); } } - interval *= bestRegularizerScale; + interval += TVector{bestRegularizer}; - return successful ? TOptionalVector{interval} : TOptionalVector{}; + return TOptionalVector{interval}; } CBoostedTreeFactory CBoostedTreeFactory::constructFromParameters(std::size_t numberThreads, @@ -514,7 +530,7 @@ CBoostedTreeFactory CBoostedTreeFactory::constructFromString(std::istream& jsonS CBoostedTreeFactory::CBoostedTreeFactory(bool restored, std::size_t numberThreads, TLossFunctionUPtr loss) : m_Restored{restored}, m_TreeImpl{std::make_unique(numberThreads, std::move(loss))}, - m_GammaSearchInterval{0.0}, m_LambdaSearchInterval{0.0} { + m_LogGammaSearchInterval{0.0}, m_LogLambdaSearchInterval{0.0} { } CBoostedTreeFactory::CBoostedTreeFactory(CBoostedTreeFactory&&) = default; diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc index 13c9dcc543..e7afb4c336 100644 --- a/lib/maths/unittest/CBoostedTreeTest.cc +++ b/lib/maths/unittest/CBoostedTreeTest.cc @@ -199,7 +199,7 @@ void CBoostedTreeTest::testPiecewiseConstant() { 0.0, modelBias[i][0], 7.0 * std::sqrt(noiseVariance / static_cast(trainRows))); // Good R^2... 
- CPPUNIT_ASSERT(modelRSquared[i][0] > 0.94); + CPPUNIT_ASSERT(modelRSquared[i][0] > 0.93); meanModelRSquared.add(modelRSquared[i][0]); } @@ -321,7 +321,7 @@ void CBoostedTreeTest::testNonLinear() { 0.0, modelBias[i][0], 8.0 * std::sqrt(noiseVariance / static_cast(trainRows))); // Good R^2... - CPPUNIT_ASSERT(modelRSquared[i][0] > 0.92); + CPPUNIT_ASSERT(modelRSquared[i][0] > 0.91); meanModelRSquared.add(modelRSquared[i][0]); } @@ -602,8 +602,8 @@ void CBoostedTreeTest::testCategoricalRegressors() { LOG_DEBUG(<< "bias = " << modelBias); LOG_DEBUG(<< " R^2 = " << modelRSquared); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, modelBias, 0.13); - CPPUNIT_ASSERT(modelRSquared > 0.9); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, modelBias, 0.2); + CPPUNIT_ASSERT(modelRSquared > 0.92); } void CBoostedTreeTest::testIntegerRegressor() { From 7d2ccb0c4be88a24d40bfdc018188e3d5df25dc2 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 25 Sep 2019 17:37:21 +0100 Subject: [PATCH 21/23] Comment plus correct scale --- lib/maths/CBoostedTreeFactory.cc | 12 +++++++----- lib/maths/unittest/CBoostedTreeTest.cc | 8 ++++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 73c6d02b38..48dc09c384 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -364,11 +364,13 @@ void CBoostedTreeFactory::initializeUnsetRegularizationHyperparameters(core::CDa } } - double freeRegularizationParameters{ - (m_TreeImpl->m_RegularizationOverride.gamma() != boost::none ? 0.0 : 1.0) + - (m_TreeImpl->m_RegularizationOverride.lambda() != boost::none ? 0.0 : 1.0)}; - double scale{static_cast(m_TreeImpl->m_NumberFolds - 1) / - static_cast(m_TreeImpl->m_NumberFolds) / freeRegularizationParameters}; + // If we aren't supplied a fixed value for a parameter, we find its "best" + // value forcing the other regularizers to zero. Therefore, we divide here + // by the number of unspecified parameters so the sum of the regularization + // terms is about the same in the first loop. + double scale{ + 1.0 / ((m_TreeImpl->m_RegularizationOverride.gamma() == boost::none ? 1.0 : 0.0) + + (m_TreeImpl->m_RegularizationOverride.lambda() == boost::none ? 1.0 : 0.0))}; if (m_TreeImpl->m_RegularizationOverride.gamma() == boost::none) { m_LogGammaSearchInterval += TVector{std::log(scale)}; diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc index e7afb4c336..7a8096df0c 100644 --- a/lib/maths/unittest/CBoostedTreeTest.cc +++ b/lib/maths/unittest/CBoostedTreeTest.cc @@ -199,7 +199,7 @@ void CBoostedTreeTest::testPiecewiseConstant() { 0.0, modelBias[i][0], 7.0 * std::sqrt(noiseVariance / static_cast(trainRows))); // Good R^2... - CPPUNIT_ASSERT(modelRSquared[i][0] > 0.93); + CPPUNIT_ASSERT(modelRSquared[i][0] > 0.94); meanModelRSquared.add(modelRSquared[i][0]); } @@ -321,7 +321,7 @@ void CBoostedTreeTest::testNonLinear() { 0.0, modelBias[i][0], 8.0 * std::sqrt(noiseVariance / static_cast(trainRows))); // Good R^2... 
- CPPUNIT_ASSERT(modelRSquared[i][0] > 0.91); + CPPUNIT_ASSERT(modelRSquared[i][0] > 0.92); meanModelRSquared.add(modelRSquared[i][0]); } @@ -602,8 +602,8 @@ void CBoostedTreeTest::testCategoricalRegressors() { LOG_DEBUG(<< "bias = " << modelBias); LOG_DEBUG(<< " R^2 = " << modelRSquared); - CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, modelBias, 0.2); - CPPUNIT_ASSERT(modelRSquared > 0.92); + CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0, modelBias, 0.05); + CPPUNIT_ASSERT(modelRSquared > 0.91); } void CBoostedTreeTest::testIntegerRegressor() { From afc570b68d24a0b6cee924ae91f5407217495cdb Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Wed, 25 Sep 2019 17:51:13 +0100 Subject: [PATCH 22/23] Typo in refactor --- lib/maths/CBoostedTreeFactory.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 48dc09c384..8a847f6407 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -470,10 +470,10 @@ CBoostedTreeFactory::testLossNewtonLineSearch(core::CDataFrame& frame, double rightEndpoint{0.0}; double stationaryPoint{-gradient / 2.0 / curvature}; double bestRegularizer{[&] { - double distanceToLeftEndpoint{std::fabs(rightEndpoint - stationaryPoint)}; - double distanceToRightEndpoint{std::fabs(leftEndpoint - stationaryPoint)}; if (curvature < 0.0) { // Stationary point is a maximum so use furthest point in interval. + double distanceToLeftEndpoint{std::fabs(leftEndpoint - stationaryPoint)}; + double distanceToRightEndpoint{std::fabs(rightEndpoint - stationaryPoint)}; return distanceToLeftEndpoint > distanceToRightEndpoint ? leftEndpoint : rightEndpoint; } // Stationary point is a minimum so use nearest point in the interval. From 311ade134dbbcf22ad12997c8ad1e75c135aa770 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Thu, 26 Sep 2019 14:15:33 +0100 Subject: [PATCH 23/23] Improve comment --- lib/maths/CBoostedTreeFactory.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/maths/CBoostedTreeFactory.cc b/lib/maths/CBoostedTreeFactory.cc index 8a847f6407..c4ee31fdfc 100644 --- a/lib/maths/CBoostedTreeFactory.cc +++ b/lib/maths/CBoostedTreeFactory.cc @@ -464,8 +464,8 @@ CBoostedTreeFactory::testLossNewtonLineSearch(core::CDataFrame& frame, LOG_TRACE(<< "[intercept, slope, curvature] = " << core::CContainerPrinter::print(params)); - // Find the scale at the minimum of the least squares quadratic fit to - // the test loss in the search interval. Note step size is negative. + // Find the minimizer of the least squares quadratic fit to the test loss + // in the search interval. (Note step size is negative.) double leftEndpoint{static_cast(INITIAL_REGULARIZER_SEARCH_ITERATIONS - 1) * stepSize}; double rightEndpoint{0.0}; double stationaryPoint{-gradient / 2.0 / curvature};
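
---

Note on the line search reworked across patches 15, 16, 20, 22 and 23: the end-to-end
logic is easier to follow outside the diff context. The sketch below is illustrative
only -- the data are invented, the helpers are not library code, and the production
implementation is CBoostedTreeFactory::testLossNewtonLineSearch -- but it walks the
same steps: sample the test loss at log-scale offsets of one regularizer, fit a least
squares quadratic, take its minimizer (or the furthest searched endpoint if the fit
is concave), then widen the best offset to the points where the fitted loss exceeds
the minimum by three standard deviations of the fit residuals.

// Standalone sketch of the quadratic line search; all names and data here
// are invented for illustration.
#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

// Fit y = a + b x + c x^2 by solving the 3x3 normal equations with unpivoted
// Gaussian elimination (adequate for this small, well-conditioned system).
std::array<double, 3> fitQuadratic(const std::vector<double>& x,
                                   const std::vector<double>& y) {
    double s0{static_cast<double>(x.size())};
    double s1{0}, s2{0}, s3{0}, s4{0}, t0{0}, t1{0}, t2{0};
    for (std::size_t i = 0; i < x.size(); ++i) {
        double xi{x[i]};
        s1 += xi; s2 += xi * xi; s3 += xi * xi * xi; s4 += xi * xi * xi * xi;
        t0 += y[i]; t1 += xi * y[i]; t2 += xi * xi * y[i];
    }
    double m[3][4]{{s0, s1, s2, t0}, {s1, s2, s3, t1}, {s2, s3, s4, t2}};
    for (int k = 0; k < 3; ++k) {
        for (int r = k + 1; r < 3; ++r) {
            double f{m[r][k] / m[k][k]};
            for (int c = k; c < 4; ++c) { m[r][c] -= f * m[k][c]; }
        }
    }
    std::array<double, 3> p{};
    for (int r = 2; r >= 0; --r) {
        double rhs{m[r][3]};
        for (int c = r + 1; c < 3; ++c) { rhs -= m[r][c] * p[c]; }
        p[r] = rhs / m[r][r];
    }
    return p; // {intercept, gradient, curvature}
}

int main() {
    // Hypothetical test losses measured at offsets 0, stepSize, 2 * stepSize,
    // ... where stepSize = -log(1024) / #iterations as in patch 20.
    double stepSize{-std::log(1024.0) / 6.0};
    std::vector<double> loss{1.30, 1.18, 1.12, 1.11, 1.16, 1.27};
    std::vector<double> offset;
    for (std::size_t i = 0; i < loss.size(); ++i) {
        offset.push_back(static_cast<double>(i) * stepSize);
    }

    auto p = fitQuadratic(offset, loss);
    double gradient{p[1]}, curvature{p[2]};

    // The searched interval runs from the most negative offset up to zero.
    double leftEndpoint{offset.back()}, rightEndpoint{0.0};
    double stationaryPoint{-gradient / (2.0 * curvature)};
    double best{curvature > 0.0
                    // Minimum: use the nearest point in the interval.
                    ? std::clamp(stationaryPoint, leftEndpoint, rightEndpoint)
                    // Maximum: use the furthest endpoint of the interval.
                    : (std::fabs(leftEndpoint - stationaryPoint) >
                               std::fabs(rightEndpoint - stationaryPoint)
                           ? leftEndpoint
                           : rightEndpoint)};

    // Residual standard deviation of the fit, and the half width at which
    // curvature * (x - best)^2 = 3 * sigma, i.e. x = best +/- sqrt(3 sigma / c).
    double variance{0.0};
    for (std::size_t i = 0; i < loss.size(); ++i) {
        double xi{offset[i]};
        double r{loss[i] - (p[0] + p[1] * xi + p[2] * xi * xi)};
        variance += r * r / static_cast<double>(loss.size());
    }
    double halfWidth{std::sqrt(3.0 * std::sqrt(variance) / curvature)};

    std::cout << "best log-scale offset = " << best << ", search interval = ["
              << best - halfWidth << ", " << best + halfWidth << "]\n";
    return 0;
}

Working in log offsets is what patch 20 buys: the returned interval feeds the
Bayesian optimisation bounding box by simple addition rather than by multiplying
scales, and exponentiating recovers the regularizer values themselves.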
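Patch 20 also tightens the squared-error gradient convention: for l(p, a) = (p - a)^2
the derivative with respect to the prediction is dl/dp = 2 (p - a) and the second
derivative is 2, so returning 2.0 * (prediction - actual) is the exact gradient where
the old code dropped the factor of two. A standalone finite-difference check
(illustrative only, not library code):

#include <cassert>
#include <cmath>

double mseValue(double p, double a) { return (p - a) * (p - a); }
double mseGradient(double p, double a) { return 2.0 * (p - a); }

int main() {
    // Central difference check of the gradient at an arbitrary point.
    double p{1.3}, a{0.7}, h{1e-6};
    double numeric{(mseValue(p + h, a) - mseValue(p - h, a)) / (2.0 * h)};
    assert(std::fabs(numeric - mseGradient(p, a)) < 1e-6);
    return 0;
}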