 #include <functional>
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 namespace ml {
@@ -66,6 +67,66 @@ class MATHS_EXPORT CArgMinMseImpl final : public CArgMinLossImpl {
     TMeanAccumulator m_MeanError;
 };
 
+//! \brief Finds the value to add to a set of predictions which approximately
+//! minimises the regularised mean squared logarithmic error (MSLE).
+class MATHS_EXPORT CArgMinMsleImpl final : public CArgMinLossImpl {
+public:
+    using TObjective = std::function<double(double)>;
+
+public:
+    CArgMinMsleImpl(double lambda);
+    std::unique_ptr<CArgMinLossImpl> clone() const override;
+    bool nextPass() override;
+    void add(const TMemoryMappedFloatVector& prediction, double actual, double weight = 1.0) override;
+    void merge(const CArgMinLossImpl& other) override;
+    TDoubleVector value() const override;
+
+    // Exposed for unit testing.
+    TObjective objective() const;
+
+private:
+    using TMinMaxAccumulator = CBasicStatistics::CMinMax<double>;
+    using TMeanAccumulator = CBasicStatistics::SSampleMean<double>::TAccumulator;
+    using TMeanVarAccumulator = CBasicStatistics::SSampleMeanVar<double>::TAccumulator;
+    using TVector = CVectorNx1<double, 3>;
+    using TVectorMeanAccumulator = CBasicStatistics::SSampleMean<TVector>::TAccumulator;
+    using TVectorMeanAccumulatorVec = std::vector<TVectorMeanAccumulator>;
+    using TVectorMeanAccumulatorVecVec = std::vector<TVectorMeanAccumulatorVec>;
+    using TDoubleDoublePr = std::pair<double, double>;
+    using TSizeSizePr = std::pair<std::size_t, std::size_t>;
+
+private:
+    TSizeSizePr bucket(double prediction, double actual) const {
+        auto bucketWidth{this->bucketWidth()};
+        double bucketPrediction{(prediction - m_ExpPredictionMinMax.min()) /
+                                bucketWidth.first};
+        std::size_t predictionBucketIndex{std::min(
+            static_cast<std::size_t>(bucketPrediction), m_Buckets.size() - 1)};
+
+        double bucketActual{(actual - m_LogActualMinMax.min()) / bucketWidth.second};
+        std::size_t actualBucketIndex{std::min(
+            static_cast<std::size_t>(bucketActual), m_Buckets[0].size() - 1)};
+
+        return std::make_pair(predictionBucketIndex, actualBucketIndex);
+    }
+
+    TDoubleDoublePr bucketWidth() const {
+        double predictionBucketWidth{m_ExpPredictionMinMax.range() /
+                                     static_cast<double>(m_Buckets.size())};
+        double actualBucketWidth{m_LogActualMinMax.range() /
+                                 static_cast<double>(m_Buckets[0].size())};
+        return std::make_pair(predictionBucketWidth, actualBucketWidth);
+    }
+
+private:
+    std::size_t m_CurrentPass = 0;
+    TMinMaxAccumulator m_ExpPredictionMinMax;
+    TMinMaxAccumulator m_LogActualMinMax;
+    TVectorMeanAccumulatorVecVec m_Buckets;
+    TMeanVarAccumulator m_MeanLogActual;
+    TMeanAccumulator m_MeanError;
+};
+
 //! \brief Finds the value to add to a set of predicted log-odds which minimises
 //! regularised cross entropy loss w.r.t. the actual categories.
 //!
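Judging by its members and the `nextPass()` hook, `CArgMinMsleImpl` appears to work in two passes: a first pass recording the ranges of the exponentiated predictions and the log actuals in the min-max accumulators, and a second pass aggregating per-bucket statistics on a uniform two-dimensional grid via `bucket()` and `bucketWidth()`. The sketch below shows the one-dimensional version of that bucketing arithmetic in isolation; `MinMax`, `bucketIndex` and the sample values are illustrative stand-ins, not the library's `CBasicStatistics::CMinMax`.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>

// Stand-in for CBasicStatistics::CMinMax<double>: tracks the observed range.
struct MinMax {
    double s_Min{std::numeric_limits<double>::max()};
    double s_Max{std::numeric_limits<double>::lowest()};
    void add(double x) {
        s_Min = std::min(s_Min, x);
        s_Max = std::max(s_Max, x);
    }
    double min() const { return s_Min; }
    double range() const { return s_Max - s_Min; }
};

// Map a value to one of n equal-width buckets spanning the observed range,
// clamping to the last bucket so the maximum lands in bucket n - 1. This
// mirrors the per-dimension arithmetic in bucket()/bucketWidth() above.
std::size_t bucketIndex(double value, const MinMax& minMax, std::size_t n) {
    double bucketWidth{minMax.range() / static_cast<double>(n)};
    double offset{(value - minMax.min()) / bucketWidth};
    return std::min(static_cast<std::size_t>(offset), n - 1);
}

int main() {
    MinMax range;
    for (double p : {0.5, 1.5, 3.0, 8.0}) {
        range.add(p); // first pass: collect the range
    }
    for (double p : {0.5, 1.5, 3.0, 8.0}) {
        // Second pass: with 4 buckets over [0.5, 8.0] each is 1.875 wide,
        // so this prints buckets 0, 0, 1 and 3 (8.0 clamps to the last).
        std::cout << p << " -> bucket " << bucketIndex(p, range, 4) << '\n';
    }
    return 0;
}
```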
@@ -278,6 +339,9 @@ class MATHS_EXPORT CLoss {
     //! Get the name of the loss function
     virtual const std::string& name() const = 0;
 
+    //! Returns true if the loss function is used for regression.
+    virtual bool isRegression() const = 0;
+
 protected:
     CArgMinLoss makeMinimizer(const boosted_tree_detail::CArgMinLossImpl& impl) const;
 };
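The new `isRegression()` pure virtual lets callers distinguish regression losses (`CMse`, and now `CMsle`) from classification losses without comparing `name()` strings or downcasting. A hypothetical caller might use it as below; `Loss`, `MseLoss` and `report` are minimal stand-ins for illustration, not the real `CLoss` hierarchy.

```cpp
#include <iostream>
#include <string>

// Minimal stand-in for the CLoss interface, reduced to the two members
// this example needs.
class Loss {
public:
    virtual ~Loss() = default;
    virtual const std::string& name() const = 0;
    virtual bool isRegression() const = 0;
};

class MseLoss final : public Loss {
public:
    const std::string& name() const override {
        static const std::string NAME{"mse"};
        return NAME;
    }
    bool isRegression() const override { return true; }
};

// Pick validation metrics based on the loss family rather than its concrete type.
void report(const Loss& loss) {
    std::cout << loss.name() << ": "
              << (loss.isRegression() ? "use regression metrics (e.g. MSE)"
                                      : "use classification metrics (e.g. accuracy)")
              << '\n';
}

int main() {
    MseLoss mse;
    report(mse); // prints "mse: use regression metrics (e.g. MSE)"
    return 0;
}
```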
@@ -307,6 +371,7 @@ class MATHS_EXPORT CMse final : public CLoss {
     TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
     CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
     const std::string& name() const override;
+    bool isRegression() const override;
 };
 
 //! \brief Implements loss for binomial logistic regression.
@@ -342,6 +407,7 @@ class MATHS_EXPORT CBinomialLogisticLoss final : public CLoss {
     TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
     CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
     const std::string& name() const override;
+    bool isRegression() const override;
 };
 
 //! \brief Implements loss for multinomial logistic regression.
@@ -380,10 +446,49 @@ class MATHS_EXPORT CMultinomialLogisticLoss final : public CLoss {
     TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
     CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
     const std::string& name() const override;
+    bool isRegression() const override;
 
 private:
     std::size_t m_NumberClasses;
 };
+//! \brief The MSLE loss function.
+//!
+//! DESCRIPTION:\n
+//! Formally, the MSLE error definition we use is \f$(\log(1+p) - \log(1+a))^2\f$.
+//! However, we approximate this by a quadratic form which has its minimum at
+//! \f$p = a\f$ and matches the value and derivative of the MSLE loss function.
+//! For example, if the current prediction for the i'th training point is
+//! \f$p_i\f$, the loss is defined as
+//! <pre class="fragment">
+//! \f$\displaystyle l_i(p) = c_i + w_i(p - a_i)^2\f$
+//! </pre>
+//! where \f$w_i = \frac{\log(1+p_i) - \log(1+a_i)}{(1+p_i)(p_i-a_i)}\f$ and \f$c_i\f$
+//! is chosen so that \f$l_i(p_i) = (\log(1+p_i) - \log(1+a_i))^2\f$.
+class MATHS_EXPORT CMsle final : public CLoss {
+public:
+    static const std::string NAME;
+
+public:
+    EType type() const override;
+    std::unique_ptr<CLoss> clone() const override;
+    std::size_t numberParameters() const override;
+    double value(const TMemoryMappedFloatVector& prediction,
+                 double actual,
+                 double weight = 1.0) const override;
+    void gradient(const TMemoryMappedFloatVector& prediction,
+                  double actual,
+                  TWriter writer,
+                  double weight = 1.0) const override;
+    void curvature(const TMemoryMappedFloatVector& prediction,
+                   double actual,
+                   TWriter writer,
+                   double weight = 1.0) const override;
+    bool isCurvatureConstant() const override;
+    TDoubleVector transform(const TMemoryMappedFloatVector& prediction) const override;
+    CArgMinLoss minimizer(double lambda, const CPRNG::CXorOShiro128Plus& rng) const override;
+    const std::string& name() const override;
+    bool isRegression() const override;
+};
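As a sanity check on the doc comment's formulas, the standalone snippet below (illustrative, not part of the patch) computes w_i and c_i for one prediction/actual pair and confirms that the quadratic surrogate reproduces both the value and the derivative of the true MSLE loss at p_i.

```cpp
#include <cmath>
#include <iostream>

int main() {
    // An illustrative prediction/actual pair; any p != a exercises the formulas.
    double p{2.0};
    double a{3.0};

    // True MSLE at p and its derivative with respect to p.
    double logDiff{std::log(1.0 + p) - std::log(1.0 + a)};
    double msle{logDiff * logDiff};
    double msleGradient{2.0 * logDiff / (1.0 + p)};

    // Quadratic surrogate l(x) = c + w (x - a)^2 with w and c as in the
    // doc comment: w matches the derivative, c is chosen to match the value.
    double w{logDiff / ((1.0 + p) * (p - a))};
    double c{msle - w * (p - a) * (p - a)};

    // l(p) equals the true loss by construction, and l'(p) = 2 w (p - a)
    // algebraically equals the true gradient; both lines print equal values.
    std::cout << "value:    " << msle << " vs " << c + w * (p - a) * (p - a) << '\n';
    std::cout << "gradient: " << msleGradient << " vs " << 2.0 * w * (p - a) << '\n';
    return 0;
}
```

Note that w is positive whenever p != a (the log difference and p - a share sign), so the surrogate is convex with its minimum at x = a, which is what lets each boosting round pull the prediction towards the actual.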
 }
 }
 }