[7.8][ML] Multiclass maximise minimum recall #1133

Merged: 1 commit, Apr 9, 2020
3 changes: 3 additions & 0 deletions docs/CHANGELOG.asciidoc
@@ -37,6 +37,9 @@
when CPU is constrained. (See {ml-pull}1109[#1109].)
* Take `training_percent` into account when estimating memory usage for classification and regression.
(See {ml-pull}1111[#1111].)
+ * Support maximizing minimum recall when assigning class labels for multiclass classification.
+ (See {ml-pull}1113[#1113].)
* Improve robustness of anomaly detection to bad input data. (See {ml-pull}1114[#1114].)
* Adds new `num_matches` and `preferred_to_categories` fields to category output.
(See {ml-pull}1062[#1062])
2 changes: 1 addition & 1 deletion include/maths/CBoostedTreeImpl.h
@@ -112,7 +112,7 @@ class MATHS_EXPORT CBoostedTreeImpl final {
//! Get the number of columns training the model will add to the data frame.
static std::size_t numberExtraColumnsForTrain(std::size_t numberLossParameters) {
// We store as follows:
- // 1. The predicted values for the dependent variables
+ // 1. The predicted values for the dependent variable
// 2. The gradient of the loss function
// 3. The upper triangle of the hessian of the loss function
// 4. The example's weight
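As a quick check on the layout listed in that comment: for a loss with m parameters the frame gains m prediction columns, m gradient components, m(m + 1)/2 entries for the upper triangle of the Hessian, and one weight column. A minimal sketch (extraColumnsForTrain here is a hypothetical stand-in, not the library's function):

#include <cstddef>
#include <iostream>

// Column count implied by the four items stored per row.
std::size_t extraColumnsForTrain(std::size_t m) {
    return m                 // 1. predictions for the dependent variable
           + m               // 2. gradient of the loss function
           + m * (m + 1) / 2 // 3. upper triangle of the Hessian
           + 1;              // 4. the example's weight
}

int main() {
    std::cout << extraColumnsForTrain(1) << '\n'; // regression or binary: 4
    std::cout << extraColumnsForTrain(3) << '\n'; // three classes: 13
}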
16 changes: 15 additions & 1 deletion include/maths/CDataFrameUtils.h
@@ -69,7 +69,8 @@ class MATHS_EXPORT CDataFrameUtils : private core::CNonInstantiatable {
using TRowRef = core::CDataFrame::TRowRef;
using TWeightFunc = std::function<double(const TRowRef&)>;
using TDoubleVector = CDenseVector<double>;
- using TReadPredictionFunc = std::function<TDoubleVector(const TRowRef)>;
+ using TMemoryMappedFloatVector = CMemoryMappedDenseVector<CFloatStorage>;
+ using TReadPredictionFunc = std::function<TMemoryMappedFloatVector(const TRowRef&)>;
using TQuantileSketchVec = std::vector<CQuantileSketch>;
using TPackedBitVectorVec = std::vector<core::CPackedBitVector>;

@@ -408,6 +409,19 @@ class MATHS_EXPORT CDataFrameUtils : private core::CNonInstantiatable {
const core::CPackedBitVector& rowMask,
const TSizeVec& columnMask,
std::size_t numberSamples);
+ static TDoubleVector
+ maximizeMinimumRecallForBinary(std::size_t numberThreads,
+ const core::CDataFrame& frame,
+ const core::CPackedBitVector& rowMask,
+ std::size_t targetColumn,
+ const TReadPredictionFunc& readPrediction);
+ static TDoubleVector
+ maximizeMinimumRecallForMulticlass(std::size_t numberThreads,
+ const core::CDataFrame& frame,
+ const core::CPackedBitVector& rowMask,
+ std::size_t numberClasses,
+ std::size_t targetColumn,
+ const TReadPredictionFunc& readPrediction);
static void removeMetricColumns(const core::CDataFrame& frame, TSizeVec& columnMask);
static void removeCategoricalColumns(const core::CDataFrame& frame, TSizeVec& columnMask);
static double unitWeight(const TRowRef&);
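To make the objective behind these declarations concrete, here is a minimal sketch of the binary case under simplifying assumptions (a plain threshold sweep over precomputed probabilities; the library's implementation works directly on the data frame and differs in detail). It returns the decision threshold whose smaller per-class recall is largest:

#include <algorithm>
#include <cstddef>
#include <vector>

// probabilities[i] is the predicted P(class 1) for row i; labels[i] is 0 or 1.
double maxMinRecallThreshold(const std::vector<double>& probabilities,
                             const std::vector<int>& labels) {
    double bestThreshold{0.5};
    double bestMinRecall{-1.0};
    for (double threshold = 0.05; threshold < 1.0; threshold += 0.05) {
        std::size_t correct[2]{0, 0};
        std::size_t total[2]{0, 0};
        for (std::size_t i = 0; i < labels.size(); ++i) {
            int predicted{probabilities[i] >= threshold ? 1 : 0};
            ++total[labels[i]];
            if (predicted == labels[i]) {
                ++correct[labels[i]];
            }
        }
        double recall0{total[0] > 0 ? static_cast<double>(correct[0]) / total[0] : 1.0};
        double recall1{total[1] > 0 ? static_cast<double>(correct[1]) / total[1] : 1.0};
        double minRecall{std::min(recall0, recall1)};
        if (minRecall > bestMinRecall) {
            bestMinRecall = minRecall;
            bestThreshold = threshold;
        }
    }
    return bestThreshold;
}

The class weights the real functions return play the same role: rescaling the predicted class probabilities before taking the arg-max shifts the decision boundary just as a threshold does.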
24 changes: 22 additions & 2 deletions include/maths/CTools.h
@@ -26,6 +26,7 @@
#include <cstring>
#include <iosfwd>
#include <limits>
+ #include <numeric>
#include <vector>

namespace ml {
@@ -684,7 +685,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
return sigmoid(std::exp(std::copysign(1.0, sign) * (x - x0) / width));
}

- //! Compute the softmax from the multinomial logit values \p logit.
+ //! Compute the softmax for the multinomial logit values \p logit.
//!
//! i.e. \f$[\sigma(z)]_i = \frac{exp(z_i)}{\sum_j exp(z_j)}\f$.
//!
@@ -703,10 +704,29 @@
}
}

- //! Specialize the softmax for our dense vector type.
+ //! Compute the log of the softmax for the multinomial logit values \p logit.
+ template<typename COLLECTION>
+ static void inplaceLogSoftmax(COLLECTION& z) {
+ double zmax{*std::max_element(z.begin(), z.end())};
+ for (auto& zi : z) {
+ zi -= zmax;
+ }
+ double logZ{std::log(std::accumulate(
+ z.begin(), z.end(), 0.0,
+ [](double sum, const auto& zi) { return sum + std::exp(zi); }))};
+ for (auto& zi : z) {
+ zi -= logZ;
+ }
+ }

+ //! Specialize the softmax for CDenseVector.
template<typename T>
static void inplaceSoftmax(CDenseVector<T>& z);

+ //! Specialize the log(softmax) for CDenseVector.
+ template<typename SCALAR>
+ static void inplaceLogSoftmax(CDenseVector<SCALAR>& z);

//! Linearly interpolate a function on the interval [\p a, \p b].
static double linearlyInterpolate(double a, double b, double fa, double fb, double x);

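The zmax subtraction in the new inplaceLogSoftmax is the standard log-sum-exp stabilisation: without it, exponentiating large logits overflows to infinity and the result becomes NaN. A self-contained demonstration mirroring the template above:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

void logSoftmax(std::vector<double>& z) {
    // Subtract the maximum so every exponent is <= 0 and cannot overflow.
    double zmax{*std::max_element(z.begin(), z.end())};
    for (auto& zi : z) { zi -= zmax; }
    double logZ{std::log(std::accumulate(
        z.begin(), z.end(), 0.0,
        [](double sum, double zi) { return sum + std::exp(zi); }))};
    for (auto& zi : z) { zi -= logZ; }
}

int main() {
    std::vector<double> z{1000.0, 1001.0, 1002.0}; // naive exp() overflows here
    logSoftmax(z); // finite: roughly {-2.41, -1.41, -0.41}
    for (double zi : z) { std::cout << zi << '\n'; }
}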
10 changes: 10 additions & 0 deletions include/maths/CToolsDetail.h
@@ -11,6 +11,7 @@

#include <maths/CCompositeFunctions.h>
#include <maths/CIntegration.h>
+ #include <maths/CLinearAlgebraEigen.h>
#include <maths/CMixtureDistribution.h>
#include <maths/COrderings.h>
#include <maths/CTools.h>
@@ -308,6 +309,15 @@ void CTools::inplaceSoftmax(CDenseVector<T>& z) {
z.array() = z.array().exp();
z /= z.sum();
}

+ template<typename SCALAR>
+ void CTools::inplaceLogSoftmax(CDenseVector<SCALAR>& z) {
+ // Handle under/overflow when taking exponentials by subtracting zmax.
+ double zmax{z.maxCoeff()};
+ z.array() -= zmax;
+ double Z{z.array().exp().sum()};
+ z.array() -= std::log(Z);
+ }
}
}

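The CDenseVector overload expresses the same computation with Eigen array expressions. An equivalent standalone sketch, assuming plain Eigen::VectorXd in place of CDenseVector (which wraps an Eigen dense vector):

#include <Eigen/Core>
#include <cmath>
#include <iostream>

int main() {
    Eigen::VectorXd z(3);
    z << 1000.0, 1001.0, 1002.0;
    double zmax{z.maxCoeff()};
    z.array() -= zmax;                  // guard against overflow in exp
    double Z{z.array().exp().sum()};
    z.array() -= std::log(Z);           // z now holds log(softmax(z))
    std::cout << z.transpose() << '\n'; // approx -2.41 -1.41 -0.41
}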
15 changes: 9 additions & 6 deletions include/test/CDataFrameAnalyzerTrainingFactory.h
@@ -8,6 +8,7 @@
#define INCLUDED_ml_test_CDataFrameAnalyzerTrainingFactory_h

#include <core/CDataFrame.h>
+ #include <core/CSmallVector.h>

#include <maths/CBoostedTreeFactory.h>
#include <maths/CBoostedTreeLoss.h>
@@ -122,13 +123,11 @@
auto prediction = tree->readAndAdjustPrediction(*row);
switch (type) {
case E_Regression:
- appendPrediction(*frame, weights.size(), prediction[0], expectedPredictions);
+ appendPrediction(*frame, weights.size(), prediction, expectedPredictions);
break;
case E_BinaryClassification:
- appendPrediction(*frame, weights.size(), prediction[1], expectedPredictions);
- break;
case E_MulticlassClassification:
- // TODO.
+ appendPrediction(*frame, weights.size(), prediction, expectedPredictions);
break;
}
}
@@ -149,15 +148,19 @@
TStrVec& targets);

private:
+ using TDouble2Vec = core::CSmallVector<double, 2>;
using TBoolVec = std::vector<bool>;
using TRowItr = core::CDataFrame::TRowItr;

private:
- static void appendPrediction(core::CDataFrame&, std::size_t, double prediction, TDoubleVec& predictions);
+ static void appendPrediction(core::CDataFrame&,
+ std::size_t,
+ const TDouble2Vec& prediction,
+ TDoubleVec& predictions);

static void appendPrediction(core::CDataFrame& frame,
std::size_t target,
- double class1Score,
+ const TDouble2Vec& class1Score,
TStrVec& predictions);
};
}
21 changes: 18 additions & 3 deletions lib/maths/CBoostedTreeImpl.cc
@@ -377,12 +377,16 @@ void CBoostedTreeImpl::initializePerFoldTestLosses() {
}

void CBoostedTreeImpl::computeClassificationWeights(const core::CDataFrame& frame) {

+ using TFloatStorageVec = std::vector<CFloatStorage>;

if (m_Loss->type() == CLoss::E_BinaryClassification ||
m_Loss->type() == CLoss::E_MulticlassClassification) {

std::size_t numberClasses{m_Loss->type() == CLoss::E_BinaryClassification
? 2
: m_Loss->numberParameters()};
+ TFloatStorageVec storage(2);

switch (m_ClassAssignmentObjective) {
case CBoostedTree::E_Accuracy:
@@ -391,9 +395,20 @@
case CBoostedTree::E_MinimumRecall:
m_ClassificationWeights = CDataFrameUtils::maximumMinimumRecallClassWeights(
m_NumberThreads, frame, this->allTrainingRowsMask(),
- numberClasses, m_DependentVariable, [this](const TRowRef& row) {
- return m_Loss->transform(readPrediction(
- row, m_NumberInputColumns, m_Loss->numberParameters()));
+ numberClasses, m_DependentVariable,
+ [storage, numberClasses, this](const TRowRef& row) mutable {
+ if (m_Loss->type() == CLoss::E_BinaryClassification) {
+ // We predict the log-odds but this is expected to return
+ // the log of the predicted class probabilities.
+ TMemoryMappedFloatVector result{&storage[0], 2};
+ result.array() = m_Loss
+ ->transform(readPrediction(
+ row, m_NumberInputColumns, numberClasses))
+ .array()
+ .log();
+ return result;
+ }
+ return readPrediction(row, m_NumberInputColumns, numberClasses);
});
break;
}
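In the binary branch above the model predicts log-odds, while the callback must hand back log class probabilities. A standalone sketch of that conversion (illustrative only; the real code writes the result into the memory-mapped storage vector, and a production version would want a numerically stabler log(1 - p)):

#include <cmath>
#include <utility>

// Map predicted log-odds z to {log P(class 0), log P(class 1)}.
std::pair<double, double> logClassProbabilities(double logOdds) {
    double p1{1.0 / (1.0 + std::exp(-logOdds))}; // logistic function
    return {std::log(1.0 - p1), std::log(p1)};
}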
13 changes: 2 additions & 11 deletions lib/maths/CBoostedTreeLoss.cc
@@ -39,15 +39,6 @@ double logLogistic(double logOdds) {
}
return std::log(CTools::logisticFunction(logOdds));
}

- template<typename SCALAR>
- void inplaceLogSoftmax(CDenseVector<SCALAR>& z) {
- // Handle under/overflow when taking exponentials by subtracting zmax.
- double zmax{z.maxCoeff()};
- z.array() -= zmax;
- double Z{z.array().exp().sum()};
- z.array() -= std::log(Z);
- }
}

namespace boosted_tree_detail {
@@ -332,7 +323,7 @@ CArgMinMultinomialLogisticLossImpl::objective() const {
if (m_Centres.size() == 1) {
return [logProbabilities, lambda, this](const TDoubleVector& weight) mutable {
logProbabilities = m_Centres[0] + weight;
- inplaceLogSoftmax(logProbabilities);
+ CTools::inplaceLogSoftmax(logProbabilities);
return lambda * weight.squaredNorm() - m_ClassCounts.transpose() * logProbabilities;
};
}
@@ -341,7 +332,7 @@
for (std::size_t i = 0; i < m_CentresClassCounts.size(); ++i) {
if (m_CentresClassCounts[i].sum() > 0.0) {
logProbabilities = m_Centres[i] + weight;
- inplaceLogSoftmax(logProbabilities);
+ CTools::inplaceLogSoftmax(logProbabilities);
loss -= m_CentresClassCounts[i].transpose() * logProbabilities;
}
}
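Reading the single-centre branch of objective() above, the function being minimised is, in the code's own notation (weight \f$w\f$, centre \f$\bar{z}\f$, class counts \f$c\f$, regulariser \f$\lambda\f$):

\f$J(w) = \lambda \lVert w \rVert^2 - c^{\top} \log(\mathrm{softmax}(\bar{z} + w))\f$

The multi-centre branch sums the second term over all centres with non-zero class counts.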