diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index ec1892d7e3..b6a61f9ca6 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -37,6 +37,8 @@ when CPU is constrained. (See {ml-pull}1109[#1109].)
 * Take `training_percent` into account when estimating memory usage for classification and regression.
   (See {ml-pull}1111[#1111].)
+* Support maximizing minimum recall when assigning class labels for multiclass classification.
+  (See {ml-pull}1113[#1113].)
 * Adds new `num_matches` and `preferred_to_categories` fields to category output.
   (See {ml-pull}1062[#1062])
 * Improve robustness of anomaly detection to bad input data. (See {ml-pull}1114[#1114].)
diff --git a/include/maths/CBoostedTreeImpl.h b/include/maths/CBoostedTreeImpl.h
index 1c3cdca371..8969a743c9 100644
--- a/include/maths/CBoostedTreeImpl.h
+++ b/include/maths/CBoostedTreeImpl.h
@@ -112,7 +112,7 @@ class MATHS_EXPORT CBoostedTreeImpl final {
     //! Get the number of columns training the model will add to the data frame.
     static std::size_t numberExtraColumnsForTrain(std::size_t numberLossParameters) {
         // We store as follows:
-        // 1. The predicted values for the dependent variables
+        // 1. The predicted values for the dependent variable
         // 2. The gradient of the loss function
         // 3. The upper triangle of the hessian of the loss function
         // 4. The example's weight
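Taken together, the storage list above fixes the number of extra columns as a function of the loss parameter count p: p predictions, p gradient components, p (p + 1) / 2 Hessian upper-triangle entries and one weight. The function body is elided from this hunk, so the following is an illustrative sketch rather than the patch's code:

#include <cstddef>

// 1. prediction: p columns, 2. loss gradient: p columns,
// 3. upper triangle of the loss hessian: p (p + 1) / 2 columns,
// 4. the example's weight: 1 column.
std::size_t numberExtraColumnsForTrain(std::size_t p) {
    return 2 * p + p * (p + 1) / 2 + 1;
}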
diff --git a/include/maths/CDataFrameUtils.h b/include/maths/CDataFrameUtils.h
index 636380024c..ab822d34c1 100644
--- a/include/maths/CDataFrameUtils.h
+++ b/include/maths/CDataFrameUtils.h
@@ -69,7 +69,8 @@ class MATHS_EXPORT CDataFrameUtils : private core::CNonInstantiatable {
     using TRowRef = core::CDataFrame::TRowRef;
     using TWeightFunc = std::function<double(const TRowRef&)>;
     using TDoubleVector = CDenseVector<double>;
-    using TReadPredictionFunc = std::function<TDoubleVector(const TRowRef&)>;
+    using TMemoryMappedFloatVector = CMemoryMappedDenseVector<CFloatStorage>;
+    using TReadPredictionFunc = std::function<TMemoryMappedFloatVector(const TRowRef&)>;
     using TQuantileSketchVec = std::vector<CQuantileSketch>;
     using TPackedBitVectorVec = std::vector<core::CPackedBitVector>;
@@ -408,6 +409,19 @@ class MATHS_EXPORT CDataFrameUtils : private core::CNonInstantiatable {
                                           const core::CPackedBitVector& rowMask,
                                           const TSizeVec& columnMask,
                                           std::size_t numberSamples);
+    static TDoubleVector
+    maximizeMinimumRecallForBinary(std::size_t numberThreads,
+                                   const core::CDataFrame& frame,
+                                   const core::CPackedBitVector& rowMask,
+                                   std::size_t targetColumn,
+                                   const TReadPredictionFunc& readPrediction);
+    static TDoubleVector
+    maximizeMinimumRecallForMulticlass(std::size_t numberThreads,
+                                       const core::CDataFrame& frame,
+                                       const core::CPackedBitVector& rowMask,
+                                       std::size_t numberClasses,
+                                       std::size_t targetColumn,
+                                       const TReadPredictionFunc& readPrediction);
     static void removeMetricColumns(const core::CDataFrame& frame, TSizeVec& columnMask);
     static void removeCategoricalColumns(const core::CDataFrame& frame, TSizeVec& columnMask);
     static double unitWeight(const TRowRef&);
diff --git a/include/maths/CTools.h b/include/maths/CTools.h
index cb84754487..0a02d7ceeb 100644
--- a/include/maths/CTools.h
+++ b/include/maths/CTools.h
@@ -26,6 +26,7 @@
 #include
 #include
 #include
+#include <numeric>
 #include

 namespace ml {
@@ -684,7 +685,7 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
         return sigmoid(std::exp(std::copysign(1.0, sign) * (x - x0) / width));
     }

-    //! Compute the softmax from the multinomial logit values \p logit.
+    //! Compute the softmax for the multinomial logit values \p logit.
     //!
     //! i.e. \f$[\sigma(z)]_i = \frac{exp(z_i)}{\sum_j exp(z_j)}\f$.
     //!
@@ -703,10 +704,29 @@ class MATHS_EXPORT CTools : private core::CNonInstantiatable {
         }
     }

-    //! Specialize the softmax for our dense vector type.
+    //! Compute the log of the softmax for the multinomial logit values \p logit.
+    template<typename COLLECTION>
+    static void inplaceLogSoftmax(COLLECTION& z) {
+        double zmax{*std::max_element(z.begin(), z.end())};
+        for (auto& zi : z) {
+            zi -= zmax;
+        }
+        double logZ{std::log(std::accumulate(
+            z.begin(), z.end(), 0.0,
+            [](double sum, const auto& zi) { return sum + std::exp(zi); }))};
+        for (auto& zi : z) {
+            zi -= logZ;
+        }
+    }
+
+    //! Specialize the softmax for CDenseVector.
     template<typename T>
     static void inplaceSoftmax(CDenseVector<T>& z);

+    //! Specialize the log(softmax) for CDenseVector.
+    template<typename T>
+    static void inplaceLogSoftmax(CDenseVector<T>& z);
+
     //! Linearly interpolate a function on the interval [\p a, \p b].
     static double linearlyInterpolate(double a, double b, double fa, double fb, double x);
diff --git a/include/maths/CToolsDetail.h b/include/maths/CToolsDetail.h
index ade77a745b..ac4e02e8ba 100644
--- a/include/maths/CToolsDetail.h
+++ b/include/maths/CToolsDetail.h
@@ -11,6 +11,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -308,6 +309,15 @@ void CTools::inplaceSoftmax(CDenseVector<T>& z) {
     z.array() = z.array().exp();
     z /= z.sum();
 }
+
+template<typename T>
+void CTools::inplaceLogSoftmax(CDenseVector<T>& z) {
+    // Handle under/overflow when taking exponentials by subtracting zmax.
+    double zmax{z.maxCoeff()};
+    z.array() -= zmax;
+    double Z{z.array().exp().sum()};
+    z.array() -= std::log(Z);
+}
 }
 }
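Both new overloads subtract zmax before exponentiating, which is the standard log-sum-exp stabilisation: it guarantees every exponential lies in (0, 1], so the normaliser can neither overflow nor vanish. A self-contained sketch, using a free function with a hypothetical name that mirrors the collection overload above:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <numeric>
#include <vector>

// log(softmax(z))_i = z_i - zmax - log(sum_j exp(z_j - zmax)). Subtracting
// zmax first keeps every exponential in (0, 1], so the sum cannot overflow.
std::vector<double> logSoftmax(std::vector<double> z) {
    double zmax{*std::max_element(z.begin(), z.end())};
    for (auto& zi : z) {
        zi -= zmax;
    }
    double logZ{std::log(std::accumulate(
        z.begin(), z.end(), 0.0,
        [](double sum, double zi) { return sum + std::exp(zi); }))};
    for (auto& zi : z) {
        zi -= logZ;
    }
    return z;
}

int main() {
    // The naive form log(exp(1000) / (exp(1000) + exp(0))) overflows;
    // the stabilised form returns {0, -1000} exactly.
    for (double li : logSoftmax({1000.0, 0.0})) {
        std::cout << li << '\n';
    }
    return 0;
}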
diff --git a/include/test/CDataFrameAnalyzerTrainingFactory.h b/include/test/CDataFrameAnalyzerTrainingFactory.h
index 1f79bfd72a..63bfad67ab 100644
--- a/include/test/CDataFrameAnalyzerTrainingFactory.h
+++ b/include/test/CDataFrameAnalyzerTrainingFactory.h
@@ -8,6 +8,7 @@
 #define INCLUDED_ml_test_CDataFrameAnalyzerTrainingFactory_h

 #include
+#include <core/CSmallVector.h>
 #include

@@ -122,13 +123,11 @@ class TEST_EXPORT CDataFrameAnalyzerTrainingFactory {
                 auto prediction = tree->readAndAdjustPrediction(*row);
                 switch (type) {
                 case E_Regression:
-                    appendPrediction(*frame, weights.size(), prediction[0], expectedPredictions);
+                    appendPrediction(*frame, weights.size(), prediction, expectedPredictions);
                     break;
                 case E_BinaryClassification:
-                    appendPrediction(*frame, weights.size(), prediction[1], expectedPredictions);
-                    break;
                 case E_MulticlassClassification:
-                    // TODO.
+                    appendPrediction(*frame, weights.size(), prediction, expectedPredictions);
                     break;
                 }
             }
@@ -149,15 +148,19 @@ class TEST_EXPORT CDataFrameAnalyzerTrainingFactory {
                                TStrVec& targets);

 private:
+    using TDouble2Vec = core::CSmallVector<double, 2>;
     using TBoolVec = std::vector<bool>;
     using TRowItr = core::CDataFrame::TRowItr;

 private:
-    static void appendPrediction(core::CDataFrame&, std::size_t, double prediction, TDoubleVec& predictions);
+    static void appendPrediction(core::CDataFrame&,
+                                 std::size_t,
+                                 const TDouble2Vec& prediction,
+                                 TDoubleVec& predictions);

     static void appendPrediction(core::CDataFrame& frame,
                                  std::size_t target,
-                                 double class1Score,
+                                 const TDouble2Vec& scores,
                                  TStrVec& predictions);
 };
 }
diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc
index 4f1cb2e251..0e65870281 100644
--- a/lib/maths/CBoostedTreeImpl.cc
+++ b/lib/maths/CBoostedTreeImpl.cc
@@ -377,12 +377,16 @@ void CBoostedTreeImpl::initializePerFoldTestLosses() {
 }

 void CBoostedTreeImpl::computeClassificationWeights(const core::CDataFrame& frame) {
+
+    using TFloatStorageVec = std::vector<CFloatStorage>;
+
     if (m_Loss->type() == CLoss::E_BinaryClassification ||
         m_Loss->type() == CLoss::E_MulticlassClassification) {

         std::size_t numberClasses{m_Loss->type() == CLoss::E_BinaryClassification
                                       ? 2
                                       : m_Loss->numberParameters()};
+        TFloatStorageVec storage(2);

         switch (m_ClassAssignmentObjective) {
         case CBoostedTree::E_Accuracy:
@@ -391,9 +395,20 @@ void CBoostedTreeImpl::computeClassificationWeights(const core::CDataFrame& fram
         case CBoostedTree::E_MinimumRecall:
             m_ClassificationWeights = CDataFrameUtils::maximumMinimumRecallClassWeights(
                 m_NumberThreads, frame, this->allTrainingRowsMask(),
-                numberClasses, m_DependentVariable, [this](const TRowRef& row) {
-                    return m_Loss->transform(readPrediction(
-                        row, m_NumberInputColumns, m_Loss->numberParameters()));
+                numberClasses, m_DependentVariable,
+                [storage, numberClasses, this](const TRowRef& row) mutable {
+                    if (m_Loss->type() == CLoss::E_BinaryClassification) {
+                        // We predict the log-odds but this is expected to return
+                        // the log of the predicted class probabilities.
+                        TMemoryMappedFloatVector result{&storage[0], 2};
+                        result.array() = m_Loss
+                                             ->transform(readPrediction(
+                                                 row, m_NumberInputColumns, numberClasses))
+                                             .array()
+                                             .log();
+                        return result;
+                    }
+                    return readPrediction(row, m_NumberInputColumns, numberClasses);
                 });
             break;
         }
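The mutable lambda above adapts the binary case to the recall maximiser's contract: the tree predicts the log-odds of class one, `m_Loss->transform` maps that to the two class probabilities, and the element-wise log produces the log probabilities the callee expects. A standalone sketch of that mapping, assuming the logistic transform (illustrative names, not this codebase's API):

#include <cmath>
#include <iostream>

int main() {
    // z is the predicted log-odds of class one; the logistic function recovers
    // P(class 1) and the recall maximiser is handed the elementwise log.
    double z{1.5};
    double p1{1.0 / (1.0 + std::exp(-z))};
    double logP[2]{std::log(1.0 - p1), std::log(p1)};
    std::cout << logP[0] << ' ' << logP[1] << '\n';
    return 0;
}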
diff --git a/lib/maths/CBoostedTreeLoss.cc b/lib/maths/CBoostedTreeLoss.cc
index 61394d5cb7..c43a358144 100644
--- a/lib/maths/CBoostedTreeLoss.cc
+++ b/lib/maths/CBoostedTreeLoss.cc
@@ -39,15 +39,6 @@ double logLogistic(double logOdds) {
     }
     return std::log(CTools::logisticFunction(logOdds));
 }
-
-template<typename T>
-void inplaceLogSoftmax(CDenseVector<T>& z) {
-    // Handle under/overflow when taking exponentials by subtracting zmax.
-    double zmax{z.maxCoeff()};
-    z.array() -= zmax;
-    double Z{z.array().exp().sum()};
-    z.array() -= std::log(Z);
-}
 }

 namespace boosted_tree_detail {
@@ -332,7 +323,7 @@ CArgMinMultinomialLogisticLossImpl::objective() const {
     if (m_Centres.size() == 1) {
         return [logProbabilities, lambda, this](const TDoubleVector& weight) mutable {
             logProbabilities = m_Centres[0] + weight;
-            inplaceLogSoftmax(logProbabilities);
+            CTools::inplaceLogSoftmax(logProbabilities);
             return lambda * weight.squaredNorm() -
                    m_ClassCounts.transpose() * logProbabilities;
         };
     }
@@ -341,7 +332,7 @@ CArgMinMultinomialLogisticLossImpl::objective() const {
         for (std::size_t i = 0; i < m_CentresClassCounts.size(); ++i) {
             if (m_CentresClassCounts[i].sum() > 0.0) {
                 logProbabilities = m_Centres[i] + weight;
-                inplaceLogSoftmax(logProbabilities);
+                CTools::inplaceLogSoftmax(logProbabilities);
                 loss -= m_CentresClassCounts[i].transpose() * logProbabilities;
             }
         }
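For reference, the objective these lambdas evaluate is the penalised multinomial negative log-likelihood lambda * |w|^2 - sum_j{c_j * log(softmax(centre + w))_j}, which is why a log-softmax, rather than a softmax followed by a log, is wanted here. A minimal sketch of the single-centre case, with std::vector standing in for the Eigen types (hypothetical names):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

using TDoubleVec = std::vector<double>;

// log(softmax(z)) computed with the same zmax stabilisation as above.
TDoubleVec logSoftmax(TDoubleVec z) {
    double zmax{*std::max_element(z.begin(), z.end())};
    double Z{0.0};
    for (double zi : z) { Z += std::exp(zi - zmax); }
    for (auto& zi : z) { zi -= zmax + std::log(Z); }
    return z;
}

// lambda * |w|^2 - sum_j{count_j * log(softmax(centre + w))_j}, i.e. the
// penalised negative log-likelihood of the observed class counts.
double objective(const TDoubleVec& centre, const TDoubleVec& counts,
                 const TDoubleVec& w, double lambda) {
    TDoubleVec logP(centre);
    for (std::size_t j = 0; j < w.size(); ++j) { logP[j] += w[j]; }
    logP = logSoftmax(logP);
    double loss{0.0};
    for (std::size_t j = 0; j < w.size(); ++j) {
        loss += lambda * w[j] * w[j] - counts[j] * logP[j];
    }
    return loss;
}

int main() {
    // Three classes with counts 5, 3 and 2 and a zero prior centre.
    std::cout << objective({0.0, 0.0, 0.0}, {5.0, 3.0, 2.0}, {0.4, -0.1, -0.3}, 0.1) << '\n';
    return 0;
}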
diff --git a/lib/maths/CDataFrameUtils.cc b/lib/maths/CDataFrameUtils.cc
index 6d411ee463..ffa157347a 100644
--- a/lib/maths/CDataFrameUtils.cc
+++ b/lib/maths/CDataFrameUtils.cc
@@ -13,16 +13,21 @@
 #include
 #include
+#include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -117,23 +122,21 @@ class CStratifiedSampler {
     TRowSamplerVec m_Samplers;
     TSamplerSelector m_Selector;
 };
+using TStratifiedSamplerPtr = std::unique_ptr<CStratifiedSampler>;

 //! Get a classifier stratified row sampler for cross fold validation.
-std::pair<std::unique_ptr<CStratifiedSampler>, TDoubleVec>
+std::pair<TStratifiedSamplerPtr, TDoubleVec>
 classifierStratifiedCrossValidationRowSampler(std::size_t numberThreads,
                                               const core::CDataFrame& frame,
                                               std::size_t targetColumn,
                                               CPRNG::CXorOShiro128Plus rng,
-                                              std::size_t numberFolds,
-                                              const core::CPackedBitVector& allTrainingRowsMask) {
+                                              std::size_t desiredCount,
+                                              const core::CPackedBitVector& rowMask) {

     TDoubleVec categoryFrequencies{CDataFrameUtils::categoryFrequencies(
-        numberThreads, frame, allTrainingRowsMask, {targetColumn})[targetColumn]};
+        numberThreads, frame, rowMask, {targetColumn})[targetColumn]};

     TSizeVec categoryCounts;
-    double numberTrainingRows{allTrainingRowsMask.manhattan()};
-    std::size_t desiredCount{
-        (static_cast<std::size_t>(numberTrainingRows) + numberFolds / 2) / numberFolds};
     CSampling::weightedSample(desiredCount, categoryFrequencies, categoryCounts);
     LOG_TRACE(<< "desired category counts per test fold = "
               << core::CContainerPrinter::print(categoryCounts));
@@ -150,17 +153,17 @@ classifierStratifiedCrossValidationRowSampler(std::size_t numberThreads,
 }

 //! Get a regression stratified row sampler for cross fold validation.
-std::unique_ptr<CStratifiedSampler>
+TStratifiedSamplerPtr
 regressionStratifiedCrossValiationRowSampler(std::size_t numberThreads,
                                              const core::CDataFrame& frame,
                                              std::size_t targetColumn,
                                              CPRNG::CXorOShiro128Plus rng,
-                                             std::size_t numberFolds,
+                                             std::size_t desiredCount,
                                              std::size_t numberBuckets,
-                                             const core::CPackedBitVector& allTrainingRowsMask) {
+                                             const core::CPackedBitVector& rowMask) {

     auto quantiles = CDataFrameUtils::columnQuantiles(
-                         numberThreads, frame, allTrainingRowsMask, {targetColumn},
+                         numberThreads, frame, rowMask, {targetColumn},
                          CQuantileSketch{CQuantileSketch::E_Linear, 50})
                          .first;
@@ -198,8 +201,7 @@ regressionStratifiedCrossValiationRowSampler(std::size_t numberThreads,
     };

     TDoubleVec bucketFrequencies;
-    doReduce(frame.readRows(numberThreads, 0, frame.numberRows(),
-                            countBucketRows, &allTrainingRowsMask),
+    doReduce(frame.readRows(numberThreads, 0, frame.numberRows(), countBucketRows, &rowMask),
              copyBucketRowCounts, reduceBucketRowCounts, bucketFrequencies);

     double totalCount{std::accumulate(bucketFrequencies.begin(), bucketFrequencies.end(), 0.0)};
@@ -208,8 +210,6 @@ regressionStratifiedCrossValiationRowSampler(std::size_t numberThreads,
     }

     TSizeVec bucketCounts;
-    std::size_t desiredCount{
-        (static_cast<std::size_t>(totalCount) + numberFolds / 2) / numberFolds};
     CSampling::weightedSample(desiredCount, bucketFrequencies, bucketCounts);
     LOG_TRACE(<< "desired bucket counts per fold = "
               << core::CContainerPrinter::print(bucketCounts));
@@ -495,17 +495,20 @@ CDataFrameUtils::stratifiedCrossValidationRowMasks(std::size_t numberThreads,
                                                    std::size_t numberFolds,
                                                    std::size_t numberBuckets,
                                                    const core::CPackedBitVector& allTrainingRowsMask) {
     TDoubleVec frequencies;
-    std::unique_ptr<CStratifiedSampler> sampler;
+    TStratifiedSamplerPtr sampler;
+
+    double numberTrainingRows{allTrainingRowsMask.manhattan()};
+    std::size_t desiredCount{
+        (static_cast<std::size_t>(numberTrainingRows) + numberFolds / 2) / numberFolds};

     if (frame.columnIsCategorical()[targetColumn]) {
         std::tie(sampler, frequencies) = classifierStratifiedCrossValidationRowSampler(
-            numberThreads, frame, targetColumn, rng, numberFolds, allTrainingRowsMask);
+            numberThreads, frame, targetColumn, rng, desiredCount, allTrainingRowsMask);
     } else {
         sampler = regressionStratifiedCrossValiationRowSampler(
-            numberThreads, frame, targetColumn, rng, numberFolds, numberBuckets,
-            allTrainingRowsMask);
+            numberThreads, frame, targetColumn, rng, desiredCount,
+            numberBuckets, allTrainingRowsMask);
     }

     LOG_TRACE(<< "number training rows = " << allTrainingRowsMask.manhattan());
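The hoisted desiredCount expression relies on the usual integer rounding identity: dividing n + k / 2 by k in integer arithmetic yields n / k rounded to nearest, so each fold is asked for roughly one numberFolds-th of the training rows. A small illustrative check:

#include <cstddef>
#include <iostream>

int main() {
    // (n + k / 2) / k in integer arithmetic is n / k rounded to nearest.
    std::size_t numberTrainingRows{1003};
    std::size_t numberFolds{5};
    std::size_t desiredCount{(numberTrainingRows + numberFolds / 2) / numberFolds};
    std::cout << desiredCount << '\n'; // 201, i.e. round(1003 / 5.0)
    return 0;
}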
@@ -586,13 +589,9 @@ CDataFrameUtils::categoryFrequencies(std::size_t numberThreads,

     TDoubleVecVec result;
     try {
-        if (doReduce(frame.readRows(numberThreads, 0, frame.numberRows(),
-                                    readCategoryCounts, &rowMask),
-                     copyCategoryCounts, reduceCategoryCounts, result) == false) {
-            HANDLE_FATAL(<< "Internal error: failed to calculate category"
-                         << " frequencies. Please report this problem.")
-            return result;
-        }
+        doReduce(frame.readRows(numberThreads, 0, frame.numberRows(),
+                                readCategoryCounts, &rowMask),
+                 copyCategoryCounts, reduceCategoryCounts, result);
     } catch (const std::exception& e) {
         HANDLE_FATAL(<< "Internal error: '" << e.what() << "' exception calculating"
                      << " category frequencies. Please report this problem.")
@@ -653,12 +652,8 @@ CDataFrameUtils::meanValueOfTargetForCategories(const CColumnValue& target,

     TMeanAccumulatorVecVec means;
     try {
-        if (doReduce(frame.readRows(numberThreads, 0, frame.numberRows(), readColumnMeans, &rowMask),
-                     copyColumnMeans, reduceColumnMeans, means) == false) {
-            HANDLE_FATAL(<< "Internal error: failed to calculate mean target values"
-                         << " for categories. Please report this problem.")
-            return result;
-        }
+        doReduce(frame.readRows(numberThreads, 0, frame.numberRows(), readColumnMeans, &rowMask),
+                 copyColumnMeans, reduceColumnMeans, means);
     } catch (const std::exception& e) {
         HANDLE_FATAL(<< "Internal error: '" << e.what() << "' exception calculating"
                      << " mean target values for categories. Please report this problem.")
@@ -741,58 +736,11 @@ CDataFrameUtils::maximumMinimumRecallClassWeights(std::size_t numberThreads,
                                                   std::size_t targetColumn,
                                                   const TReadPredictionFunc& readPrediction) {
-    if (numberClasses == 2) {
-        auto readQuantiles = core::bindRetrievableState(
-            [&](TQuantileSketchVec& quantiles, TRowItr beginRows, TRowItr endRows) {
-                for (auto row = beginRows; row != endRows; ++row) {
-                    if (isMissing((*row)[targetColumn]) == false) {
-                        quantiles[static_cast<std::size_t>((*row)[targetColumn])]
-                            .add(readPrediction(*row)(1));
-                    }
-                }
-            },
-            TQuantileSketchVec(2, CQuantileSketch{CQuantileSketch::E_Linear, 100}));
-        auto copyQuantiles = [](TQuantileSketchVec quantiles, TQuantileSketchVec& result) {
-            result = std::move(quantiles);
-        };
-        auto reduceQuantiles = [&](TQuantileSketchVec quantiles, TQuantileSketchVec& result) {
-            for (std::size_t i = 0; i < 2; ++i) {
-                result[i] += quantiles[i];
-            }
-        };
-
-        TQuantileSketchVec classProbabilityClassOneQuantiles;
-        if (doReduce(frame.readRows(numberThreads, 0, frame.numberRows(), readQuantiles, &rowMask),
-                     copyQuantiles, reduceQuantiles,
-                     classProbabilityClassOneQuantiles) == false) {
-            HANDLE_FATAL(<< "Failed to compute category quantiles")
-            return TDoubleVector::Ones(2);
-        }
-
-        auto minRecall = [&](double threshold) {
-            double cdf[2];
-            classProbabilityClassOneQuantiles[0].cdf(threshold, cdf[0]);
-            classProbabilityClassOneQuantiles[1].cdf(threshold, cdf[1]);
-            double recalls[]{cdf[0], 1.0 - cdf[1]};
-            return std::min(recalls[0], recalls[1]);
-        };
-
-        double threshold;
-        double minRecallAtThreshold;
-        std::size_t maxIterations{20};
-        CSolvers::maximize(0.0, 1.0, minRecall(0.0), minRecall(1.0), minRecall,
-                           1e-3, maxIterations, threshold, minRecallAtThreshold);
-        LOG_TRACE(<< "threshold = " << threshold
-                  << ", min recall at threshold = " << minRecallAtThreshold);
-
-        TDoubleVector result{2};
-        result(0) = 0.5 / (1.0 - threshold);
-        result(1) = 0.5 / threshold;
-        return result;
-    }
-
-    // TODO fixme
-    return TDoubleVector::Ones(numberClasses);
+    return numberClasses == 2
+               ? maximizeMinimumRecallForBinary(numberThreads, frame, rowMask,
+                                                targetColumn, readPrediction)
+               : maximizeMinimumRecallForMulticlass(numberThreads, frame, rowMask, numberClasses,
+                                                    targetColumn, readPrediction);
 }

 bool CDataFrameUtils::isMissing(double x) {
@@ -1049,6 +997,250 @@ CDataFrameUtils::metricMicWithColumnDataFrameOnDisk(const CColumnValue& target,
     return mics;
 }

+CDataFrameUtils::TDoubleVector
+CDataFrameUtils::maximizeMinimumRecallForBinary(std::size_t numberThreads,
+                                                const core::CDataFrame& frame,
+                                                const core::CPackedBitVector& rowMask,
+                                                std::size_t targetColumn,
+                                                const TReadPredictionFunc& readPrediction) {
+    TDoubleVector probabilities;
+    auto readQuantiles = core::bindRetrievableState(
+        [=](TQuantileSketchVec& quantiles, TRowItr beginRows, TRowItr endRows) mutable {
+            for (auto row = beginRows; row != endRows; ++row) {
+                if (isMissing((*row)[targetColumn]) == false) {
+                    std::size_t actualClass{static_cast<std::size_t>((*row)[targetColumn])};
+                    probabilities = readPrediction(*row);
+                    CTools::inplaceSoftmax(probabilities);
+                    quantiles[actualClass].add(probabilities(1));
+                }
+            }
+        },
+        TQuantileSketchVec(2, CQuantileSketch{CQuantileSketch::E_Linear, 100}));
+    auto copyQuantiles = [](TQuantileSketchVec quantiles, TQuantileSketchVec& result) {
+        result = std::move(quantiles);
+    };
+    auto reduceQuantiles = [&](TQuantileSketchVec quantiles, TQuantileSketchVec& result) {
+        for (std::size_t i = 0; i < 2; ++i) {
+            result[i] += quantiles[i];
+        }
+    };
+
+    TQuantileSketchVec classProbabilityClassOneQuantiles;
+    if (doReduce(frame.readRows(numberThreads, 0, frame.numberRows(), readQuantiles, &rowMask),
+                 copyQuantiles, reduceQuantiles, classProbabilityClassOneQuantiles) == false) {
+        HANDLE_FATAL(<< "Failed to compute category quantiles")
+        return TDoubleVector::Ones(2);
+    }
+
+    auto minRecall = [&](double threshold) {
+        double cdf[2];
+        classProbabilityClassOneQuantiles[0].cdf(threshold, cdf[0]);
+        classProbabilityClassOneQuantiles[1].cdf(threshold, cdf[1]);
+        double recalls[]{cdf[0], 1.0 - cdf[1]};
+        return std::min(recalls[0], recalls[1]);
+    };
+
+    double threshold;
+    double minRecallAtThreshold;
+    std::size_t maxIterations{20};
+    CSolvers::maximize(0.0, 1.0, minRecall(0.0), minRecall(1.0), minRecall,
+                       1e-3, maxIterations, threshold, minRecallAtThreshold);
+    LOG_TRACE(<< "threshold = " << threshold
+              << ", min recall at threshold = " << minRecallAtThreshold);
+
+    TDoubleVector result{2};
+    result(0) = threshold < 0.5 ? threshold / (1.0 - threshold) : 1.0;
+    result(1) = threshold < 0.5 ? 1.0 : (1.0 - threshold) / threshold;
+    return result;
+}
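The returned weights encode the recall-maximising threshold as a weighted argmax: for threshold < 0.5 they are (threshold / (1 - threshold), 1), and the decision w1 * p1 > w0 * (1 - p1) reduces to p1 > threshold because x / (1 - x) is increasing on (0, 1); the threshold >= 0.5 branch is symmetric. A small sketch of that equivalence (illustrative names):

#include <iostream>

// Decide the class from the class-one probability p1, once directly via the
// threshold and once via the weighted argmax the patch produces.
int thresholdDecision(double p1, double threshold) {
    return p1 > threshold ? 1 : 0;
}

int weightedDecision(double p1, double threshold) {
    // Weights as returned above for threshold < 0.5.
    double w0{threshold / (1.0 - threshold)};
    double w1{1.0};
    // w1 * p1 > w0 * (1 - p1)  <=>  p1 / (1 - p1) > threshold / (1 - threshold)
    //                          <=>  p1 > threshold.
    return w1 * p1 > w0 * (1.0 - p1) ? 1 : 0;
}

int main() {
    double threshold{0.3};
    for (double p1 : {0.1, 0.29, 0.31, 0.9}) {
        std::cout << p1 << ": " << thresholdDecision(p1, threshold) << ' '
                  << weightedDecision(p1, threshold) << '\n'; // always agree
    }
    return 0;
}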
+
+CDataFrameUtils::TDoubleVector
+CDataFrameUtils::maximizeMinimumRecallForMulticlass(std::size_t numberThreads,
+                                                    const core::CDataFrame& frame,
+                                                    const core::CPackedBitVector& rowMask,
+                                                    std::size_t numberClasses,
+                                                    std::size_t targetColumn,
+                                                    const TReadPredictionFunc& readPrediction) {
+
+    // Use a large random sample of the data frame and compute the expected
+    // optimisation objective for the whole data set from this.
+
+    using TDoubleMatrix = CDenseMatrix<double>;
+    using TMinAccumulator = CBasicStatistics::SMin<double>::TAccumulator;
+
+    CPRNG::CXorOShiro128Plus rng;
+    std::size_t numberSamples{std::min(std::size_t{1000}, rowMask.size())};
+
+    TStratifiedSamplerPtr sampler;
+    std::tie(sampler, std::ignore) = classifierStratifiedCrossValidationRowSampler(
+        numberThreads, frame, targetColumn, rng, numberSamples, rowMask);
+
+    TSizeVec rowIndices;
+    frame.readRows(1, 0, frame.numberRows(),
+                   [&](TRowItr beginRows, TRowItr endRows) {
+                       for (auto row = beginRows; row != endRows; ++row) {
+                           sampler->sample(*row);
+                       }
+                   },
+                   &rowMask);
+    sampler->finishSampling(rng, rowIndices);
+    std::sort(rowIndices.begin(), rowIndices.end());
+    core::CPackedBitVector sampleMask;
+    for (auto row : rowIndices) {
+        sampleMask.extend(false, row - sampleMask.size());
+        sampleMask.extend(true);
+    }
+    sampleMask.extend(false, rowMask.size() - sampleMask.size());
+    LOG_TRACE(<< "# row indices = " << rowIndices.size());
+
+    // Compute the count of each class in the sample set.
+    auto readClassCountsAndRecalls = core::bindRetrievableState(
+        [&](TDoubleVector& state, TRowItr beginRows, TRowItr endRows) {
+            for (auto row = beginRows; row != endRows; ++row) {
+                if (isMissing((*row)[targetColumn]) == false) {
+                    int j{static_cast<int>((*row)[targetColumn])};
+                    int k;
+                    readPrediction(*row).maxCoeff(&k);
+                    state(j) += 1.0;
+                    state(numberClasses + j) += j == k ? 1.0 : 0.0;
+                }
+            }
+        },
+        TDoubleVector{TDoubleVector::Zero(2 * numberClasses)});
+    auto copyClassCountsAndRecalls = [](TDoubleVector state, TDoubleVector& result) {
+        result = std::move(state);
+    };
+    auto reduceClassCountsAndRecalls =
+        [&](TDoubleVector state, TDoubleVector& result) { result += state; };
+    TDoubleVector classCountsAndRecalls;
+    doReduce(frame.readRows(numberThreads, 0, frame.numberRows(),
+                            readClassCountsAndRecalls, &sampleMask),
+             copyClassCountsAndRecalls, reduceClassCountsAndRecalls, classCountsAndRecalls);
+    TDoubleVector classCounts{classCountsAndRecalls.topRows(numberClasses)};
+    TDoubleVector classRecalls{classCountsAndRecalls.bottomRows(numberClasses)};
+    classRecalls.array() /= classCounts.array();
+    LOG_TRACE(<< "class counts = " << classCounts.transpose());
+    LOG_TRACE(<< "class recalls = " << classRecalls.transpose());
+
+    TSizeVec recallOrder(numberClasses);
+    std::iota(recallOrder.begin(), recallOrder.end(), 0);
+    std::sort(recallOrder.begin(), recallOrder.end(), [&](std::size_t lhs, std::size_t rhs) {
+        return classRecalls(lhs) > classRecalls(rhs);
+    });
+    LOG_TRACE(<< "decreasing recall order = " << core::CContainerPrinter::print(recallOrder));
+
+    // We want to solve max_w{min_j{recall(class_j)}} = max_w{min_j{c_j(w) / n_j}}
+    // where c_j(w) and n_j are the correct predictions for weight w and the count
+    // of class_j in the sample set, respectively. We use the equivalent formulation
+    //
+    //   min_w{max_j{f_j(w)}} = min_w{max_j{1 - c_j(w) / n_j}}
+    //
+    // and we can write f_j(w) as
+    //
+    //   f_j(w) = sum_i{1 - 1{argmax_k(w_k p_k(i)) == j}} / n_j                (1)
+    //
+    // where the sum runs over the rows of class j, p(i) denotes the predicted
+    // class probabilities for row i and 1{.} denotes the indicator function.
+    // (1) has a smooth relaxation given by
+    //
+    //   f_j(w) = sum_i{1 - softmax_j(w_1 p_1(i), ..., w_n p_n(i))} / n_j
+    //
+    // Note that this isn't convex so we use multiple restarts.
+
+    auto objective = [&](const TDoubleVector& weights) {
+        TDoubleVector probabilities;
+        TDoubleVector scores;
+        auto computeObjective = core::bindRetrievableState(
+            [=](TDoubleVector& state, TRowItr beginRows, TRowItr endRows) mutable {
+                for (auto row = beginRows; row != endRows; ++row) {
+                    if (isMissing((*row)[targetColumn]) == false) {
+                        std::size_t j{static_cast<std::size_t>((*row)[targetColumn])};
+                        probabilities = readPrediction(*row);
+                        CTools::inplaceSoftmax(probabilities);
+                        scores = probabilities.cwiseProduct(weights);
+                        CTools::inplaceSoftmax(scores);
+                        state(j) += (1.0 - scores(j)) / classCounts(j);
+                    }
+                }
+            },
+            TDoubleVector{TDoubleVector::Zero(numberClasses)});
+        auto copyObjective = [](TDoubleVector state, TDoubleVector& result) {
+            result = std::move(state);
+        };
+        auto reduceObjective = [&](TDoubleVector state, TDoubleVector& result) {
+            result += state;
+        };
+
+        TDoubleVector objective_;
+        doReduce(frame.readRows(numberThreads, 0, frame.numberRows(), computeObjective, &rowMask),
+                 copyObjective, reduceObjective, objective_);
+        return objective_.maxCoeff();
+    };
+
+    auto objectiveGradient = [&](const TDoubleVector& weights) {
+        TDoubleVector probabilities;
+        TDoubleVector scores;
+        auto computeObjectiveAndGradient = core::bindRetrievableState(
+            [=](TDoubleMatrix& state, TRowItr beginRows, TRowItr endRows) mutable {
+                for (auto row = beginRows; row != endRows; ++row) {
+                    if (isMissing((*row)[targetColumn]) == false) {
+                        std::size_t j{static_cast<std::size_t>((*row)[targetColumn])};
+                        probabilities = readPrediction(*row);
+                        CTools::inplaceSoftmax(probabilities);
+                        scores = probabilities.cwiseProduct(weights);
+                        CTools::inplaceSoftmax(scores);
+                        state(j, 0) += (1.0 - scores(j)) / classCounts(j);
+                        state.col(j + 1) +=
+                            scores(j) *
+                            probabilities
+                                .cwiseProduct(scores - TDoubleVector::Unit(numberClasses, j))
+                                .cwiseQuotient(classCounts);
+                    }
+                }
+            },
+            TDoubleMatrix{TDoubleMatrix::Zero(numberClasses, numberClasses + 1)});
+        auto copyObjectiveAndGradient = [](TDoubleMatrix state, TDoubleMatrix& result) {
+            result = std::move(state);
+        };
+        auto reduceObjectiveAndGradient =
+            [&](TDoubleMatrix state, TDoubleMatrix& result) { result += state; };
+
+        TDoubleMatrix objectiveAndGradient;
+        doReduce(frame.readRows(numberThreads, 0, frame.numberRows(),
+                                computeObjectiveAndGradient, &rowMask),
+                 copyObjectiveAndGradient, reduceObjectiveAndGradient, objectiveAndGradient);
+        std::size_t max;
+        objectiveAndGradient.col(0).maxCoeff(&max);
+        return TDoubleVector{objectiveAndGradient.col(max + 1)};
+    };
+
+    // We always try initialising with all weights equal because our minimization
+    // algorithm ensures we'll only decrease the initial value of the optimisation
+    // objective. This means we'll never do worse than not reweighting. Also, we
+    // expect weights to be (roughly) monotonic increasing for decreasing recall
+    // and use this to bias initial values.
+
+    TMinAccumulator minLoss;
+    TDoubleVector minWeights;
+    TDoubleVector w0{TDoubleVector::Ones(numberClasses)};
+    for (std::size_t i = 0; i < 5; ++i) {
+        CLbfgs<TDoubleVector> lbfgs{5};
+        double loss;
+        std::tie(w0, loss) = lbfgs.minimize(objective, objectiveGradient, std::move(w0));
+        LOG_TRACE(<< "weights* = " << w0.transpose() << ", loss* = " << loss);
+        if (minLoss.add(loss)) {
+            minWeights = std::move(w0);
+        }
+        w0 = TDoubleVector::Ones(numberClasses);
+        for (std::size_t j = 1; j < numberClasses; ++j) {
+            w0(j) = w0(j - 1) * CSampling::uniformSample(rng, 0.9, 1.3);
+        }
+    }
+
+    // Since we take argmax_i w_i p_i we can multiply by a constant. We arrange
+    // for the largest weight to always be one.
+    minWeights.array() /= minWeights.maxCoeff();
+    LOG_TRACE(<< "weights = " << minWeights.transpose());
+
+    return minWeights;
+}
+
 void CDataFrameUtils::removeMetricColumns(const core::CDataFrame& frame, TSizeVec& columnMask) {
     const auto& columnIsCategorical = frame.columnIsCategorical();
     columnMask.erase(std::remove_if(columnMask.begin(), columnMask.end(),
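The per-row gradient accumulated in objectiveGradient follows from differentiating the smooth relaxation: for a row of class j, d(1 - softmax_j(w . p)) / dw_k = softmax_j * p_k * (softmax_k - 1{k == j}), which is the cwiseProduct expression above up to the per-class count normalisation. A self-contained finite-difference check of that formula, with std::vector standing in for the Eigen types (illustrative names):

#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

using TDoubleVec = std::vector<double>;

// softmax of the weighted probabilities w[k] * p[k].
TDoubleVec weightedSoftmax(const TDoubleVec& p, const TDoubleVec& w) {
    TDoubleVec s(p.size());
    double Z{0.0};
    for (std::size_t k = 0; k < p.size(); ++k) {
        s[k] = std::exp(w[k] * p[k]);
        Z += s[k];
    }
    for (auto& sk : s) { sk /= Z; }
    return s;
}

// Smooth relaxation of "a row of class j is misclassified".
double relaxedError(const TDoubleVec& p, const TDoubleVec& w, std::size_t j) {
    return 1.0 - weightedSoftmax(p, w)[j];
}

int main() {
    TDoubleVec p{0.2, 0.5, 0.3}; // predicted class probabilities for one row
    TDoubleVec w{1.0, 0.8, 1.2}; // candidate class weights
    std::size_t j{0};            // the row's actual class

    // Analytic gradient: d(1 - s_j)/dw_k = s_j * p_k * (s_k - 1{k == j}).
    TDoubleVec s{weightedSoftmax(p, w)};
    for (std::size_t k = 0; k < p.size(); ++k) {
        double analytic{s[j] * p[k] * (s[k] - (k == j ? 1.0 : 0.0))};
        TDoubleVec wPlus{w};
        wPlus[k] += 1e-6;
        double numeric{(relaxedError(p, wPlus, j) - relaxedError(p, w, j)) / 1e-6};
        std::cout << analytic << " ~ " << numeric << '\n';
    }
    return 0;
}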
diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc
index fab188bd16..19712ea302 100644
--- a/lib/maths/unittest/CBoostedTreeTest.cc
+++ b/lib/maths/unittest/CBoostedTreeTest.cc
@@ -1039,17 +1039,17 @@ BOOST_AUTO_TEST_CASE(testImbalancedClasses) {
     TDoubleVec falseNegatives(2, 0.0);
     frame->readRows(1, [&](TRowItr beginRows, TRowItr endRows) {
         for (auto row = beginRows; row != endRows; ++row) {
-            double prediction{
-                classification->readAndAdjustPrediction(*row)[1] < 0.5 ? 0.0 : 1.0};
+            auto scores = classification->readAndAdjustPrediction(*row);
+            std::size_t prediction(scores[1] < scores[0] ? 0 : 1);
             if (row->index() >= trainRows && row->index() < trainRows + classesRowCounts[2]) {
                 // Actual is zero.
-                (prediction == 0.0 ? truePositives[0] : falseNegatives[0]) += 1.0;
-                (prediction == 0.0 ? trueNegatives[1] : falsePositives[1]) += 1.0;
+                (prediction == 0 ? truePositives[0] : falseNegatives[0]) += 1.0;
+                (prediction == 0 ? trueNegatives[1] : falsePositives[1]) += 1.0;
             } else if (row->index() >= trainRows + classesRowCounts[2]) {
                 // Actual is one.
-                (prediction == 1.0 ? truePositives[1] : falseNegatives[1]) += 1.0;
-                (prediction == 1.0 ? trueNegatives[0] : falsePositives[0]) += 1.0;
+                (prediction == 1 ? truePositives[1] : falseNegatives[1]) += 1.0;
+                (prediction == 1 ? trueNegatives[0] : falsePositives[0]) += 1.0;
             }
         }
     });
diff --git a/lib/maths/unittest/CDataFrameUtilsTest.cc b/lib/maths/unittest/CDataFrameUtilsTest.cc
index cb89e921bb..b005e46096 100644
--- a/lib/maths/unittest/CDataFrameUtilsTest.cc
+++ b/lib/maths/unittest/CDataFrameUtilsTest.cc
@@ -10,9 +10,12 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+#include
+#include
 #include
 #include
@@ -1202,4 +1205,115 @@ BOOST_AUTO_TEST_CASE(testCategoryMicWithColumnWithMissing) {
     }
 }

+BOOST_AUTO_TEST_CASE(testMaximumMinimumRecallClassWeights) {
+
+    // Test we reliably increase the minimum class recall for predictions with
+    // uneven accuracy.
+
+    using TDoubleVector = maths::CDenseVector<double>;
+    using TMemoryMappedFloatVector = maths::CMemoryMappedDenseVector<maths::CFloatStorage>;
+
+    std::size_t rows{5000};
+    std::size_t capacity{2000};
+
+    test::CRandomNumbers rng;
+
+    for (std::size_t numberClasses : {2, 3}) {
+
+        std::size_t cols{numberClasses + 1};
+
+        auto readPrediction = [&](const core::CDataFrame::TRowRef& row) {
+            return TMemoryMappedFloatVector{row.data(), static_cast<int>(numberClasses)};
+        };
+
+        TBoolVec categoricalColumns(cols, false);
+        categoricalColumns[numberClasses] = true;
+
+        for (std::size_t t = 0; t < 5; ++t) {
+            core::stopDefaultAsyncExecutor();
+
+            TDoubleVec predictions;
+            TSizeVec category;
+            auto frame = core::makeMainStorageDataFrame(cols, capacity).first;
+            frame->categoricalColumns(categoricalColumns);
+            for (std::size_t i = 0; i < rows; ++i) {
+                frame->writeRow([&](core::CDataFrame::TFloatVecItr column, std::int32_t&) {
+                    rng.generateUniformSamples(0, numberClasses, 1, category);
+                    rng.generateNormalSamples(0.0, 1.0, numberClasses, predictions);
+                    for (std::size_t j = 0; j < numberClasses; ++j) {
+                        column[j] += predictions[j];
+                    }
+                    column[category[0]] += static_cast<double>(category[0] + 1);
+                    column[numberClasses] = static_cast<double>(category[0]);
+                });
+            }
+            frame->finishWritingRows();
+
+            TDoubleVecVec minRecalls(2, TDoubleVec(2));
+            TDoubleVecVec maxRecalls(2, TDoubleVec(2));
+
+            std::size_t i{0};
+            for (auto numberThreads : {1, 4}) {
+
+                auto weights = maths::CDataFrameUtils::maximumMinimumRecallClassWeights(
+                    numberThreads, *frame, maskAll(rows), numberClasses,
+                    numberClasses, readPrediction);
+
+                TDoubleVector prediction;
+                TDoubleVector correct[2]{TDoubleVector::Zero(numberClasses),
+                                         TDoubleVector::Zero(numberClasses)};
+                TDoubleVector counts{TDoubleVector::Zero(numberClasses)};
+
+                frame->readRows(1, [&](core::CDataFrame::TRowItr beginRows,
+                                       core::CDataFrame::TRowItr endRows) {
+                    for (auto row = beginRows; row != endRows; ++row) {
+                        prediction = readPrediction(*row);
+                        maths::CTools::inplaceSoftmax(prediction);
+                        std::size_t weightedPredictedClass;
+                        weights.cwiseProduct(prediction).maxCoeff(&weightedPredictedClass);
+                        std::size_t actualClass{
+                            static_cast<std::size_t>((*row)[numberClasses])};
+                        if (weightedPredictedClass == actualClass) {
+                            correct[0](actualClass) += 1.0;
+                        }
+                        std::size_t unweightedPredictedClass;
+                        prediction.maxCoeff(&unweightedPredictedClass);
+                        if (unweightedPredictedClass == actualClass) {
+                            correct[1](actualClass) += 1.0;
+                        }
+                        counts(actualClass) += 1.0;
+                    }
+                });
+
+                LOG_TRACE(<< "weighted class recalls = "
+                          << correct[0].cwiseQuotient(counts).transpose());
+                LOG_TRACE(<< "unweighted class recalls = "
+                          << correct[1].cwiseQuotient(counts).transpose());
+
+                minRecalls[i][0] = correct[0].cwiseQuotient(counts).minCoeff();
+                maxRecalls[i][0] = correct[0].cwiseQuotient(counts).maxCoeff();
+                minRecalls[i][1] = correct[1].cwiseQuotient(counts).minCoeff();
+                maxRecalls[i][1] = correct[1].cwiseQuotient(counts).maxCoeff();
+
+                ++i;
+                core::startDefaultAsyncExecutor();
+            }
+
+            LOG_DEBUG(<< "min recalls = " << core::CContainerPrinter::print(minRecalls));
+            LOG_DEBUG(<< "max recalls = " << core::CContainerPrinter::print(maxRecalls));
+
+            // Threaded and non-threaded results are close.
+            BOOST_REQUIRE_CLOSE(minRecalls[0][0], minRecalls[1][0], 1.0); // 1 %
+
+            // We improved the minimum class recall by at least 10%.
+            BOOST_TEST_REQUIRE(minRecalls[0][0] > 1.1 * minRecalls[0][1]);
+
+            // The minimum and maximum class recalls are close: we're at the
+            // global maximum.
+            BOOST_TEST_REQUIRE(1.02 * minRecalls[0][0] > maxRecalls[0][0]);
+            BOOST_TEST_REQUIRE(1.02 * minRecalls[1][0] > maxRecalls[1][0]);
+        }
+    }
+
+    core::stopDefaultAsyncExecutor();
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/lib/maths/unittest/CToolsTest.cc b/lib/maths/unittest/CToolsTest.cc
index 0351236c51..d1f601cbac 100644
--- a/lib/maths/unittest/CToolsTest.cc
+++ b/lib/maths/unittest/CToolsTest.cc
@@ -1206,7 +1206,7 @@ BOOST_AUTO_TEST_CASE(testLgamma) {
     BOOST_REQUIRE_EQUAL(result, std::numeric_limits<double>::infinity());
 }

-BOOST_AUTO_TEST_CASE(testSoftMax) {
+BOOST_AUTO_TEST_CASE(testSoftmax) {
     // Test some invariants and that std::vector and maths::CDenseVector versions agree.

     using TDoubleVector = maths::CDenseVector<double>;
@@ -1231,4 +1231,33 @@ BOOST_AUTO_TEST_CASE(testSoftmax) {
     }
 }

+BOOST_AUTO_TEST_CASE(testLogSoftmax) {
+    // Test some invariants and that std::vector and maths::CDenseVector versions agree.
+
+    using TDoubleVector = maths::CDenseVector<double>;
+
+    test::CRandomNumbers rng;
+
+    TDoubleVec z;
+    for (std::size_t t = 0; t < 100; ++t) {
+
+        rng.generateUniformSamples(-3.0, 3.0, 5, z);
+        TDoubleVec p{z};
+        CTools::inplaceSoftmax(p);
+        TDoubleVec logP{z};
+        CTools::inplaceLogSoftmax(logP);
+
+        BOOST_TEST_REQUIRE(*std::max_element(logP.begin(), logP.end()) <= 0.0);
+        for (std::size_t i = 0; i < 5; ++i) {
+            BOOST_REQUIRE_CLOSE(std::log(p[i]), logP[i], 1e-6);
+        }
+
+        TDoubleVector logP_{TDoubleVector::fromStdVector(z)};
+        CTools::inplaceLogSoftmax(logP_);
+        for (std::size_t i = 0; i < 5; ++i) {
+            BOOST_REQUIRE_CLOSE(logP[i], logP_[i], 1e-6);
+        }
+    }
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/lib/test/CDataFrameAnalyzerTrainingFactory.cc b/lib/test/CDataFrameAnalyzerTrainingFactory.cc
index fa28917bcd..5ebf1aa412 100644
--- a/lib/test/CDataFrameAnalyzerTrainingFactory.cc
+++ b/lib/test/CDataFrameAnalyzerTrainingFactory.cc
@@ -11,18 +11,17 @@ namespace test {

 void CDataFrameAnalyzerTrainingFactory::appendPrediction(core::CDataFrame&,
                                                          std::size_t,
-                                                         double prediction,
+                                                         const TDouble2Vec& prediction,
                                                          TDoubleVec& predictions) {
-    predictions.push_back(prediction);
+    predictions.push_back(prediction[0]);
 }

 void CDataFrameAnalyzerTrainingFactory::appendPrediction(core::CDataFrame& frame,
                                                          std::size_t target,
-                                                         double class1Score,
+                                                         const TDouble2Vec& scores,
                                                          TStrVec& predictions) {
-    predictions.push_back(class1Score < 0.5
-                              ? frame.categoricalColumnValues()[target][0]
-                              : frame.categoricalColumnValues()[target][1]);
+    std::size_t i(std::max_element(scores.begin(), scores.end()) - scores.begin());
+    predictions.push_back(frame.categoricalColumnValues()[target][i]);
 }

 CDataFrameAnalyzerTrainingFactory::TDataFrameUPtr