From a785c0c65e41e6d50f21d1713c66d1a62b97f3b3 Mon Sep 17 00:00:00 2001 From: Tom Veasey Date: Tue, 1 Oct 2019 15:53:35 +0100 Subject: [PATCH] Fix out-of-bounds read --- lib/maths/CBoostedTreeImpl.cc | 5 +-- lib/maths/unittest/CBoostedTreeTest.cc | 46 ++++++++++++++++++++++++++ lib/maths/unittest/CBoostedTreeTest.h | 1 + 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/lib/maths/CBoostedTreeImpl.cc b/lib/maths/CBoostedTreeImpl.cc index 5055f1b49e..522f845c40 100644 --- a/lib/maths/CBoostedTreeImpl.cc +++ b/lib/maths/CBoostedTreeImpl.cc @@ -364,8 +364,9 @@ CBoostedTreeImpl::gainAndCurvatureAtPercentile(double percentile, return {0.0, 0.0}; } - std::size_t index{static_cast( - percentile * static_cast(gains.size()) / 100.0 + 0.5)}; + std::size_t index{std::min( + static_cast(percentile * static_cast(gains.size()) / 100.0 + 0.5), + gains.size() - 1)}; std::nth_element(gains.begin(), gains.begin() + index, gains.end()); std::nth_element(curvatures.begin(), curvatures.begin() + index, curvatures.end()); diff --git a/lib/maths/unittest/CBoostedTreeTest.cc b/lib/maths/unittest/CBoostedTreeTest.cc index 7a8096df0c..2152e4ac1d 100644 --- a/lib/maths/unittest/CBoostedTreeTest.cc +++ b/lib/maths/unittest/CBoostedTreeTest.cc @@ -650,6 +650,50 @@ void CBoostedTreeTest::testIntegerRegressor() { CPPUNIT_ASSERT(modelRSquared > 0.99); } +void CBoostedTreeTest::testSingleSplit() { + + // We were getting an out-of-bound read in initialization when running on + // data with only two distinct values for the target variable. This test + // fails intermittently without the fix. + + test::CRandomNumbers rng; + + std::size_t rows{100}; + std::size_t cols{2}; + + TDoubleVec x; + rng.generateUniformSamples(0.0, 1.0, rows, x); + for (auto& xi : x) { + xi = std::floor(xi + 0.5); + } + + auto frame = core::makeMainStorageDataFrame(cols).first; + frame->categoricalColumns({false, false}); + for (std::size_t i = 0; i < rows; ++i) { + frame->writeRow([&](core::CDataFrame::TFloatVecItr column, std::int32_t&) { + *(column++) = x[i]; + *column = 10.0 * x[i]; + }); + } + frame->finishWritingRows(); + + auto regression = maths::CBoostedTreeFactory::constructFromParameters( + 1, std::make_unique()) + .buildFor(*frame, cols - 1); + + regression->train(); + + double modelBias; + double modelRSquared; + std::tie(modelBias, modelRSquared) = computeEvaluationMetrics( + *frame, 0, rows, regression->columnHoldingPrediction(frame->numberColumns()), + [](const TRowRef& row) { return 10.0 * row[0]; }, 0.0); + + LOG_DEBUG(<< "bias = " << modelBias); + LOG_DEBUG(<< " R^2 = " << modelRSquared); + CPPUNIT_ASSERT(modelRSquared > 0.99); +} + void CBoostedTreeTest::testTranslationInvariance() { // We should get similar performance independent of fixed shifts for the target. @@ -1036,6 +1080,8 @@ CppUnit::Test* CBoostedTreeTest::suite() { &CBoostedTreeTest::testCategoricalRegressors)); suiteOfTests->addTest(new CppUnit::TestCaller( "CBoostedTreeTest::testIntegerRegressor", &CBoostedTreeTest::testIntegerRegressor)); + suiteOfTests->addTest(new CppUnit::TestCaller( + "CBoostedTreeTest::testSingleSplit", &CBoostedTreeTest::testSingleSplit)); suiteOfTests->addTest(new CppUnit::TestCaller( "CBoostedTreeTest::testTranslationInvariance", &CBoostedTreeTest::testTranslationInvariance)); diff --git a/lib/maths/unittest/CBoostedTreeTest.h b/lib/maths/unittest/CBoostedTreeTest.h index a0e8748e37..7df8c4906d 100644 --- a/lib/maths/unittest/CBoostedTreeTest.h +++ b/lib/maths/unittest/CBoostedTreeTest.h @@ -19,6 +19,7 @@ class CBoostedTreeTest : public CppUnit::TestFixture { void testConstantTarget(); void testCategoricalRegressors(); void testIntegerRegressor(); + void testSingleSplit(); void testTranslationInvariance(); void testEstimateMemoryUsedByTrain(); void testProgressMonitoring();