@@ -55,7 +55,7 @@ const std::size_t MAX_NUMBER_TREES{static_cast<std::size_t>(2.0 / MIN_ETA + 0.5)
55
55
// for progress monitoring because we don't know what value we'll choose in the
56
56
// line search. Assuming it is less than one avoids a large pause in progress if
57
57
// it is reduced in the line search.
58
- const double LINE_SEARCH_ETA_MARGIN {0.5 };
58
+ const double MAIN_LOOP_ETA_SCALE_FOR_PROGRESS {0.5 };
59
59
60
60
double computeEta (std::size_t numberRegressors) {
61
61
// eta is the learning rate. There is a lot of empirical evidence that
@@ -315,7 +315,7 @@ void CBoostedTreeFactory::selectFeaturesAndEncodeCategories(const core::CDataFra
315
315
.minimumFrequencyToOneHotEncode (m_MinimumFrequencyToOneHotEncode)
316
316
.rowMask (m_TreeImpl->allTrainingRowsMask ())
317
317
.columnMask (std::move (regressors)));
318
- m_TreeImpl->m_TrainingProgress .increment (1 );
318
+ m_TreeImpl->m_TrainingProgress .increment (100 );
319
319
}
320
320
321
321
void CBoostedTreeFactory::determineFeatureDataTypes (const core::CDataFrame& frame) const {
@@ -741,7 +741,7 @@ void CBoostedTreeFactory::initializeUnsetEta(core::CDataFrame& frame) {
741
741
742
742
m_TreeImpl->m_TrainingProgress .incrementRange (
743
743
static_cast <int >(this ->mainLoopNumberSteps (m_TreeImpl->m_Eta )) -
744
- static_cast <int >(this ->mainLoopNumberSteps (LINE_SEARCH_ETA_MARGIN * eta)));
744
+ static_cast <int >(this ->mainLoopNumberSteps (MAIN_LOOP_ETA_SCALE_FOR_PROGRESS * eta)));
745
745
}
746
746
}
747
747
@@ -1162,7 +1162,7 @@ void CBoostedTreeFactory::initializeTrainingProgressMonitoring(const core::CData
1162
1162
//
1163
1163
// This comprises:
1164
1164
// - The cost of category encoding and feature selection which we count as
1165
- // one unit ,
1165
+ // one hundred units,
1166
1166
// - One unit for estimating the expected gain and sum curvature per node,
1167
1167
// - MAX_LINE_SEARCH_ITERATIONS * "maximum number trees" units per regularization
1168
1168
// parameter which isn't user defined,
@@ -1178,7 +1178,7 @@ void CBoostedTreeFactory::initializeTrainingProgressMonitoring(const core::CData
1178
1178
? *m_TreeImpl->m_EtaOverride
1179
1179
: computeEta (frame.numberColumns ())};
1180
1180
1181
- std::size_t totalNumberSteps{2 };
1181
+ std::size_t totalNumberSteps{101 };
1182
1182
std::size_t lineSearchMaximumNumberTrees{computeMaximumNumberTrees (eta)};
1183
1183
if (m_TreeImpl->m_RegularizationOverride .softTreeDepthLimit () == boost::none) {
1184
1184
totalNumberSteps += MAX_LINE_SEARCH_ITERATIONS * lineSearchMaximumNumberTrees;
@@ -1196,10 +1196,10 @@ void CBoostedTreeFactory::initializeTrainingProgressMonitoring(const core::CData
1196
1196
totalNumberSteps += MAX_LINE_SEARCH_ITERATIONS * lineSearchMaximumNumberTrees;
1197
1197
}
1198
1198
if (m_TreeImpl->m_EtaOverride == boost::none) {
1199
- totalNumberSteps += MAX_LINE_SEARCH_ITERATIONS * lineSearchMaximumNumberTrees *
1200
- computeMaximumNumberTrees (LINE_SEARCH_ETA_MARGIN * eta);
1199
+ totalNumberSteps += MAX_LINE_SEARCH_ITERATIONS *
1200
+ computeMaximumNumberTrees (MAIN_LOOP_ETA_SCALE_FOR_PROGRESS * eta);
1201
1201
}
1202
- totalNumberSteps += this ->mainLoopNumberSteps (LINE_SEARCH_ETA_MARGIN * eta);
1202
+ totalNumberSteps += this ->mainLoopNumberSteps (MAIN_LOOP_ETA_SCALE_FOR_PROGRESS * eta);
1203
1203
LOG_TRACE (<< " total number steps = " << totalNumberSteps);
1204
1204
m_TreeImpl->m_TrainingProgress = core::CLoopProgress{
1205
1205
totalNumberSteps, m_TreeImpl->m_Instrumentation ->progressCallback (), 1.0 , 1024 };
0 commit comments