Skip to content

More trainer extensions, bug fixes and consistency across trainer extensions #1524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Nov 10, 2018
Merged
2 changes: 1 addition & 1 deletion docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ var learningPipeline = reader.MakeNewEstimator()
IEstimator<ITransformer> dynamicPipe = learningPipeline.AsDynamic;

// Create a binary classification trainer.
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features");

// Append the OVA learner to the pipeline.
dynamicPipe = dynamicPipe.Append(new Ova(mlContext, binaryTrainer));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public static void FastTreeBinaryClassification()
row.Features,
numTrees: 100, // try: (int) 20-2000
numLeaves: 20, // try: (int) 2-128
minDatapointsInLeafs: 10, // try: (int) 1-100
minDatapointsInLeaves: 10, // try: (int) 1-100
learningRate: 0.2))) // try: (float) 0.025-0.4
.Append(row => (
Label: row.Label,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static void FastTreeRegression()
r.features,
numTrees: 100, // try: (int) 20-2000
numLeaves: 20, // try: (int) 2-128
minDatapointsInLeafs: 10, // try: (int) 1-100
minDatapointsInLeaves: 10, // try: (int) 1-100
learningRate: 0.2, // try: (float) 0.025-0.4
onFit: p => pred = p)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public Arguments()
BasePredictors = new[]
{
ComponentFactoryUtils.CreateFromFunction(
env => new MulticlassLogisticRegression(env, FeatureColumn, LabelColumn))
env => new MulticlassLogisticRegression(env, LabelColumn, FeatureColumn))
};
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.FastTree/BoostingFastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ protected BoostingFastTreeTrainerBase(IHostEnvironment env,
string groupIdColumn,
int numLeaves,
int numTrees,
int minDocumentsInLeafs,
int minDatapointsInLeaves,
double learningRate,
Action<TArgs> advancedSettings)
: base(env, label, featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDocumentsInLeafs, advancedSettings)
: base(env, label, featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDatapointsInLeaves, advancedSettings)
{

if (Args.LearningRates != learningRate)
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.FastTree/FastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
string groupIdColumn,
int numLeaves,
int numTrees,
int minDocumentsInLeafs,
int minDatapointsInLeaves,
Action<TArgs> advancedSettings)
: base(Contracts.CheckRef(env, nameof(env)).Register(RegisterName), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), TrainerUtils.MakeU4ScalarColumn(groupIdColumn))
{
Expand All @@ -112,7 +112,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
// override with the directly provided values.
Args.NumLeaves = numLeaves;
Args.NumTrees = numTrees;
Args.MinDocumentsInLeafs = minDocumentsInLeafs;
Args.MinDocumentsInLeafs = minDatapointsInLeaves;

// Apply the advanced args, if the user supplied any.
advancedSettings?.Invoke(Args);
Expand All @@ -121,10 +121,10 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
Args.FeatureColumn = featureColumn;

if (weightColumn != null)
Args.WeightColumn = Optional<string>.Explicit(weightColumn); ;
Args.WeightColumn = Optional<string>.Explicit(weightColumn);

if (groupIdColumn != null)
Args.GroupIdColumn = Optional<string>.Explicit(groupIdColumn); ;
Args.GroupIdColumn = Optional<string>.Explicit(groupIdColumn);

// The discretization step renders this trainer non-parametric, and therefore it does not need normalization.
// Also since it builds its own internal discretized columnar structures, it cannot benefit from caching.
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.FastTree/FastTreeArguments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ internal static class Defaults
{
internal const int NumTrees = 100;
internal const int NumLeaves = 20;
internal const int MinDocumentsInLeafs = 10;
internal const int MinDocumentsInLeaves = 10;
internal const double LearningRates = 0.2;
}

Expand Down Expand Up @@ -245,7 +245,7 @@ public abstract class TreeArgs : LearnerInputBaseWithGroupId
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data", ShortName = "mil", SortOrder = 3)]
[TGUI(Description = "Minimum number of training instances required to form a leaf", SuggestedSweeps = "1,10,50")]
[TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })]
public int MinDocumentsInLeafs = Defaults.MinDocumentsInLeafs;
public int MinDocumentsInLeafs = Defaults.MinDocumentsInLeaves;

// REVIEW: Different shortname than FastRank module. Same as the TLC FRWrapper.
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Total number of decision trees to create in the ensemble", ShortName = "iter", SortOrder = 1)]
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/FastTreeClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public sealed partial class FastTreeBinaryClassificationTrainer :
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
Expand All @@ -133,10 +133,10 @@ public FastTreeBinaryClassificationTrainer(IHostEnvironment env,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
// Set the sigmoid parameter to 2 * learning rate, for the traditional FastTreeClassification loss.
_sigmoidParameter = 2.0 * Args.LearningRates;
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/FastTreeRanking.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public sealed partial class FastTreeRankingTrainer
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public FastTreeRankingTrainer(IHostEnvironment env,
Expand All @@ -79,10 +79,10 @@ public FastTreeRankingTrainer(IHostEnvironment env,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
Host.CheckNonEmpty(groupIdColumn, nameof(groupIdColumn));
}
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/FastTreeRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public sealed partial class FastTreeRegressionTrainer
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
Expand All @@ -69,10 +69,10 @@ public FastTreeRegressionTrainer(IHostEnvironment env,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
}

Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/FastTreeTweedie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public sealed partial class FastTreeTweedieTrainer
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
Expand All @@ -66,10 +66,10 @@ public FastTreeTweedieTrainer(IHostEnvironment env,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/GamClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,16 @@ internal BinaryClassificationGamTrainer(IHostEnvironment env, Arguments args)
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public BinaryClassificationGamTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string weightColumn = null,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, minDatapointsInLeaves, learningRate, advancedSettings)
{
_sigmoidParameter = 1;
}
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/GamRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ internal RegressionGamTrainer(IHostEnvironment env, Arguments args)
/// <param name="labelColumn">The name of the label column.</param>
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public RegressionGamTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string weightColumn = null,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, minDatapointsInLeaves, learningRate, advancedSettings)
{
}

Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.FastTree/GamTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,14 @@ private protected GamTrainerBase(IHostEnvironment env,
SchemaShape.Column label,
string featureColumn,
string weightColumn,
int minDocumentsInLeafs,
int minDatapointsInLeaves,
double learningRate,
Action<TArgs> advancedSettings)
: base(Contracts.CheckRef(env, nameof(env)).Register(name), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn))
{
Args = new TArgs();

Args.MinDocuments = minDocumentsInLeafs;
Args.MinDocuments = minDatapointsInLeaves;
Args.LearningRates = learningRate;

// Apply the advanced args, if the user supplied any.
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.FastTree/RandomForest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ protected RandomForestTrainerBase(IHostEnvironment env,
string groupIdColumn,
int numLeaves,
int numTrees,
int minDocumentsInLeafs,
int minDatapointsInLeaves,
double learningRate,
Action<TArgs> advancedSettings,
bool quantileEnabled = false)
: base(env, label, featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, advancedSettings)
: base(env, label, featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, advancedSettings)
{
_quantileEnabled = quantileEnabled;
}
Expand Down
6 changes: 3 additions & 3 deletions src/Microsoft.ML.FastTree/RandomForestClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ public sealed class Arguments : FastForestArgumentsBase
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public FastForestClassification(IHostEnvironment env,
Expand All @@ -151,10 +151,10 @@ public FastForestClassification(IHostEnvironment env,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
Expand Down
Loading