Skip to content

More trainer extensions, bug fixes and consistency across trainer extensions #1524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Nov 10, 2018
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -1331,7 +1331,7 @@ var learningPipeline = reader.MakeNewEstimator()
IEstimator<ITransformer> dynamicPipe = learningPipeline.AsDynamic;

// Create a binary classification trainer.
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features");

// Append the OVA learner to the pipeline.
dynamicPipe = dynamicPipe.Append(new Ova(mlContext, binaryTrainer));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ public static void MatrixFactorizationInMemoryData()
// Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
// matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field
// names in the MatrixElement class.
var pipeline = new MatrixFactorizationTrainer(mlContext, nameof(MatrixElement.Value),
nameof(MatrixElement.MatrixColumnIndex), nameof(MatrixElement.MatrixRowIndex),
var pipeline = new MatrixFactorizationTrainer(mlContext,
nameof(MatrixElement.MatrixColumnIndex),
nameof(MatrixElement.MatrixRowIndex),
nameof(MatrixElement.Value),
advancedSettings: s =>
{
s.NumIterations = 10;
Expand Down
6 changes: 3 additions & 3 deletions docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public static void SDCA_BinaryClassification()
// Then append a binary classifier, setting the "Sentiment" column as the label of the dataset, and
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(label: "Sentiment", features: "Features", l2Const: 0.001f));
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Sentiment", featureColumn: "Features", l2Const: 0.001f));

// Step 3: Run Cross-Validation on this pipeline.
var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment");
Expand All @@ -60,8 +60,8 @@ public static void SDCA_BinaryClassification()
// we could do so by tweaking the 'advancedSettings'.
var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent
(label: "Sentiment",
features: "Features",
(labelColumn: "Sentiment",
featureColumn: "Features",
advancedSettings: s=>
{
s.ConvergenceTolerance = 0.01f; // The learning rate for adjusting bias from being regularized
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public static void FastTreeBinaryClassification()
row.Features,
numTrees: 100, // try: (int) 20-2000
numLeaves: 20, // try: (int) 2-128
minDatapointsInLeafs: 10, // try: (int) 1-100
minDatapointsInLeaves: 10, // try: (int) 1-100
learningRate: 0.2))) // try: (float) 0.025-0.4
.Append(row => (
Label: row.Label,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static void FastTreeRegression()
r.features,
numTrees: 100, // try: (int) 20-2000
numLeaves: 20, // try: (int) 2-128
minDatapointsInLeafs: 10, // try: (int) 1-100
minDatapointsInLeaves: 10, // try: (int) 1-100
learningRate: 0.2, // try: (float) 0.025-0.4
onFit: p => pred = p)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public Arguments()
BasePredictors = new[]
{
ComponentFactoryUtils.CreateFromFunction(
env => new MulticlassLogisticRegression(env, FeatureColumn, LabelColumn))
env => new MulticlassLogisticRegression(env, LabelColumn, FeatureColumn))
};
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.FastTree/BoostingFastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ protected BoostingFastTreeTrainerBase(IHostEnvironment env,
string groupIdColumn,
int numLeaves,
int numTrees,
int minDocumentsInLeafs,
int minDatapointsInLeaves,
double learningRate,
Action<TArgs> advancedSettings)
: base(env, label, featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDocumentsInLeafs, advancedSettings)
: base(env, label, featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDatapointsInLeaves, advancedSettings)
{

if (Args.LearningRates != learningRate)
Expand Down
8 changes: 4 additions & 4 deletions src/Microsoft.ML.FastTree/FastTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
string groupIdColumn,
int numLeaves,
int numTrees,
int minDocumentsInLeafs,
int minDatapointsInLeaves,
Action<TArgs> advancedSettings)
: base(Contracts.CheckRef(env, nameof(env)).Register(RegisterName), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), TrainerUtils.MakeU4ScalarColumn(groupIdColumn))
{
Expand All @@ -112,7 +112,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
// override with the directly provided values.
Args.NumLeaves = numLeaves;
Args.NumTrees = numTrees;
Args.MinDocumentsInLeafs = minDocumentsInLeafs;
Args.MinDocumentsInLeafs = minDatapointsInLeaves;

//apply the advanced args, if the user supplied any
advancedSettings?.Invoke(Args);
Expand All @@ -121,10 +121,10 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
Args.FeatureColumn = featureColumn;

if (weightColumn != null)
Args.WeightColumn = Optional<string>.Explicit(weightColumn); ;
Args.WeightColumn = Optional<string>.Explicit(weightColumn);

if (groupIdColumn != null)
Args.GroupIdColumn = Optional<string>.Explicit(groupIdColumn); ;
Args.GroupIdColumn = Optional<string>.Explicit(groupIdColumn);

// The discretization step renders this trainer non-parametric, and therefore it does not need normalization.
// Also since it builds its own internal discretized columnar structures, it cannot benefit from caching.
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.FastTree/FastTreeArguments.cs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ internal static class Defaults
{
internal const int NumTrees = 100;
internal const int NumLeaves = 20;
internal const int MinDocumentsInLeafs = 10;
internal const int MinDocumentsInLeaves = 10;
internal const double LearningRates = 0.2;
}

Expand Down Expand Up @@ -245,7 +245,7 @@ public abstract class TreeArgs : LearnerInputBaseWithGroupId
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data", ShortName = "mil", SortOrder = 3)]
[TGUI(Description = "Minimum number of training instances required to form a leaf", SuggestedSweeps = "1,10,50")]
[TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })]
public int MinDocumentsInLeafs = Defaults.MinDocumentsInLeafs;
public int MinDocumentsInLeafs = Defaults.MinDocumentsInLeaves;

// REVIEW: Different shortname than FastRank module. Same as the TLC FRWrapper.
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Total number of decision trees to create in the ensemble", ShortName = "iter", SortOrder = 1)]
Expand Down
10 changes: 5 additions & 5 deletions src/Microsoft.ML.FastTree/FastTreeClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -123,20 +123,20 @@ public sealed partial class FastTreeBinaryClassificationTrainer :
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public FastTreeBinaryClassificationTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
// Set the sigmoid parameter to the 2 * learning rate, for traditional FastTreeClassification loss
_sigmoidParameter = 2.0 * Args.LearningRates;
Expand Down
12 changes: 6 additions & 6 deletions src/Microsoft.ML.FastTree/FastTreeRanking.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,20 @@ public sealed partial class FastTreeRankingTrainer
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public FastTreeRankingTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string groupIdColumn,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string groupIdColumn = DefaultColumnNames.GroupId,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
Host.CheckNonEmpty(groupIdColumn, nameof(groupIdColumn));
}
Expand Down
10 changes: 5 additions & 5 deletions src/Microsoft.ML.FastTree/FastTreeRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,20 @@ public sealed partial class FastTreeRegressionTrainer
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public FastTreeRegressionTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
}

Expand Down
10 changes: 5 additions & 5 deletions src/Microsoft.ML.FastTree/FastTreeTweedie.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,20 @@ public sealed partial class FastTreeTweedieTrainer
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public FastTreeTweedieTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weightColumn = null,
int numLeaves = Defaults.NumLeaves,
int numTrees = Defaults.NumTrees,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
{
Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
Host.CheckNonEmpty(featureColumn, nameof(featureColumn));
Expand Down
10 changes: 5 additions & 5 deletions src/Microsoft.ML.FastTree/GamClassification.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,16 @@ internal BinaryClassificationGamTrainer(IHostEnvironment env, Arguments args)
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public BinaryClassificationGamTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weightColumn = null,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, minDatapointsInLeaves, learningRate, advancedSettings)
{
_sigmoidParameter = 1;
}
Expand Down
10 changes: 5 additions & 5 deletions src/Microsoft.ML.FastTree/GamRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ internal RegressionGamTrainer(IHostEnvironment env, Arguments args)
/// <param name="labelColumn">The name of the label column.</param>
/// <param name="featureColumn">The name of the feature column.</param>
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
/// <param name="learningRate">The learning rate.</param>
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
public RegressionGamTrainer(IHostEnvironment env,
string labelColumn,
string featureColumn,
string labelColumn = DefaultColumnNames.Label,
string featureColumn = DefaultColumnNames.Features,
string weightColumn = null,
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
double learningRate = Defaults.LearningRates,
Action<Arguments> advancedSettings = null)
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, minDocumentsInLeafs, learningRate, advancedSettings)
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, minDatapointsInLeaves, learningRate, advancedSettings)
{
}

Expand Down
Loading