Skip to content

Commit 851558d

Browse files
authored
More trainer extensions, bug fixes and consistency across trainer extensions (#1524)
* Fixing the GAM trainers so that they extend the right context. Removing unused arguments from the signatures of the trainer extensions. The number of clusters set in advancedSettings now overrides the number of clusters passed directly to KMeans.
1 parent e968757 commit 851558d

File tree

85 files changed

+757
-733
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+757
-733
lines changed

docs/code/MlNetCookBook.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1331,7 +1331,7 @@ var learningPipeline = reader.MakeNewEstimator()
13311331
IEstimator<ITransformer> dynamicPipe = learningPipeline.AsDynamic;
13321332

13331333
// Create a binary classification trainer.
1334-
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron();
1334+
var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features");
13351335

13361336
// Append the OVA learner to the pipeline.
13371337
dynamicPipe = dynamicPipe.Append(new Ova(mlContext, binaryTrainer));

docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs

+4-2
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,10 @@ public static void MatrixFactorizationInMemoryData()
7171
// Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
7272
// matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field
7373
// names' in MatrixElement class.
74-
var pipeline = new MatrixFactorizationTrainer(mlContext, nameof(MatrixElement.Value),
75-
nameof(MatrixElement.MatrixColumnIndex), nameof(MatrixElement.MatrixRowIndex),
74+
var pipeline = new MatrixFactorizationTrainer(mlContext,
75+
nameof(MatrixElement.MatrixColumnIndex),
76+
nameof(MatrixElement.MatrixRowIndex),
77+
nameof(MatrixElement.Value),
7678
advancedSettings: s =>
7779
{
7880
s.NumIterations = 10;

docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public static void SDCA_BinaryClassification()
4848
// Then append a binary classifier, setting the "Label" column as the label of the dataset, and
4949
// the "Features" column produced by FeaturizeText as the features column.
5050
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
51-
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(label: "Sentiment", features: "Features", l2Const: 0.001f));
51+
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Sentiment", featureColumn: "Features", l2Const: 0.001f));
5252

5353
// Step 3: Run Cross-Validation on this pipeline.
5454
var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment");
@@ -60,8 +60,8 @@ public static void SDCA_BinaryClassification()
6060
// we could do so by tweaking the 'advancedSetting'.
6161
var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
6262
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent
63-
(label: "Sentiment",
64-
features: "Features",
63+
(labelColumn: "Sentiment",
64+
featureColumn: "Features",
6565
advancedSettings: s=>
6666
{
6767
s.ConvergenceTolerance = 0.01f; // The learning rate for adjusting bias from being regularized

docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ public static void FastTreeBinaryClassification()
8989
row.Features,
9090
numTrees: 100, // try: (int) 20-2000
9191
numLeaves: 20, // try: (int) 2-128
92-
minDatapointsInLeafs: 10, // try: (int) 1-100
92+
minDatapointsInLeaves: 10, // try: (int) 1-100
9393
learningRate: 0.2))) // try: (float) 0.025-0.4
9494
.Append(row => (
9595
Label: row.Label,

docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public static void FastTreeRegression()
4747
r.features,
4848
numTrees: 100, // try: (int) 20-2000
4949
numLeaves: 20, // try: (int) 2-128
50-
minDatapointsInLeafs: 10, // try: (int) 1-100
50+
minDatapointsInLeaves: 10, // try: (int) 1-100
5151
learningRate: 0.2, // try: (float) 0.025-0.4
5252
onFit: p => pred = p)
5353
)

src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public Arguments()
5959
BasePredictors = new[]
6060
{
6161
ComponentFactoryUtils.CreateFromFunction(
62-
env => new MulticlassLogisticRegression(env, FeatureColumn, LabelColumn))
62+
env => new MulticlassLogisticRegression(env, LabelColumn, FeatureColumn))
6363
};
6464
}
6565
}

src/Microsoft.ML.FastTree/BoostingFastTree.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ protected BoostingFastTreeTrainerBase(IHostEnvironment env,
2828
string groupIdColumn,
2929
int numLeaves,
3030
int numTrees,
31-
int minDocumentsInLeafs,
31+
int minDatapointsInLeaves,
3232
double learningRate,
3333
Action<TArgs> advancedSettings)
34-
: base(env, label, featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDocumentsInLeafs, advancedSettings)
34+
: base(env, label, featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDatapointsInLeaves, advancedSettings)
3535
{
3636

3737
if (Args.LearningRates != learningRate)

src/Microsoft.ML.FastTree/FastTree.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
102102
string groupIdColumn,
103103
int numLeaves,
104104
int numTrees,
105-
int minDocumentsInLeafs,
105+
int minDatapointsInLeaves,
106106
Action<TArgs> advancedSettings)
107107
: base(Contracts.CheckRef(env, nameof(env)).Register(RegisterName), TrainerUtils.MakeR4VecFeature(featureColumn), label, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), TrainerUtils.MakeU4ScalarColumn(groupIdColumn))
108108
{
@@ -112,7 +112,7 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
112112
// override with the directly provided values.
113113
Args.NumLeaves = numLeaves;
114114
Args.NumTrees = numTrees;
115-
Args.MinDocumentsInLeafs = minDocumentsInLeafs;
115+
Args.MinDocumentsInLeafs = minDatapointsInLeaves;
116116

117117
//apply the advanced args, if the user supplied any
118118
advancedSettings?.Invoke(Args);
@@ -121,10 +121,10 @@ private protected FastTreeTrainerBase(IHostEnvironment env,
121121
Args.FeatureColumn = featureColumn;
122122

123123
if (weightColumn != null)
124-
Args.WeightColumn = Optional<string>.Explicit(weightColumn); ;
124+
Args.WeightColumn = Optional<string>.Explicit(weightColumn);
125125

126126
if (groupIdColumn != null)
127-
Args.GroupIdColumn = Optional<string>.Explicit(groupIdColumn); ;
127+
Args.GroupIdColumn = Optional<string>.Explicit(groupIdColumn);
128128

129129
// The discretization step renders this trainer non-parametric, and therefore it does not need normalization.
130130
// Also since it builds its own internal discretized columnar structures, it cannot benefit from caching.

src/Microsoft.ML.FastTree/FastTreeArguments.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ internal static class Defaults
143143
{
144144
internal const int NumTrees = 100;
145145
internal const int NumLeaves = 20;
146-
internal const int MinDocumentsInLeafs = 10;
146+
internal const int MinDocumentsInLeaves = 10;
147147
internal const double LearningRates = 0.2;
148148
}
149149

@@ -245,7 +245,7 @@ public abstract class TreeArgs : LearnerInputBaseWithGroupId
245245
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data", ShortName = "mil", SortOrder = 3)]
246246
[TGUI(Description = "Minimum number of training instances required to form a leaf", SuggestedSweeps = "1,10,50")]
247247
[TlcModule.SweepableDiscreteParamAttribute("MinDocumentsInLeafs", new object[] { 1, 10, 50 })]
248-
public int MinDocumentsInLeafs = Defaults.MinDocumentsInLeafs;
248+
public int MinDocumentsInLeafs = Defaults.MinDocumentsInLeaves;
249249

250250
// REVIEW: Different shortname than FastRank module. Same as the TLC FRWrapper.
251251
[Argument(ArgumentType.LastOccurenceWins, HelpText = "Total number of decision trees to create in the ensemble", ShortName = "iter", SortOrder = 1)]

src/Microsoft.ML.FastTree/FastTreeClassification.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -123,20 +123,20 @@ public sealed partial class FastTreeBinaryClassificationTrainer :
123123
/// <param name="featureColumn">The name of the feature column.</param>
124124
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
125125
/// <param name="learningRate">The learning rate.</param>
126-
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
126+
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
127127
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
128128
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
129129
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
130130
public FastTreeBinaryClassificationTrainer(IHostEnvironment env,
131-
string labelColumn,
132-
string featureColumn,
131+
string labelColumn = DefaultColumnNames.Label,
132+
string featureColumn = DefaultColumnNames.Features,
133133
string weightColumn = null,
134134
int numLeaves = Defaults.NumLeaves,
135135
int numTrees = Defaults.NumTrees,
136-
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
136+
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
137137
double learningRate = Defaults.LearningRates,
138138
Action<Arguments> advancedSettings = null)
139-
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
139+
: base(env, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
140140
{
141141
// Set the sigmoid parameter to the 2 * learning rate, for traditional FastTreeClassification loss
142142
_sigmoidParameter = 2.0 * Args.LearningRates;

src/Microsoft.ML.FastTree/FastTreeRanking.cs

+6-6
Original file line numberDiff line numberDiff line change
@@ -69,20 +69,20 @@ public sealed partial class FastTreeRankingTrainer
6969
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
7070
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
7171
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
72-
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
72+
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
7373
/// <param name="learningRate">The learning rate.</param>
7474
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
7575
public FastTreeRankingTrainer(IHostEnvironment env,
76-
string labelColumn,
77-
string featureColumn,
78-
string groupIdColumn,
76+
string labelColumn = DefaultColumnNames.Label,
77+
string featureColumn = DefaultColumnNames.Features,
78+
string groupIdColumn = DefaultColumnNames.GroupId,
7979
string weightColumn = null,
8080
int numLeaves = Defaults.NumLeaves,
8181
int numTrees = Defaults.NumTrees,
82-
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
82+
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
8383
double learningRate = Defaults.LearningRates,
8484
Action<Arguments> advancedSettings = null)
85-
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
85+
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
8686
{
8787
Host.CheckNonEmpty(groupIdColumn, nameof(groupIdColumn));
8888
}

src/Microsoft.ML.FastTree/FastTreeRegression.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,20 @@ public sealed partial class FastTreeRegressionTrainer
5959
/// <param name="featureColumn">The name of the feature column.</param>
6060
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
6161
/// <param name="learningRate">The learning rate.</param>
62-
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
62+
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
6363
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
6464
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
6565
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
6666
public FastTreeRegressionTrainer(IHostEnvironment env,
67-
string labelColumn,
68-
string featureColumn,
67+
string labelColumn = DefaultColumnNames.Label,
68+
string featureColumn = DefaultColumnNames.Features,
6969
string weightColumn = null,
7070
int numLeaves = Defaults.NumLeaves,
7171
int numTrees = Defaults.NumTrees,
72-
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
72+
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
7373
double learningRate = Defaults.LearningRates,
7474
Action<Arguments> advancedSettings = null)
75-
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
75+
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
7676
{
7777
}
7878

src/Microsoft.ML.FastTree/FastTreeTweedie.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,20 @@ public sealed partial class FastTreeTweedieTrainer
5656
/// <param name="featureColumn">The name of the feature column.</param>
5757
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
5858
/// <param name="learningRate">The learning rate.</param>
59-
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
59+
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
6060
/// <param name="numLeaves">The max number of leaves in each regression tree.</param>
6161
/// <param name="numTrees">Total number of decision trees to create in the ensemble.</param>
6262
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
6363
public FastTreeTweedieTrainer(IHostEnvironment env,
64-
string labelColumn,
65-
string featureColumn,
64+
string labelColumn = DefaultColumnNames.Label,
65+
string featureColumn = DefaultColumnNames.Features,
6666
string weightColumn = null,
6767
int numLeaves = Defaults.NumLeaves,
6868
int numTrees = Defaults.NumTrees,
69-
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
69+
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
7070
double learningRate = Defaults.LearningRates,
7171
Action<Arguments> advancedSettings = null)
72-
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDocumentsInLeafs, learningRate, advancedSettings)
72+
: base(env, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings)
7373
{
7474
Host.CheckNonEmpty(labelColumn, nameof(labelColumn));
7575
Host.CheckNonEmpty(featureColumn, nameof(featureColumn));

src/Microsoft.ML.FastTree/GamClassification.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -63,16 +63,16 @@ internal BinaryClassificationGamTrainer(IHostEnvironment env, Arguments args)
6363
/// <param name="featureColumn">The name of the feature column.</param>
6464
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
6565
/// <param name="learningRate">The learning rate.</param>
66-
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
66+
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
6767
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
6868
public BinaryClassificationGamTrainer(IHostEnvironment env,
69-
string labelColumn,
70-
string featureColumn,
69+
string labelColumn = DefaultColumnNames.Label,
70+
string featureColumn = DefaultColumnNames.Features,
7171
string weightColumn = null,
72-
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
72+
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
7373
double learningRate = Defaults.LearningRates,
7474
Action<Arguments> advancedSettings = null)
75-
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, minDocumentsInLeafs, learningRate, advancedSettings)
75+
: base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, minDatapointsInLeaves, learningRate, advancedSettings)
7676
{
7777
_sigmoidParameter = 1;
7878
}

src/Microsoft.ML.FastTree/GamRegression.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,17 @@ internal RegressionGamTrainer(IHostEnvironment env, Arguments args)
5151
/// <param name="labelColumn">The name of the label column.</param>
5252
/// <param name="featureColumn">The name of the feature column.</param>
5353
/// <param name="weightColumn">The name for the column containing the initial weight.</param>
54-
/// <param name="minDocumentsInLeafs">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
54+
/// <param name="minDatapointsInLeaves">The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data.</param>
5555
/// <param name="learningRate">The learning rate.</param>
5656
/// <param name="advancedSettings">A delegate to apply all the advanced arguments to the algorithm.</param>
5757
public RegressionGamTrainer(IHostEnvironment env,
58-
string labelColumn,
59-
string featureColumn,
58+
string labelColumn = DefaultColumnNames.Label,
59+
string featureColumn = DefaultColumnNames.Features,
6060
string weightColumn = null,
61-
int minDocumentsInLeafs = Defaults.MinDocumentsInLeafs,
61+
int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves,
6262
double learningRate = Defaults.LearningRates,
6363
Action<Arguments> advancedSettings = null)
64-
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, minDocumentsInLeafs, learningRate, advancedSettings)
64+
: base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, minDatapointsInLeaves, learningRate, advancedSettings)
6565
{
6666
}
6767

0 commit comments

Comments
 (0)