From f698b96609b070a0b9d5e2740e6d28b557f04b02 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Sun, 4 Nov 2018 08:07:54 -0800 Subject: [PATCH 01/12] fixing GAM trainers extending the right context. Removing unused arguments from the extensions of their signatures. The number of clusters set in advancedSettings now overrides the number of clusters passed directly in KMeans. --- src/Microsoft.ML.FastTree/FastTree.cs | 4 +- .../RandomForestRegression.cs | 6 +- .../TreeTrainersCatalog.cs | 98 ++++++++++++++----- .../KMeansCatalog.cs | 2 +- .../KMeansPlusPlusTrainer.cs | 20 ++-- 5 files changed, 90 insertions(+), 40 deletions(-) diff --git a/src/Microsoft.ML.FastTree/FastTree.cs b/src/Microsoft.ML.FastTree/FastTree.cs index 60c9333038..65a39e22e9 100644 --- a/src/Microsoft.ML.FastTree/FastTree.cs +++ b/src/Microsoft.ML.FastTree/FastTree.cs @@ -121,10 +121,10 @@ private protected FastTreeTrainerBase(IHostEnvironment env, Args.FeatureColumn = featureColumn; if (weightColumn != null) - Args.WeightColumn = Optional.Explicit(weightColumn); ; + Args.WeightColumn = Optional.Explicit(weightColumn); if (groupIdColumn != null) - Args.GroupIdColumn = Optional.Explicit(groupIdColumn); ; + Args.GroupIdColumn = Optional.Explicit(groupIdColumn); // The discretization step renders this trainer non-parametric, and therefore it does not need normalization. // Also since it builds its own internal discretized columnar structures, it cannot benefit from caching. diff --git a/src/Microsoft.ML.FastTree/RandomForestRegression.cs b/src/Microsoft.ML.FastTree/RandomForestRegression.cs index 8349888cd5..6b91cb955d 100644 --- a/src/Microsoft.ML.FastTree/RandomForestRegression.cs +++ b/src/Microsoft.ML.FastTree/RandomForestRegression.cs @@ -160,11 +160,11 @@ public sealed class Arguments : FastForestArgumentsBase /// The private instance of . /// The name of the label column. /// The name of the feature column. - /// The name for the column containing the initial weight. - /// The learning rate. 
- /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data. + /// The optional name for the column containing the initial weight. /// The max number of leaves in each regression tree. /// Total number of decision trees to create in the ensemble. + /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data. + /// The learning rate. /// A delegate to apply all the advanced arguments to the algorithm. public FastForestRegression(IHostEnvironment env, string labelColumn, diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index 2d6d1c25ef..b73923d3eb 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML { /// - /// FastTree extension methods. + /// Tree extension methods. /// public static class TreeExtensions { @@ -27,8 +27,8 @@ public static class TreeExtensions /// The learning rate. /// Algorithm advanced settings. public static FastTreeRegressionTrainer FastTree(this RegressionContext.RegressionTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -54,8 +54,8 @@ public static FastTreeRegressionTrainer FastTree(this RegressionContext.Regressi /// The learning rate. /// Algorithm advanced settings. 
public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -71,7 +71,7 @@ public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassifica /// /// Ranks a series of inputs based on their relevance, training a decision tree ranking model through the . /// - /// The . + /// The . /// The label column. /// The features column. /// The groupId column. @@ -82,9 +82,9 @@ public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassifica /// The learning rate. /// Algorithm advanced settings. public static FastTreeRankingTrainer FastTree(this RankingContext.RankingTrainers ctx, - string label = DefaultColumnNames.Label, - string groupId = DefaultColumnNames.GroupId, - string features = DefaultColumnNames.Features, + string label, + string features, + string groupId , string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -100,21 +100,17 @@ public static FastTreeRankingTrainer FastTree(this RankingContext.RankingTrainer /// /// Predict a target using a decision tree regression model trained with the . /// - /// The . + /// The . /// The label column. /// The features column. /// The optional weights column. - /// Total number of decision trees to create in the ensemble. - /// The maximum number of leaves per decision tree. /// The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. /// Algorithm advanced settings. 
- public static BinaryClassificationGamTrainer GeneralizedAdditiveMethods(this RegressionContext.RegressionTrainers ctx, + public static BinaryClassificationGamTrainer GeneralizedAdditiveMethods(this BinaryClassificationContext.BinaryClassificationTrainers ctx, string label = DefaultColumnNames.Label, string features = DefaultColumnNames.Features, string weights = null, - int numLeaves = Defaults.NumLeaves, - int numTrees = Defaults.NumTrees, int minDatapointsInLeafs = Defaults.MinDocumentsInLeafs, double learningRate = Defaults.LearningRates, Action advancedSettings = null) @@ -125,23 +121,19 @@ public static BinaryClassificationGamTrainer GeneralizedAdditiveMethods(this Reg } /// - /// Predict a target using a decision tree binary classification model trained with the . + /// Predict a target using a decision tree binary classification model trained with the . /// - /// The . + /// The . /// The label column. /// The features column. /// The optional weights column. - /// Total number of decision trees to create in the ensemble. - /// The maximum number of leaves per decision tree. /// The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. /// Algorithm advanced settings. - public static RegressionGamTrainer GeneralizedAdditiveMethods(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + public static RegressionGamTrainer GeneralizedAdditiveMethods(this RegressionContext.RegressionTrainers ctx, + string label, + string features, string weights = null, - int numLeaves = Defaults.NumLeaves, - int numTrees = Defaults.NumTrees, int minDatapointsInLeafs = Defaults.MinDocumentsInLeafs, double learningRate = Defaults.LearningRates, Action advancedSettings = null) @@ -164,8 +156,8 @@ public static RegressionGamTrainer GeneralizedAdditiveMethods(this BinaryClassif /// The learning rate. 
/// Algorithm advanced settings. public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionContext.RegressionTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -177,5 +169,59 @@ public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionContext.Regr var env = CatalogUtils.GetEnvironment(ctx); return new FastTreeTweedieTrainer(env, label, features, weights, numLeaves, numTrees, minDatapointsInLeafs, learningRate, advancedSettings); } + + /// + /// Predict a target using a decision tree regression model trained with the . + /// + /// The . + /// The label column. + /// The features column. + /// The optional weights column. + /// Total number of decision trees to create in the ensemble. + /// The maximum number of leaves per decision tree. + /// The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data. + /// The learning rate. + /// Algorithm advanced settings. + public static FastForestRegression FastForest(this RegressionContext.RegressionTrainers ctx, + string label, + string features, + string weights = null, + int numLeaves = Defaults.NumLeaves, + int numTrees = Defaults.NumTrees, + int minDatapointsInLeafs = Defaults.MinDocumentsInLeafs, + double learningRate = Defaults.LearningRates, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new FastForestRegression(env, label, features, weights, numLeaves, numTrees, minDatapointsInLeafs, learningRate, advancedSettings); + } + + /// + /// Predict a target using a decision tree regression model trained with the . + /// + /// The . + /// The label column. + /// The features column. + /// The optional weights column. + /// Total number of decision trees to create in the ensemble. 
+ /// The maximum number of leaves per decision tree. + /// The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data. + /// The learning rate. + /// Algorithm advanced settings. + public static FastForestClassification FastForest(this BinaryClassificationContext.BinaryClassificationTrainers ctx, + string label, + string features, + string weights = null, + int numLeaves = Defaults.NumLeaves, + int numTrees = Defaults.NumTrees, + int minDatapointsInLeafs = Defaults.MinDocumentsInLeafs, + double learningRate = Defaults.LearningRates, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new FastForestClassification(env, label, features, weights,numLeaves, numTrees, minDatapointsInLeafs, learningRate, advancedSettings); + } } } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansCatalog.cs b/src/Microsoft.ML.KMeansClustering/KMeansCatalog.cs index 7ba14181a0..c096750ae0 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansCatalog.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansCatalog.cs @@ -23,7 +23,7 @@ public static class KMeansClusteringExtensions /// The number of clusters to use for KMeans. /// Algorithm advanced settings. public static KMeansPlusPlusTrainer KMeans(this ClusteringContext.ClusteringTrainers ctx, - string features = DefaultColumnNames.Features, + string features, string weights = null, int clustersCount = KMeansPlusPlusTrainer.Defaults.K, Action advancedSettings = null) diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index c325f87a4d..d362452da9 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -94,35 +94,39 @@ public class Arguments : UnsupervisedLearnerInputBaseWithWeight /// /// Initializes a new instance of /// - /// The private instance of . 
+ /// The local instance of . /// The name of the feature column. /// The name for the column containing the example weights. /// A delegate to apply all the advanced arguments to the algorithm. /// The number of clusters. public KMeansPlusPlusTrainer(IHostEnvironment env, string featureColumn, int clustersCount = Defaults.K, string weightColumn = null, Action advancedSettings = null) - : this(env, new Arguments(), featureColumn, weightColumn, advancedSettings) + : this(env, new Arguments + { + FeatureColumn = featureColumn, + WeightColumn = weightColumn, + K = clustersCount + }, advancedSettings) { - _k = clustersCount; } internal KMeansPlusPlusTrainer(IHostEnvironment env, Arguments args) - : this(env, args, args.FeatureColumn, args.WeightColumn, null) + : this(env, args, null) { } - private KMeansPlusPlusTrainer(IHostEnvironment env, Arguments args, string featureColumn, string weightColumn, Action advancedSettings = null) - : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(featureColumn), null, TrainerUtils.MakeR4ScalarWeightColumn(weightColumn)) + private KMeansPlusPlusTrainer(IHostEnvironment env, Arguments args, Action advancedSettings = null) + : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(args.FeatureColumn), null, TrainerUtils.MakeR4ScalarWeightColumn(args.WeightColumn)) { Host.CheckValue(args, nameof(args)); + // override with the advanced settings. if (advancedSettings != null) advancedSettings.Invoke(args); Host.CheckUserArg(args.K > 0, nameof(args.K), "Must be positive"); - Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - _featureColumn = featureColumn; + _featureColumn = args.FeatureColumn; _k = args.K; From d10ea0a3e8ff72f6c24f19aea5a91e93a2c28090 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Sun, 4 Nov 2018 08:20:16 -0800 Subject: [PATCH 02/12] fixing #1521 removing the default name for Features and Label columns. 
--- src/Microsoft.ML.LightGBM/LightGbmCatalog.cs | 18 +++++++++--------- .../Standard/LinearClassificationTrainer.cs | 4 ++-- .../Standard/SdcaCatalog.cs | 17 ++++++++++------- .../Standard/SdcaMultiClass.cs | 4 ++-- .../Standard/SdcaRegression.cs | 2 +- .../Standard/SgdCatalog.cs | 4 ++-- 6 files changed, 26 insertions(+), 23 deletions(-) diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs index 56a892be4a..d2e218e68e 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs @@ -30,8 +30,8 @@ public static class LightGbmExtensions /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static LightGbmRegressorTrainer LightGbm(this RegressionContext.RegressionTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, @@ -60,8 +60,8 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionContext.Regressio /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, @@ -92,9 +92,9 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationContext.Bi /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
public static LightGbmRankingTrainer LightGbm(this RankingContext.RankingTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, - string groupId = DefaultColumnNames.GroupId, + string label, + string features, + string groupId, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, @@ -124,8 +124,8 @@ public static LightGbmRankingTrainer LightGbm(this RankingContext.RankingTrainer /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static LightGbmMulticlassTrainer LightGbm(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs index 5805629230..ec3b9d6508 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs @@ -1427,8 +1427,8 @@ internal override void Check(IHostEnvironment env) /// Initializes a new instance of /// /// The environment to use. - /// The features, or independent variables. /// The label, or dependent variable. + /// The features, or independent variables. /// The custom loss. /// The optional example weights. /// The L2 regularization hyperparameter. @@ -1439,8 +1439,8 @@ internal override void Check(IHostEnvironment env) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
public LinearClassificationTrainer(IHostEnvironment env, - string featureColumn, string labelColumn, + string featureColumn, string weightColumn = null, ISupportSdcaClassificationLoss loss = null, float? l2Const = null, diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs index 7d3e1025b9..57ab8215ac 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs @@ -31,7 +31,9 @@ public static class SdcaRegressionExtensions /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this RegressionContext.RegressionTrainers ctx, - string label = DefaultColumnNames.Label, string features = DefaultColumnNames.Features, string weights = null, + string label, + string features, + string weights = null, ISupportSdcaRegressionLoss loss = null, float? l2Const = null, float? l1Threshold = null, @@ -40,7 +42,7 @@ public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this Regressi { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new SdcaRegressionTrainer(env, features, label, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + return new SdcaRegressionTrainer(env, label, features, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); } } @@ -75,7 +77,8 @@ public static class SdcaBinaryClassificationExtensions /// public static LinearClassificationTrainer StochasticDualCoordinateAscent( this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, ISupportSdcaClassificationLoss loss = null, float? 
l2Const = null, @@ -86,7 +89,7 @@ public static LinearClassificationTrainer StochasticDualCoordinateAscent( { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new LinearClassificationTrainer(env, features, label, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + return new LinearClassificationTrainer(env, label, features, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); } } @@ -109,8 +112,8 @@ public static class SdcaMulticlassExtensions /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, ISupportSdcaClassificationLoss loss = null, float? l2Const = null, @@ -120,7 +123,7 @@ public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this Multicla { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new SdcaMultiClassTrainer(env, features, label, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + return new SdcaMultiClassTrainer(env, label, features, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index deaa204b03..0a3bbfdc3b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -51,8 +51,8 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The features, or independent variables. /// The label, or dependent variable. 
+ /// The features, or independent variables. /// The custom loss. /// The optional example weights. /// The L2 regularization hyperparameter. @@ -63,8 +63,8 @@ public sealed class Arguments : ArgumentsBase /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaMultiClassTrainer(IHostEnvironment env, - string featureColumn, string labelColumn, + string featureColumn, string weightColumn = null, ISupportSdcaClassificationLoss loss = null, float? l2Const = null, diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs index f55ba18fe1..cc11020389 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs @@ -68,8 +68,8 @@ public Arguments() /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaRegressionTrainer(IHostEnvironment env, - string featureColumn, string labelColumn, + string featureColumn, string weightColumn = null, ISupportSdcaRegressionLoss loss = null, float? l2Const = null, diff --git a/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs index be68580822..6e51fd517e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs @@ -29,8 +29,8 @@ public static class StochasticGradientDescentCatalog /// The loss function to use. /// A delegate to apply all the advanced arguments to the algorithm. 
public static StochasticGradientDescentClassificationTrainer StochasticGradientDescent(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, int maxIterations = Arguments.Defaults.MaxIterations, double initLearningRate = Arguments.Defaults.InitLearningRate, From 011de9916dcf060e827676881be62b288c794877 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Sun, 4 Nov 2018 08:26:10 -0800 Subject: [PATCH 03/12] more consistency --- .../FactorizationMachineCatalog.cs | 3 ++- .../FactorizationMachineTrainer.cs | 8 ++++++-- .../LogisticRegression/LbfgsCatalog.cs | 18 +++++++++--------- .../MulticlassLogisticRegression.cs | 4 +++- .../Standard/Online/OnlineLearnerCatalog.cs | 8 ++++---- .../PoissonRegression/PoissonRegression.cs | 8 +++++--- 6 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs index f94511ec76..35bacce86b 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs @@ -26,7 +26,8 @@ public static class FactorizationMachineExtensions /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, string[] features, + string label, + string[] features, string weights = null, Action advancedSettings = null) { diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 714e7a5003..d29de3e86a 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -139,8 +139,12 @@ public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Arguments arg /// A delegate to apply all the advanced arguments to the algorithm. /// The name of the weight column. /// The for additional input data to training. - public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, string labelColumn, string[] featureColumns, - string weightColumn = null, TrainerEstimatorContext context = null, Action advancedSettings = null) + public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, + string labelColumn, + string[] featureColumns, + string weightColumn = null, + TrainerEstimatorContext context = null, + Action advancedSettings = null) : base(env, LoadName) { var args = new Arguments(); diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs index 5580e66e77..e87c27453e 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs @@ -32,8 +32,8 @@ public static class LbfgsBinaryClassificationExtensions /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. 
public static LogisticRegression LogisticRegression(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, @@ -61,15 +61,15 @@ public static class LbfgsRegressionExtensions /// The label, or dependent variable. /// The features, or independent variables. /// The optional example weights. - /// Enforce non-negative weights. /// Weight of L1 regularization term. /// Weight of L2 regularization term. - /// Memory size for . Lower=faster, less accurate. /// Threshold for optimizer convergence. + /// Memory size for . Lower=faster, less accurate. + /// Enforce non-negative weights. /// A delegate to apply all the advanced arguments to the algorithm. public static PoissonRegression PoissonRegression(this RegressionContext.RegressionTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, @@ -80,7 +80,7 @@ public static PoissonRegression PoissonRegression(this RegressionContext.Regress { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new PoissonRegression(env, features, label, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); + return new PoissonRegression(env, label, features, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); } } @@ -104,8 +104,8 @@ public static class LbfgsMulticlassExtensions /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. 
public static MulticlassLogisticRegression LogisticRegression(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index d379263068..5203f7726c 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -83,7 +83,9 @@ public sealed class Arguments : ArgumentsBase /// Memory size for . Lower=faster, less accurate. /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. - public MulticlassLogisticRegression(IHostEnvironment env, string featureColumn, string labelColumn, + public MulticlassLogisticRegression(IHostEnvironment env, + string labelColumn, + string featureColumn, string weightColumn = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs index 6580defb4b..bc5aca9015 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs @@ -29,8 +29,8 @@ public static class AveragedPerceptronExtensions /// A delegate to supply more advanced arguments to the algorithm. 
public static AveragedPerceptronTrainer AveragedPerceptron( this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, IClassificationLoss lossFunction = null, float learningRate = AveragedLinearArguments.AveragedDefaultArgs.LearningRate, @@ -79,8 +79,8 @@ public static class OnlineGradientDescentExtensions /// Number of training iterations through the data. /// A delegate to supply more advanced arguments to the algorithm. public static OnlineGradientDescentTrainer OnlineGradientDescent(this RegressionContext.RegressionTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string label, + string features, string weights = null, IRegressionLoss lossFunction = null, float learningRate = OnlineGradientDescentTrainer.Arguments.OgdDefaultArgs.LearningRate, diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index 34d9a743cc..c7af6e77b8 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -47,13 +47,15 @@ public sealed class Arguments : ArgumentsBase /// The name of the label column. /// The name of the feature column. /// The name for the example weight column. - /// Enforce non-negative weights. /// Weight of L1 regularizer term. /// Weight of L2 regularizer term. - /// Memory size for . Lower=faster, less accurate. /// Threshold for optimizer convergence. + /// Memory size for . Lower=faster, less accurate. + /// Enforce non-negative weights. /// A delegate to apply all the advanced arguments to the algorithm. 
- public PoissonRegression(IHostEnvironment env, string featureColumn, string labelColumn, + public PoissonRegression(IHostEnvironment env, + string labelColumn, + string featureColumn, string weightColumn = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, From fb99d510b931fd89cb932c9d6a11dd4620e330eb Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 5 Nov 2018 10:21:12 -0800 Subject: [PATCH 04/12] Fixing the tests to have the Label and Features columns required. --- docs/code/MlNetCookBook.md | 2 +- .../Scenarios/Api/CookbookSamples/CookbookSamples.cs | 2 +- .../Scenarios/Api/Estimators/CrossValidation.cs | 2 +- .../Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs | 2 +- .../Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs | 2 +- .../Scenarios/Api/Estimators/Extensibility.cs | 2 +- .../Scenarios/Api/Estimators/FileBasedSavingOfData.cs | 2 +- .../Scenarios/Api/Estimators/IntrospectiveTraining.cs | 2 +- .../Scenarios/Api/Estimators/Metacomponents.cs | 2 +- .../Scenarios/Api/Estimators/MultithreadedPrediction.cs | 2 +- .../Scenarios/Api/Estimators/ReconfigurablePrediction.cs | 2 +- .../Scenarios/Api/Estimators/SimpleTrainAndPredict.cs | 2 +- .../Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs | 2 +- .../Scenarios/Api/Estimators/TrainWithInitialPredictor.cs | 4 ++-- .../Scenarios/Api/Estimators/TrainWithValidationSet.cs | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md index 542c745e56..a90213c6a8 100644 --- a/docs/code/MlNetCookBook.md +++ b/docs/code/MlNetCookBook.md @@ -960,7 +960,7 @@ var learningPipeline = reader.MakeNewEstimator() IEstimator dynamicPipe = learningPipeline.AsDynamic; // Create a binary classification trainer. 
-var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); +var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features"); // Append the OVA learner to the pipeline. dynamicPipe = dynamicPipe.Append(new Ova(mlContext, binaryTrainer)); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index 62cd9e0708..f900470266 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -618,7 +618,7 @@ private void MixMatch(string dataPath) IEstimator dynamicPipe = learningPipeline.AsDynamic; // Create a binary classification trainer. - var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron(); + var binaryTrainer = mlContext.BinaryClassification.Trainers.AveragedPerceptron("Label", "Features"); // Append the OVA learner to the pipeline. dynamicPipe = dynamicPipe.Append(new Ova(mlContext, binaryTrainer)); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs index 55a88e28cb..0264629e62 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs @@ -26,7 +26,7 @@ void New_CrossValidation() var data = ml.Data.TextReader(MakeSentimentTextLoaderArgs()).Read(GetDataPath(TestDatasets.Sentiment.trainFilename)); // Pipeline. 
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: (s) => { s.ConvergenceTolerance = 1f; s.NumThreads = 1; })); + .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.ConvergenceTolerance = 1f; s.NumThreads = 1; })); var cvResult = ml.BinaryClassification.CrossValidate(data, pipeline); } diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs index d316cf6ae9..c9c7b9af97 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs @@ -34,7 +34,7 @@ void New_DecomposableTrainAndPredict() var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest) - .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; })) + .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features",advancedSettings: s => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; })) .Append(new KeyToValueEstimator(ml, "PredictedLabel")); var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs index 434264cbe7..ad249fa6e9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs @@ -24,7 +24,7 @@ public void New_Evaluation() // Pipeline. 
var pipeline = ml.Data.TextReader(MakeSentimentTextLoaderArgs()) .Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features")) - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1)); + .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); // Train. var readerModel = pipeline.Fit(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename))); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs index a6e81fbc96..80cc5869b9 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs @@ -42,7 +42,7 @@ void New_Extensibility() var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Append(new CustomMappingEstimator(ml, action, null), TransformerScope.TrainTest) .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest) - .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; })) + .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; })) .Append(new KeyToValueEstimator(ml, "PredictedLabel")); var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs index d304c8bda8..667581c9b3 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs +++ 
b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs @@ -39,7 +39,7 @@ void New_FileBasedSavingOfData() DataSaverUtils.SaveDataView(ch, saver, trainData, file); } - var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1); + var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1); var loadedTrainData = new BinaryLoader(ml, new BinaryLoader.Arguments(), new MultiFileSource(path)); // Train. diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs index 60552a2863..d0ba9a82db 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs @@ -38,7 +38,7 @@ public void New_IntrospectiveTraining() .Read(GetDataPath(TestDatasets.Sentiment.trainFilename)); var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1)); + .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); // Train. 
var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs index 6b8abd8d39..b15b3065e4 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs @@ -27,7 +27,7 @@ public void New_Metacomponents() var data = ml.Data.TextReader(MakeIrisTextLoaderArgs()) .Read(GetDataPath(TestDatasets.irisData.trainFilename)); - var sdcaTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }); + var sdcaTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }); var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest) diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs index 784d14c770..4f52662d34 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs @@ -31,7 +31,7 @@ void New_MultithreadedPrediction() // Pipeline. var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1)); + .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); // Train. 
var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs index 343d1ee6a9..5755efe56e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs @@ -31,7 +31,7 @@ public void New_ReconfigurablePrediction() var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") .Fit(data); - var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: (s) => s.NumThreads = 1); + var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => s.NumThreads = 1); var trainData = pipeline.Transform(data); var model = trainer.Fit(trainData); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs index 907736ec43..ad1f37e161 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs @@ -26,7 +26,7 @@ public void New_SimpleTrainAndPredict() var data = reader.Read(GetDataPath(TestDatasets.Sentiment.trainFilename)); // Pipeline. var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1)); + .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); // Train. 
var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs index 38902d75cf..630d9e71aa 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs @@ -30,7 +30,7 @@ public void New_TrainSaveModelAndPredict() // Pipeline. var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features") - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1)); + .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1)); // Train. var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs index 4b29dd16d3..1bfa09377b 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs @@ -31,11 +31,11 @@ public void New_TrainWithInitialPredictor() var trainData = pipeline.Fit(data).Transform(data); // Train the first predictor. - var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(advancedSettings: s => s.NumThreads = 1); + var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features",advancedSettings: s => s.NumThreads = 1); var firstModel = trainer.Fit(trainData); // Train the second predictor on the same data. 
- var secondTrainer = ml.BinaryClassification.Trainers.AveragedPerceptron(); + var secondTrainer = ml.BinaryClassification.Trainers.AveragedPerceptron("Label","Features"); var trainRoles = new RoleMappedData(trainData, label: "Label", feature: "Features"); var finalModel = secondTrainer.Train(new TrainContext(trainRoles, initialPredictor: firstModel.Model)); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs index 4a977a6b2b..07c9ebeb8a 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs @@ -30,7 +30,7 @@ public void New_TrainWithValidationSet() var validData = preprocess.Transform(reader.Read(GetDataPath(TestDatasets.Sentiment.testFilename))); // Train model with validation set. - var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(); + var trainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label","Features"); var model = trainer.Train(trainData, validData); } } From b776880ffb45ef64c8b6f299b9c9a4da88cd2367 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 5 Nov 2018 13:23:31 -0800 Subject: [PATCH 05/12] Fixing tests and the static SDCA extensions --- src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs | 8 ++++---- test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs | 6 +++--- .../StochasticDualCoordinateAscentClassifierBench.cs | 2 +- .../IrisPlantClassificationTests.cs | 2 +- .../TrainerEstimators/MetalinearEstimators.cs | 4 ++-- test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs | 6 +++--- .../TrainerEstimators/SymSgdClassificationTests.cs | 8 ++++---- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs index 2313bcd180..7faf6b5e19 100644 --- 
a/src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs @@ -66,7 +66,7 @@ public static Scalar Sdca(this RegressionContext.RegressionTrainers ctx, var rec = new TrainerEstimatorReconciler.Regression( (env, labelName, featuresName, weightsName) => { - var trainer = new SdcaRegressionTrainer(env, featuresName, labelName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + var trainer = new SdcaRegressionTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); return trainer; @@ -117,7 +117,7 @@ public static (Scalar score, Scalar probability, Scalar pred var rec = new TrainerEstimatorReconciler.BinaryClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new LinearClassificationTrainer(env, featuresName, labelName, weightsName, loss: new LogLoss(), l2Const, l1Threshold, maxIterations, advancedSettings); + var trainer = new LinearClassificationTrainer(env, labelName, featuresName, weightsName, loss: new LogLoss(), l2Const, l1Threshold, maxIterations, advancedSettings); if (onFit != null) { return trainer.WithOnFitDelegate(trans => @@ -187,7 +187,7 @@ public static (Scalar score, Scalar predictedLabel) Sdca( var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration( (env, labelName, featuresName, weightsName) => { - var trainer = new LinearClassificationTrainer(env, featuresName, labelName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + var trainer = new LinearClassificationTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings); if (onFit != null) { return trainer.WithOnFitDelegate(trans => @@ -251,7 +251,7 @@ public static (Vector score, Key predictedLabel) var rec = new TrainerEstimatorReconciler.MulticlassClassifier( 
(env, labelName, featuresName, weightsName) => { - var trainer = new SdcaMultiClassTrainer(env, featuresName, labelName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + var trainer = new SdcaMultiClassTrainer(env, labelName, featuresName, weightsName, loss, l2Const, l1Threshold, maxIterations, advancedSettings); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); return trainer; diff --git a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs index 6401ab803d..55285d2d0c 100644 --- a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs +++ b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs @@ -56,7 +56,7 @@ public void SetupIrisPipeline() IDataView data = reader.Read(_irisDataPath); var pipeline = new ColumnConcatenatingEstimator (env, "Features", new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" }) - .Append(new SdcaMultiClassTrainer(env, "Features", "Label", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; })); + .Append(new SdcaMultiClassTrainer(env, "Label", "Features", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; })); var model = pipeline.Fit(data); @@ -91,7 +91,7 @@ public void SetupSentimentPipeline() IDataView data = reader.Read(_sentimentDataPath); var pipeline = new TextFeaturizingEstimator(env, "SentimentText", "Features") - .Append(new LinearClassificationTrainer(env, "Features", "Label", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; })); + .Append(new LinearClassificationTrainer(env, "Label", "Features", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; })); var model = pipeline.Fit(data); @@ -125,7 +125,7 @@ public void SetupBreastCancerPipeline() IDataView data = reader.Read(_breastCancerDataPath); - var pipeline = new LinearClassificationTrainer(env, "Features", "Label", advancedSettings: (s) => { 
s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; }); + var pipeline = new LinearClassificationTrainer(env, "Label", "Features", advancedSettings: (s) => { s.NumThreads = 1; s.ConvergenceTolerance = 1e-2f; }); var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs index 7db3814b98..c528d2ad99 100644 --- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs +++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs @@ -122,7 +122,7 @@ public void TrainSentiment() }, text); // Train - var trainer = new SdcaMultiClassTrainer(env, "Features", "Label", maxIterations: 20); + var trainer = new SdcaMultiClassTrainer(env, "Label", "Features", maxIterations: 20); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); var predicted = trainer.Train(trainRoles); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index 8ed4aa335e..a247d58899 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -48,7 +48,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest() pipeline = NormalizeTransform.CreateMinMaxNormalizer(env, pipeline, "Features"); // Train - var trainer = new SdcaMultiClassTrainer(env, "Features", "Label", advancedSettings: (s) => s.NumThreads = 1); + var trainer = new SdcaMultiClassTrainer(env, "Label", "Features", advancedSettings: (s) => s.NumThreads = 1); // Explicity adding CacheDataView since caching is not working though trainer has 'Caching' On/Auto var cached = new CacheDataView(env, pipeline, prefetch: null); diff --git 
a/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs index 1d14718a49..43d7ec16af 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs @@ -44,7 +44,7 @@ public void OVAWithAllConstructorArgs() public void OVAUncalibrated() { var (pipeline, data) = GetMultiClassPipeline(); - var sdcaTrainer = new LinearClassificationTrainer(Env, "Features", "Label", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; s.Calibrator = null; }); + var sdcaTrainer = new LinearClassificationTrainer(Env, "Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; s.Calibrator = null; }); pipeline.Append(new Ova(Env, sdcaTrainer, useProbabilities: false)) .Append(new KeyToValueEstimator(Env, "PredictedLabel")); @@ -61,7 +61,7 @@ public void Pkpd() { var (pipeline, data) = GetMultiClassPipeline(); - var sdcaTrainer = new LinearClassificationTrainer(Env, "Features", "Label", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }); + var sdcaTrainer = new LinearClassificationTrainer(Env, "Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }); pipeline.Append(new Pkpd(Env, sdcaTrainer)) .Append(new KeyToValueEstimator(Env, "PredictedLabel")); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs index 24661af9df..43009ec246 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs @@ -19,13 +19,13 @@ public void SdcaWorkout() var data = TextLoader.CreateReader(Env, ctx => (Label: ctx.LoadFloat(0), Features: ctx.LoadFloat(1, 10))) .Read(dataPath); - IEstimator est = new LinearClassificationTrainer(Env, 
"Features", "Label", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); + IEstimator est = new LinearClassificationTrainer(Env, "Label", "Features", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); TestEstimatorCore(est, data.AsDynamic); - est = new SdcaRegressionTrainer(Env, "Features", "Label", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); + est = new SdcaRegressionTrainer(Env, "Label", "Features", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); TestEstimatorCore(est, data.AsDynamic); - est = new SdcaMultiClassTrainer(Env, "Features", "Label", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); + est = new SdcaMultiClassTrainer(Env, "Label", "Features", advancedSettings: (s) => s.ConvergenceTolerance = 1e-2f); TestEstimatorCore(est, data.AsDynamic); Done(); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs index 868ea7335e..9db9580f52 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/SymSgdClassificationTests.cs @@ -18,7 +18,7 @@ public partial class TrainerEstimators public void TestEstimatorSymSgdClassificationTrainer() { (var pipe, var dataView) = GetBinaryClassificationPipeline(); - pipe = pipe.Append(new SymSgdClassificationTrainer(Env, "Features", "Label")); + pipe = pipe.Append(new SymSgdClassificationTrainer(Env, "Label", "Features")); TestEstimatorCore(pipe, dataView); Done(); } @@ -29,13 +29,13 @@ public void TestEstimatorSymSgdInitPredictor() (var pipe, var dataView) = GetBinaryClassificationPipeline(); var transformedData = pipe.Fit(dataView).Transform(dataView); - var initPredictor = new LinearClassificationTrainer(Env, "Features", "Label").Fit(transformedData); + var initPredictor = new LinearClassificationTrainer(Env,"Label", "Features").Fit(transformedData); var data = initPredictor.Transform(transformedData); - var 
withInitPredictor = new SymSgdClassificationTrainer(Env, "Features", "Label").Train(transformedData, initialPredictor: initPredictor.Model); + var withInitPredictor = new SymSgdClassificationTrainer(Env, "Label", "Features").Train(transformedData, initialPredictor: initPredictor.Model); var outInitData = withInitPredictor.Transform(transformedData); - var notInitPredictor = new SymSgdClassificationTrainer(Env, "Features", "Label").Train(transformedData); + var notInitPredictor = new SymSgdClassificationTrainer(Env, "Label", "Features").Train(transformedData); var outNoInitData = notInitPredictor.Transform(transformedData); int numExamples = 10; From d4b9ce7b283885d0ed5cba775b9b7b83ff64e397 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 5 Nov 2018 23:08:50 -0800 Subject: [PATCH 06/12] Fixing order in tests --- .../Multiclass/MulticlassDataPartitionEnsembleTrainer.cs | 2 +- .../Standard/LinearClassificationTrainer.cs | 5 ++++- .../Standard/LogisticRegression/LbfgsCatalog.cs | 4 ++-- .../Standard/LogisticRegression/LbfgsStatic.cs | 4 ++-- src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs | 2 +- src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs | 2 +- test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs | 2 +- test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs | 4 ++-- .../TrainerEstimators/TrainerEstimators.cs | 2 +- 9 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs b/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs index f207c9e0ad..ef19bd2c2b 100644 --- a/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs +++ b/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs @@ -59,7 +59,7 @@ public Arguments() BasePredictors = new[] { ComponentFactoryUtils.CreateFromFunction( - env => new MulticlassLogisticRegression(env, FeatureColumn, 
LabelColumn)) + env => new MulticlassLogisticRegression(env, LabelColumn,FeatureColumn)) }; } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs index ec3b9d6508..b1aed234d1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs @@ -1675,7 +1675,10 @@ internal static class Defaults /// The L2 regularizer constant. /// The loss function to use. /// A delegate to apply all the advanced arguments to the algorithm. - public StochasticGradientDescentClassificationTrainer(IHostEnvironment env, string featureColumn, string labelColumn, string weightColumn = null, + public StochasticGradientDescentClassificationTrainer(IHostEnvironment env, + string labelColumn, + string featureColumn, + string weightColumn = null, int maxIterations = Arguments.Defaults.MaxIterations, double initLearningRate = Arguments.Defaults.InitLearningRate, float l2Weight = Arguments.Defaults.L2Weight, diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs index e87c27453e..9dc857e934 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs @@ -44,7 +44,7 @@ public static LogisticRegression LogisticRegression(this BinaryClassificationCon { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new LogisticRegression(env, features, label, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); + return new LogisticRegression(env, label, features, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); } } @@ -116,7 
+116,7 @@ public static MulticlassLogisticRegression LogisticRegression(this MulticlassCla { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new MulticlassLogisticRegression(env, features, label, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); + return new MulticlassLogisticRegression(env, label, features, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs index 75056d6f20..349aea83b3 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs @@ -110,7 +110,7 @@ public static Scalar PoissonRegression(this RegressionContext.RegressionT var rec = new TrainerEstimatorReconciler.Regression( (env, labelName, featuresName, weightsName) => { - var trainer = new PoissonRegression(env, featuresName, labelName, weightsName, + var trainer = new PoissonRegression(env, labelName, featuresName, weightsName, l1Weight, l2Weight, optimizationTolerance, memorySize, enoforceNoNegativity); if (onFit != null) @@ -166,7 +166,7 @@ public static (Vector score, Key predictedLabel) var rec = new TrainerEstimatorReconciler.MulticlassClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new MulticlassLogisticRegression(env, featuresName, labelName, weightsName, + var trainer = new MulticlassLogisticRegression(env, labelName, featuresName, weightsName, l1Weight, l2Weight, optimizationTolerance, memorySize, enoforceNoNegativity); if (onFit != null) diff --git a/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs index 6e51fd517e..407651f12e 100644 --- 
a/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs @@ -40,7 +40,7 @@ public static StochasticGradientDescentClassificationTrainer StochasticGradientD { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new StochasticGradientDescentClassificationTrainer(env, features, label, weights, maxIterations, initLearningRate, l2Weight, loss, advancedSettings); + return new StochasticGradientDescentClassificationTrainer(env, label, features, weights, maxIterations, initLearningRate, l2Weight, loss, advancedSettings); } } } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs b/src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs index 96c3b1a5ef..8bc0b510a1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SgdStatic.cs @@ -50,7 +50,7 @@ public static (Scalar score, Scalar probability, Scalar pred var rec = new TrainerEstimatorReconciler.BinaryClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new StochasticGradientDescentClassificationTrainer(env, featuresName, labelName, weightsName, maxIterations, initLearningRate, l2Weight, loss, advancedSettings); + var trainer = new StochasticGradientDescentClassificationTrainer(env, labelName, featuresName, weightsName, maxIterations, initLearningRate, l2Weight, loss, advancedSettings); if (onFit != null) return trainer.WithOnFitDelegate(trans => onFit(trans.Model)); diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 523af789f4..00da3c78e9 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -1231,7 +1231,7 @@ public void EntryPointMulticlassPipelineEnsemble() Column = new[] { new ConcatTransform.Column() { Name = "Features", Source = new[] { 
"Features1", "Features2" } } } }, data); - var mlr = new MulticlassLogisticRegression(Env, "Features", "Label"); + var mlr = new MulticlassLogisticRegression(Env, "Label", "Features"); var rmd = new RoleMappedData(data, "Label", "Features"); predictorModels[i] = new PredictorModel(Env, rmd, data, mlr.Train(rmd)); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index fc3b458145..b0c0742b50 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -25,7 +25,7 @@ public void TestEstimatorLogisticRegression() public void TestEstimatorMulticlassLogisticRegression() { (IEstimator pipe, IDataView dataView) = GetMultiClassPipeline(); - pipe = pipe.Append(new MulticlassLogisticRegression(Env, "Features", "Label")); + pipe = pipe.Append(new MulticlassLogisticRegression(Env, "Label", "Features")); TestEstimatorCore(pipe, dataView); Done(); } @@ -34,7 +34,7 @@ public void TestEstimatorMulticlassLogisticRegression() public void TestEstimatorPoissonRegression() { var dataView = GetRegressionPipeline(); - var pipe = new PoissonRegression(Env, "Features", "Label"); + var pipe = new PoissonRegression(Env, "Label", "Features"); TestEstimatorCore(pipe, dataView); Done(); } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs index fb052f7e65..4d10984fae 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs @@ -86,7 +86,7 @@ public void KMeansEstimator() public void TestEstimatorHogwildSGD() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); - pipe = pipe.Append(new StochasticGradientDescentClassificationTrainer(Env, "Features", "Label")); + pipe = pipe.Append(new StochasticGradientDescentClassificationTrainer(Env, "Label", 
"Features")); TestEstimatorCore(pipe, dataView); Done(); } From 191f28c390c4e060c6d76cfc8fe4cbcfdc136846 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 5 Nov 2018 23:47:38 -0800 Subject: [PATCH 07/12] removing space that causes the docs CI to fail --- src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs index e6cb5f7df8..f02c9136c4 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizerCatalog.cs @@ -56,7 +56,7 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// /// /// /// /// From 7d980337670417bd33b43914f0fcc822af047ee5 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 6 Nov 2018 10:57:35 -0800 Subject: [PATCH 08/12] naming standardization --- .../MulticlassDataPartitionEnsembleTrainer.cs | 2 +- .../OlsLinearRegression.cs | 15 ++++++++----- .../SymSgdClassificationTrainer.cs | 17 ++++++++------ .../KMeansPlusPlusTrainer.cs | 3 +-- .../LightGbmBinaryTrainer.cs | 14 +++++++----- .../LightGbmMulticlassTrainer.cs | 14 ++++++------ .../LightGbmRankingTrainer.cs | 20 ++++++++--------- .../LightGbmRegressionTrainer.cs | 14 +++++++----- .../FactorizationMachineTrainer.cs | 22 +++++++++---------- .../LogisticRegression/LogisticRegression.cs | 18 +++++++-------- .../MulticlassLogisticRegression.cs | 18 +++++++-------- .../Standard/Online/OnlineGradientDescent.cs | 12 +++++----- .../PoissonRegression/PoissonRegression.cs | 18 +++++++-------- .../Standard/SdcaMultiClass.cs | 18 +++++++-------- .../Standard/SdcaRegression.cs | 18 +++++++-------- .../OlsLinearRegressionTests.cs | 2 +- 16 files changed, 117 insertions(+), 108 deletions(-) diff --git a/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs 
b/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs index ef19bd2c2b..3909fb1b07 100644 --- a/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs +++ b/src/Microsoft.ML.Ensemble/Trainer/Multiclass/MulticlassDataPartitionEnsembleTrainer.cs @@ -59,7 +59,7 @@ public Arguments() BasePredictors = new[] { ComponentFactoryUtils.CreateFromFunction( - env => new MulticlassLogisticRegression(env, LabelColumn,FeatureColumn)) + env => new MulticlassLogisticRegression(env, LabelColumn, FeatureColumn)) }; } } diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index 154e552d80..c11f4e2d4f 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -68,13 +68,16 @@ public sealed class Arguments : LearnerInputBaseWithWeight /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. + /// The name of the label column. + /// The name of the feature column. + /// The name for the example weight column. /// A delegate to apply all the advanced arguments to the algorithm. 
- public OlsLinearRegressionTrainer(IHostEnvironment env, string featureColumn, string labelColumn, - string weightColumn = null, Action advancedSettings = null) - : this(env, ArgsInit(featureColumn, labelColumn, weightColumn, advancedSettings)) + public OlsLinearRegressionTrainer(IHostEnvironment env, + string label, + string feature, + string weight = null, + Action advancedSettings = null) + : this(env, ArgsInit(feature, label, weight, advancedSettings)) { } diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index b0ec0608f1..1107e63494 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -154,20 +154,23 @@ protected override TPredictor TrainModelCore(TrainContext context) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// A delegate to apply all the advanced arguments to the algorithm. - public SymSgdClassificationTrainer(IHostEnvironment env, string featureColumn, string labelColumn, Action advancedSettings = null) - : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(featureColumn), - TrainerUtils.MakeBoolScalarLabel(labelColumn)) + public SymSgdClassificationTrainer(IHostEnvironment env, + string label, + string feature, + Action advancedSettings = null) + : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(feature), + TrainerUtils.MakeBoolScalarLabel(label)) { _args = new Arguments(); // Apply the advanced args, if the user supplied any. 
_args.Check(Host); advancedSettings?.Invoke(_args); - _args.FeatureColumn = featureColumn; - _args.LabelColumn = labelColumn; + _args.FeatureColumn = feature; + _args.LabelColumn = label; Info = new TrainerInfo(); } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index d362452da9..ea52235d8a 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -121,8 +121,7 @@ private KMeansPlusPlusTrainer(IHostEnvironment env, Arguments args, Action 0, nameof(args.K), "Must be positive"); diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index bd9adbebbc..df7e3353c1 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -103,9 +103,9 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. + /// The name of the label column. + /// The name of the feature column. + /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -114,14 +114,16 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args) /// The settings here will override the ones provided in the direct signature, /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
- public LightGbmBinaryTrainer(IHostEnvironment env, string labelColumn, string featureColumn, - string weightColumn = null, + public LightGbmBinaryTrainer(IHostEnvironment env, + string label, + string feature, + string weight = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(label), feature, weight, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { } diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index bc73e3a41f..e72f707c23 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -44,9 +44,9 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, LightGbmArguments args) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. + /// The name of the label column. + /// The name of the feature column. + /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -56,15 +56,15 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, LightGbmArguments args) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
public LightGbmMulticlassTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, - string weightColumn = null, + string label, + string feature, + string weight = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumn), featureColumn, weightColumn, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(label), feature, weight, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { _numClass = -1; } diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index fdd4b09959..751a762183 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -90,10 +90,10 @@ internal LightGbmRankingTrainer(IHostEnvironment env, LightGbmArguments args) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name of the column containing the group ID. - /// The name of the column containing the initial weight. + /// The name of the label column. + /// The name of the feature column. + /// The name of the column containing the group ID. + /// The name of the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -103,18 +103,18 @@ internal LightGbmRankingTrainer(IHostEnvironment env, LightGbmArguments args) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
public LightGbmRankingTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, - string groupIdColumn, - string weightColumn = null, + string label, + string feature, + string groupId, + string weight = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, groupIdColumn, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(label), feature, weight, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { - Host.CheckNonEmpty(groupIdColumn, nameof(groupIdColumn)); + Host.CheckNonEmpty(groupId, nameof(groupId)); } protected override void CheckDataValid(IChannel ch, RoleMappedData data) diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index 612ef15f6d..9a769df92d 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -89,9 +89,9 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. + /// The name of the label column. + /// The name of the feature column. + /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -100,14 +100,16 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase. 
- public LightGbmRegressorTrainer(IHostEnvironment env, string labelColumn, string featureColumn, - string weightColumn = null, + public LightGbmRegressorTrainer(IHostEnvironment env, + string label, + string features, + string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weightColumn, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(label), features, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { } diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index d29de3e86a..ae307cd887 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -134,15 +134,15 @@ public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Arguments arg /// Initializing a new instance of . /// /// The private instance of . - /// The name of the label column. - /// The name of column hosting the features. + /// The name of the label column. + /// The name of column hosting the features. /// A delegate to apply all the advanced arguments to the algorithm. - /// The name of the weight column. + /// The name of the optional weights' column. /// The for additional input data to training. 
public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, - string labelColumn, - string[] featureColumns, - string weightColumn = null, + string label, + string[] features, + string weights = null, TrainerEstimatorContext context = null, Action advancedSettings = null) : base(env, LoadName) @@ -155,13 +155,13 @@ public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Context = context; - FeatureColumns = new SchemaShape.Column[featureColumns.Length]; + FeatureColumns = new SchemaShape.Column[features.Length]; - for (int i = 0; i < featureColumns.Length; i++) - FeatureColumns[i] = new SchemaShape.Column(featureColumns[i], SchemaShape.Column.VectorKind.Vector, NumberType.R4, false); + for (int i = 0; i < features.Length; i++) + FeatureColumns[i] = new SchemaShape.Column(features[i], SchemaShape.Column.VectorKind.Vector, NumberType.R4, false); - LabelColumn = new SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false); - WeightColumn = weightColumn != null ? new SchemaShape.Column(weightColumn, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false) : null; + LabelColumn = new SchemaShape.Column(label, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false); + WeightColumn = weights != null ? new SchemaShape.Column(weights, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false) : null; } /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 2db21d6320..e965e5cd65 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -51,9 +51,9 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. 
- /// The name for the example weight column. + /// The name of the label column. + /// The name of the feature column. + /// The name for the example weight column. /// Enforce non-negative weights. /// Weight of L1 regularizer term. /// Weight of L2 regularizer term. @@ -61,20 +61,20 @@ public sealed class Arguments : ArgumentsBase /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. public LogisticRegression(IHostEnvironment env, - string featureColumn, - string labelColumn, - string weightColumn = null, + string feature, + string label, + string weight = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, float optimizationTolerance = Arguments.Defaults.OptTol, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, featureColumn, TrainerUtils.MakeBoolScalarLabel(labelColumn), weightColumn, advancedSettings, + : base(env, feature, TrainerUtils.MakeBoolScalarLabel(label), weight, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); + Host.CheckNonEmpty(feature, nameof(feature)); + Host.CheckNonEmpty(label, nameof(label)); _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 5203f7726c..455e9a9e7b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -74,9 +74,9 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new 
instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. + /// The name of the label column. + /// The name of the feature column. + /// The name for the example weight column. /// Enforce non-negative weights. /// Weight of L1 regularizer term. /// Weight of L2 regularizer term. @@ -84,20 +84,20 @@ public sealed class Arguments : ArgumentsBase /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. public MulticlassLogisticRegression(IHostEnvironment env, - string labelColumn, - string featureColumn, - string weightColumn = null, + string label, + string features, + string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, float optimizationTolerance = Arguments.Defaults.OptTol, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, featureColumn, TrainerUtils.MakeU4ScalarColumn(labelColumn), weightColumn, advancedSettings, + : base(env, features, TrainerUtils.MakeU4ScalarColumn(label), weights, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); + Host.CheckNonEmpty(features, nameof(features)); + Host.CheckNonEmpty(label, nameof(label)); ShowTrainingStats = Args.ShowTrainingStats; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs index 7bc7b7d96d..3a7a74f761 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs @@ -91,8 +91,8 @@ public override 
LinearRegressionPredictor CreatePredictor() /// Trains a new . /// /// The pricate instance of . - /// Name of the label column. - /// Name of the feature column. + /// Name of the label column. + /// Name of the feature column. /// The learning Rate. /// Decrease learning rate as iterations progress. /// L2 Regularization Weight. @@ -101,8 +101,8 @@ public override LinearRegressionPredictor CreatePredictor() /// The custom loss functions. Defaults to if not provided. /// A delegate to supply advanced arguments to the algorithm. public OnlineGradientDescentTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string label, + string features, float learningRate = Arguments.OgdDefaultArgs.LearningRate, bool decreaseLearningRate = Arguments.OgdDefaultArgs.DecreaseLearningRate, float l2RegularizerWeight = Arguments.OgdDefaultArgs.L2RegularizerWeight, @@ -116,8 +116,8 @@ public OnlineGradientDescentTrainer(IHostEnvironment env, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, NumIterations = numIterations, - LabelColumn = labelColumn, - FeatureColumn = featureColumn, + LabelColumn = label, + FeatureColumn = features, InitialWeights = weightsColumn, LossFunction = new TrivialFactory(lossFunction ?? new SquaredLoss()) })) diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index c7af6e77b8..bde5fb9a9b 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -44,9 +44,9 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. + /// The name of the label column. 
+ /// The name of the feature column. + /// The name for the example weight column. /// Weight of L1 regularizer term. /// Weight of L2 regularizer term. /// Threshold for optimizer convergence. @@ -54,20 +54,20 @@ public sealed class Arguments : ArgumentsBase /// Enforce non-negative weights. /// A delegate to apply all the advanced arguments to the algorithm. public PoissonRegression(IHostEnvironment env, - string labelColumn, - string featureColumn, - string weightColumn = null, + string label, + string features, + string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, float optimizationTolerance = Arguments.Defaults.OptTol, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, featureColumn, TrainerUtils.MakeR4ScalarLabel(labelColumn), weightColumn, advancedSettings, + : base(env, features, TrainerUtils.MakeR4ScalarLabel(label), weights, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); + Host.CheckNonEmpty(features, nameof(features)); + Host.CheckNonEmpty(label, nameof(label)); } /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index 0a3bbfdc3b..dea7ea126a 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -51,10 +51,10 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The label, or dependent variable. - /// The features, or independent variables. + /// The label, or dependent variable. + /// The features, or independent variables. /// The custom loss. 
- /// The optional example weights. + /// The optional example weights. /// The L2 regularization hyperparameter. /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. /// The maximum number of passes to perform over the data. @@ -63,19 +63,19 @@ public sealed class Arguments : ArgumentsBase /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaMultiClassTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, - string weightColumn = null, + string label, + string features, + string weights = null, ISupportSdcaClassificationLoss loss = null, float? l2Const = null, float? l1Threshold = null, int? maxIterations = null, Action advancedSettings = null) - : base(env, featureColumn, TrainerUtils.MakeU4ScalarColumn(labelColumn), TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), advancedSettings, + : base(env, features, TrainerUtils.MakeU4ScalarColumn(label), TrainerUtils.MakeR4ScalarWeightColumn(weights), advancedSettings, l2Const, l1Threshold, maxIterations) { - Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); + Host.CheckNonEmpty(features, nameof(features)); + Host.CheckNonEmpty(label, nameof(label)); _loss = loss ?? Args.LossFunction.CreateComponent(env); Loss = _loss; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs index cc11020389..7f066be0a7 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs @@ -56,10 +56,10 @@ public Arguments() /// Initializes a new instance of /// /// The environment to use. - /// The features, or independent variables. - /// The label, or dependent variable. + /// The label, or dependent variable. + /// The features, or independent variables. 
+ /// The optional example weights. /// The custom loss. - /// The optional example weights. /// The L2 regularization hyperparameter. /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. /// The maximum number of passes to perform over the data. @@ -68,19 +68,19 @@ public Arguments() /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaRegressionTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, - string weightColumn = null, + string label, + string features, + string weights = null, ISupportSdcaRegressionLoss loss = null, float? l2Const = null, float? l1Threshold = null, int? maxIterations = null, Action advancedSettings = null) - : base(env, featureColumn, TrainerUtils.MakeR4ScalarLabel(labelColumn), TrainerUtils.MakeR4ScalarWeightColumn(weightColumn), advancedSettings, + : base(env, features, TrainerUtils.MakeR4ScalarLabel(label), TrainerUtils.MakeR4ScalarWeightColumn(weights), advancedSettings, l2Const, l1Threshold, maxIterations) { - Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); - Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); + Host.CheckNonEmpty(features, nameof(features)); + Host.CheckNonEmpty(label, nameof(label)); _loss = loss ?? 
Args.LossFunction.CreateComponent(env); Loss = _loss; } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs index 87bdddc6e8..edd5d19bec 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/OlsLinearRegressionTests.cs @@ -13,7 +13,7 @@ public partial class TrainerEstimators public void TestEstimatorOlsLinearRegression() { var dataView = GetRegressionPipeline(); - var pipe = new OlsLinearRegressionTrainer(Env, "Features", "Label"); + var pipe = new OlsLinearRegressionTrainer(Env, "Label", "Features"); TestEstimatorCore(pipe, dataView); Done(); } From e2a360bc692a451984dc7663344eb11d5c0e0f10 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 6 Nov 2018 11:39:54 -0800 Subject: [PATCH 09/12] consistent naming in arguments --- .../OlsLinearRegression.cs | 10 +++++----- .../SymSgdClassificationTrainer.cs | 8 ++++---- .../KMeansPlusPlusTrainer.cs | 14 +++++++++----- .../LightGbmBinaryTrainer.cs | 12 ++++++------ .../LightGbmMulticlassTrainer.cs | 10 +++++----- .../LightGbmRankingTrainer.cs | 10 +++++----- src/Microsoft.ML.PCA/PcaTrainer.cs | 15 ++++++++++----- .../Standard/LogisticRegression/LbfgsStatic.cs | 2 +- .../LogisticRegression/LogisticRegression.cs | 12 ++++++------ .../Standard/SdcaMultiClass.cs | 2 +- .../KMeansAndLogisticRegressionBench.cs | 2 +- .../TrainerEstimators/LbfgsTests.cs | 2 +- .../TrainerEstimators/TrainerEstimators.cs | 2 +- 13 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index c11f4e2d4f..b03fbfeaec 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -69,15 +69,15 @@ public sealed class Arguments : LearnerInputBaseWithWeight /// /// The environment to use. 
/// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. + /// The name of the feature column. + /// The name for the optional example weight column. /// A delegate to apply all the advanced arguments to the algorithm. public OlsLinearRegressionTrainer(IHostEnvironment env, string label, - string feature, - string weight = null, + string features, + string weights = null, Action advancedSettings = null) - : this(env, ArgsInit(feature, label, weight, advancedSettings)) + : this(env, ArgsInit(features, label, weights, advancedSettings)) { } diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index 1107e63494..25534ff98f 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -155,13 +155,13 @@ protected override TPredictor TrainModelCore(TrainContext context) /// /// The private instance of . /// The name of the label column. - /// The name of the feature column. + /// The name of the feature column. /// A delegate to apply all the advanced arguments to the algorithm. public SymSgdClassificationTrainer(IHostEnvironment env, string label, - string feature, + string features, Action advancedSettings = null) - : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(feature), + : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(features), TrainerUtils.MakeBoolScalarLabel(label)) { _args = new Arguments(); @@ -169,7 +169,7 @@ public SymSgdClassificationTrainer(IHostEnvironment env, // Apply the advanced args, if the user supplied any. 
_args.Check(Host); advancedSettings?.Invoke(_args); - _args.FeatureColumn = feature; + _args.FeatureColumn = features; _args.LabelColumn = label; Info = new TrainerInfo(); diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index ea52235d8a..343c6e1270 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -95,15 +95,19 @@ public class Arguments : UnsupervisedLearnerInputBaseWithWeight /// Initializes a new instance of /// /// The local instance of . - /// The name of the feature column. - /// The name for the column containing the example weights. + /// The name of the feature column. + /// The name for the optional column containing the example weights. /// A delegate to apply all the advanced arguments to the algorithm. /// The number of clusters. - public KMeansPlusPlusTrainer(IHostEnvironment env, string featureColumn, int clustersCount = Defaults.K, string weightColumn = null, Action advancedSettings = null) + public KMeansPlusPlusTrainer(IHostEnvironment env, + string features, + int clustersCount = Defaults.K, + string weights = null, + Action advancedSettings = null) : this(env, new Arguments { - FeatureColumn = featureColumn, - WeightColumn = weightColumn, + FeatureColumn = features, + WeightColumn = weights, K = clustersCount }, advancedSettings) { diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index df7e3353c1..8709d6a247 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -104,8 +104,8 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args) /// /// The private instance of . /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. 
+ /// The name of the feature column. + /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -114,16 +114,16 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args) /// The settings here will override the ones provided in the direct signature, /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . - public LightGbmBinaryTrainer(IHostEnvironment env, + public LightGbmBinaryTrainer(IHostEnvironment env, string label, - string feature, - string weight = null, + string features, + string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(label), feature, weight, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(label), features, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { } diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index e72f707c23..3bf422b1ac 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -45,8 +45,8 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, LightGbmArguments args) /// /// The private instance of . /// The name of the label column. - /// The name of the feature column. - /// The name for the column containing the initial weight. + /// The name of the feature column. + /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. 
/// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -57,14 +57,14 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, LightGbmArguments args) /// The columns names, however need to be provided directly, not through the . public LightGbmMulticlassTrainer(IHostEnvironment env, string label, - string feature, - string weight = null, + string features, + string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(label), feature, weight, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(label), features, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { _numClass = -1; } diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 751a762183..b604d9a5c7 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -91,9 +91,9 @@ internal LightGbmRankingTrainer(IHostEnvironment env, LightGbmArguments args) /// /// The private instance of . /// The name of the label column. - /// The name of the feature column. + /// The name of the feature column. /// The name of the column containing the group ID. - /// The name of the column containing the initial weight. + /// The name of the optional column containing the initial weights. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -104,15 +104,15 @@ internal LightGbmRankingTrainer(IHostEnvironment env, LightGbmArguments args) /// The columns names, however need to be provided directly, not through the . 
public LightGbmRankingTrainer(IHostEnvironment env, string label, - string feature, + string features, string groupId, - string weight = null, + string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(label), feature, weight, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(label), features, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { Host.CheckNonEmpty(groupId, nameof(groupId)); } diff --git a/src/Microsoft.ML.PCA/PcaTrainer.cs b/src/Microsoft.ML.PCA/PcaTrainer.cs index 933a2769fb..5b37c071ad 100644 --- a/src/Microsoft.ML.PCA/PcaTrainer.cs +++ b/src/Microsoft.ML.PCA/PcaTrainer.cs @@ -84,15 +84,20 @@ public class Arguments : UnsupervisedLearnerInputBaseWithWeight /// Initializes a new instance of . /// /// The local instance of the . - /// The name of the feature column. - /// The name of the weight column. + /// The name of the feature column. + /// The name of the weight column. /// The number of components in the PCA. /// Oversampling parameter for randomized PCA training. /// If enabled, data is centered to be zero mean. /// The seed for random number generation. - public RandomizedPcaTrainer(IHostEnvironment env, string featureColumn, string weightColumn = null, - int rank = 20, int oversampling = 20, bool center = true, int? seed = null) - : this(env, null, featureColumn, weightColumn, rank, oversampling, center, seed) + public RandomizedPcaTrainer(IHostEnvironment env, + string features, + string weights = null, + int rank = 20, + int oversampling = 20, + bool center = true, + int? 
seed = null) + : this(env, null, features, weights, rank, oversampling, center, seed) { } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs index 349aea83b3..08bef182cc 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsStatic.cs @@ -55,7 +55,7 @@ public static (Scalar score, Scalar probability, Scalar pred var rec = new TrainerEstimatorReconciler.BinaryClassifier( (env, labelName, featuresName, weightsName) => { - var trainer = new LogisticRegression(env, featuresName, labelName, weightsName, + var trainer = new LogisticRegression(env, labelName, featuresName, weightsName, l1Weight, l2Weight, optimizationTolerance, memorySize, enoforceNoNegativity, advancedSettings); if (onFit != null) diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index e965e5cd65..9ca820fa35 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -52,8 +52,8 @@ public sealed class Arguments : ArgumentsBase /// /// The environment to use. /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. + /// The name of the feature column. + /// The name for the example weight column. /// Enforce non-negative weights. /// Weight of L1 regularizer term. /// Weight of L2 regularizer term. @@ -61,19 +61,19 @@ public sealed class Arguments : ArgumentsBase /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. 
public LogisticRegression(IHostEnvironment env, - string feature, string label, - string weight = null, + string features, + string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, float optimizationTolerance = Arguments.Defaults.OptTol, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, feature, TrainerUtils.MakeBoolScalarLabel(label), weight, advancedSettings, + : base(env, features, TrainerUtils.MakeBoolScalarLabel(label), weights, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(feature, nameof(feature)); + Host.CheckNonEmpty(features, nameof(features)); Host.CheckNonEmpty(label, nameof(label)); _posWeight = 0; diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index dea7ea126a..6c3a314be1 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -53,8 +53,8 @@ public sealed class Arguments : ArgumentsBase /// The environment to use. /// The label, or dependent variable. /// The features, or independent variables. + /// The optional example weights. /// The custom loss. - /// The optional example weights. /// The L2 regularization hyperparameter. /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. /// The maximum number of passes to perform over the data. 
diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index c56eae60aa..49a18bf0cf 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -64,7 +64,7 @@ public ParameterMixingCalibratedPredictor TrainKMeansAndLR() trans = new ConcatTransform(env, "Features", "Features", "Score").Transform(trans); // Train - var trainer = new LogisticRegression(env, "Features", "Label", advancedSettings: args => { args.EnforceNonNegativity = true; args.OptTol = 1e-3f; }); + var trainer = new LogisticRegression(env, "Label", "Features", advancedSettings: args => { args.EnforceNonNegativity = true; args.OptTol = 1e-3f; }); var trainRoles = new RoleMappedData(trans, label: "Label", feature: "Features"); return trainer.Train(trainRoles); } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs index b0c0742b50..dc5950f8d8 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/LbfgsTests.cs @@ -16,7 +16,7 @@ public partial class TrainerEstimators public void TestEstimatorLogisticRegression() { (IEstimator pipe, IDataView dataView) = GetBinaryClassificationPipeline(); - pipe = pipe.Append(new LogisticRegression(Env, "Features", "Label")); + pipe = pipe.Append(new LogisticRegression(Env, "Label", "Features")); TestEstimatorCore(pipe, dataView); Done(); } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs index 4d10984fae..4330fb9b75 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs @@ -71,7 +71,7 @@ public void KMeansEstimator() // Pipeline. 
- var pipeline = new KMeansPlusPlusTrainer(Env, featureColumn, weightColumn: weights, + var pipeline = new KMeansPlusPlusTrainer(Env, featureColumn, weights: weights, advancedSettings: s => { s.InitAlgorithm = KMeansPlusPlusTrainer.InitAlgorithm.KMeansParallel; }); TestEstimatorCore(pipeline, data); From d2aaf1ffb7e4bcac8ad24fba97a5361bc3287a06 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Wed, 7 Nov 2018 10:37:48 -0800 Subject: [PATCH 10/12] changing the comment --- src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 5250b2396f..87daf11c73 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -94,7 +94,7 @@ public class Arguments : UnsupervisedLearnerInputBaseWithWeight /// /// Initializes a new instance of /// - /// The local instance of . + /// The to use. /// The name of the feature column. /// The name for the optional column containing the example weights. /// A delegate to apply all the advanced arguments to the algorithm. 
From b18e2f4e3490f560f060b1c12f183d51834841b1 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Thu, 8 Nov 2018 23:29:48 -0800 Subject: [PATCH 11/12] The Pandora Box --- .../Dynamic/MatrixFactorization.cs | 6 +- .../Microsoft.ML.Samples/Dynamic/SDCA.cs | 6 +- .../FastTreeClassification.cs | 4 +- src/Microsoft.ML.FastTree/FastTreeRanking.cs | 6 +- .../FastTreeRegression.cs | 4 +- src/Microsoft.ML.FastTree/FastTreeTweedie.cs | 4 +- .../GamClassification.cs | 4 +- src/Microsoft.ML.FastTree/GamRegression.cs | 4 +- .../RandomForestClassification.cs | 4 +- .../RandomForestRegression.cs | 4 +- .../TreeTrainersCatalog.cs | 82 ++--- .../HalLearnersCatalog.cs | 20 +- .../OlsLinearRegression.cs | 24 +- .../SymSgdClassificationTrainer.cs | 16 +- .../KMeansPlusPlusTrainer.cs | 6 +- .../LightGbmBinaryTrainer.cs | 10 +- src/Microsoft.ML.LightGBM/LightGbmCatalog.cs | 46 +-- .../LightGbmMulticlassTrainer.cs | 24 +- .../LightGbmRankingTrainer.cs | 12 +- .../LightGbmRegressionTrainer.cs | 10 +- .../MatrixFactorizationStatic.cs | 2 +- .../MatrixFactorizationTrainer.cs | 10 +- .../FactorizationMachineCatalog.cs | 14 +- .../FactorizationMachineStatic.cs | 2 +- .../FactorizationMachineTrainer.cs | 16 +- .../LogisticRegression/LbfgsCatalog.cs | 123 ------- .../LogisticRegression/LogisticRegression.cs | 14 +- .../MulticlassLogisticRegression.cs | 14 +- .../MultiClass/MultiClassNaiveBayesTrainer.cs | 4 +- .../Standard/MultiClass/Ova.cs | 10 +- .../Standard/MultiClass/Pkpd.cs | 8 +- .../Standard/Online/AveragedPerceptron.cs | 12 +- .../Standard/Online/OnlineGradientDescent.cs | 12 +- .../Standard/Online/OnlineLearnerCatalog.cs | 97 ------ .../PoissonRegression/PoissonRegression.cs | 14 +- .../Standard/SdcaBinary.cs | 8 +- .../Standard/SdcaCatalog.cs | 129 ------- .../Standard/SdcaMultiClass.cs | 14 +- .../Standard/SdcaRegression.cs | 14 +- .../Standard/SgdCatalog.cs | 46 --- .../StandardLearnersCatalog.cs | 314 ++++++++++++++++++ .../TrainerEstimators/FAFMEstimator.cs | 2 +- 
.../MatrixFactorizationTests.cs | 16 +- 43 files changed, 566 insertions(+), 625 deletions(-) delete mode 100644 src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs delete mode 100644 src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs delete mode 100644 src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs delete mode 100644 src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs create mode 100644 src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs index 71366a8035..3eadb5f98e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/MatrixFactorization.cs @@ -71,8 +71,10 @@ public static void MatrixFactorizationInMemoryData() // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the // matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field // names' in MatrixElement class. 
- var pipeline = new MatrixFactorizationTrainer(mlContext, nameof(MatrixElement.Value), - nameof(MatrixElement.MatrixColumnIndex), nameof(MatrixElement.MatrixRowIndex), + var pipeline = new MatrixFactorizationTrainer(mlContext, + nameof(MatrixElement.MatrixColumnIndex), + nameof(MatrixElement.MatrixRowIndex), + nameof(MatrixElement.Value), advancedSettings: s => { s.NumIterations = 10; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs index d018b3df4b..0a8dd0ae23 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs @@ -48,7 +48,7 @@ public static void SDCA_BinaryClassification() // Then append a binary classifier, setting the "Label" column as the label of the dataset, and // the "Features" column produced by FeaturizeText as the features column. var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features") - .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(label: "Sentiment", features: "Features", l2Const: 0.001f)); + .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Sentiment", featureColumn: "Features", l2Const: 0.001f)); // Step 3: Run Cross-Validation on this pipeline. var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment"); @@ -60,8 +60,8 @@ public static void SDCA_BinaryClassification() // we could do so by tweaking the 'advancedSetting'. 
var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features") .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent - (label: "Sentiment", - features: "Features", + (labelColumn: "Sentiment", + featureColumn: "Features", advancedSettings: s=> { s.ConvergenceTolerance = 0.01f; // The learning rate for adjusting bias from being regularized diff --git a/src/Microsoft.ML.FastTree/FastTreeClassification.cs b/src/Microsoft.ML.FastTree/FastTreeClassification.cs index 32621a7ad0..8d94a72a8a 100644 --- a/src/Microsoft.ML.FastTree/FastTreeClassification.cs +++ b/src/Microsoft.ML.FastTree/FastTreeClassification.cs @@ -128,8 +128,8 @@ public sealed partial class FastTreeBinaryClassificationTrainer : /// Total number of decision trees to create in the ensemble. /// A delegate to apply all the advanced arguments to the algorithm. public FastTreeBinaryClassificationTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, diff --git a/src/Microsoft.ML.FastTree/FastTreeRanking.cs b/src/Microsoft.ML.FastTree/FastTreeRanking.cs index 58aa69f6ae..e6e2caa22d 100644 --- a/src/Microsoft.ML.FastTree/FastTreeRanking.cs +++ b/src/Microsoft.ML.FastTree/FastTreeRanking.cs @@ -73,9 +73,9 @@ public sealed partial class FastTreeRankingTrainer /// The learning rate. /// A delegate to apply all the advanced arguments to the algorithm. 
public FastTreeRankingTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, - string groupIdColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string groupIdColumn = DefaultColumnNames.GroupId, string weightColumn = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, diff --git a/src/Microsoft.ML.FastTree/FastTreeRegression.cs b/src/Microsoft.ML.FastTree/FastTreeRegression.cs index 87a49a417d..71a142f599 100644 --- a/src/Microsoft.ML.FastTree/FastTreeRegression.cs +++ b/src/Microsoft.ML.FastTree/FastTreeRegression.cs @@ -64,8 +64,8 @@ public sealed partial class FastTreeRegressionTrainer /// Total number of decision trees to create in the ensemble. /// A delegate to apply all the advanced arguments to the algorithm. public FastTreeRegressionTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, diff --git a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs index 014e396088..2d4b961d80 100644 --- a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs +++ b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs @@ -61,8 +61,8 @@ public sealed partial class FastTreeTweedieTrainer /// Total number of decision trees to create in the ensemble. /// A delegate to apply all the advanced arguments to the algorithm. 
public FastTreeTweedieTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, diff --git a/src/Microsoft.ML.FastTree/GamClassification.cs b/src/Microsoft.ML.FastTree/GamClassification.cs index 7bdcf04c1b..4afebf83ba 100644 --- a/src/Microsoft.ML.FastTree/GamClassification.cs +++ b/src/Microsoft.ML.FastTree/GamClassification.cs @@ -66,8 +66,8 @@ internal BinaryClassificationGamTrainer(IHostEnvironment env, Arguments args) /// The minimal number of documents allowed in a leaf of a regression tree, out of the subsampled data. /// A delegate to apply all the advanced arguments to the algorithm. public BinaryClassificationGamTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves, double learningRate = Defaults.LearningRates, diff --git a/src/Microsoft.ML.FastTree/GamRegression.cs b/src/Microsoft.ML.FastTree/GamRegression.cs index 3d4331fc77..9669bff805 100644 --- a/src/Microsoft.ML.FastTree/GamRegression.cs +++ b/src/Microsoft.ML.FastTree/GamRegression.cs @@ -55,8 +55,8 @@ internal RegressionGamTrainer(IHostEnvironment env, Arguments args) /// The learning rate. /// A delegate to apply all the advanced arguments to the algorithm. 
public RegressionGamTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves, double learningRate = Defaults.LearningRates, diff --git a/src/Microsoft.ML.FastTree/RandomForestClassification.cs b/src/Microsoft.ML.FastTree/RandomForestClassification.cs index 91d31c6f51..cdfd3fbb99 100644 --- a/src/Microsoft.ML.FastTree/RandomForestClassification.cs +++ b/src/Microsoft.ML.FastTree/RandomForestClassification.cs @@ -146,8 +146,8 @@ public sealed class Arguments : FastForestArgumentsBase /// The learning rate. /// A delegate to apply all the advanced arguments to the algorithm. public FastForestClassification(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, diff --git a/src/Microsoft.ML.FastTree/RandomForestRegression.cs b/src/Microsoft.ML.FastTree/RandomForestRegression.cs index a8d511d1b7..ef0ac95250 100644 --- a/src/Microsoft.ML.FastTree/RandomForestRegression.cs +++ b/src/Microsoft.ML.FastTree/RandomForestRegression.cs @@ -167,8 +167,8 @@ public sealed class Arguments : FastForestArgumentsBase /// The learning rate. /// A delegate to apply all the advanced arguments to the algorithm. 
public FastForestRegression(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, diff --git a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs index 813b2af70d..c0133aacc5 100644 --- a/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs +++ b/src/Microsoft.ML.FastTree/TreeTrainersCatalog.cs @@ -18,8 +18,8 @@ public static class TreeExtensions /// Predict a target using a decision tree regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The label column. + /// The feature column. /// The optional weights column. /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -27,8 +27,8 @@ public static class TreeExtensions /// The learning rate. /// Algorithm advanced settings. public static FastTreeRegressionTrainer FastTree(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -38,15 +38,15 @@ public static FastTreeRegressionTrainer FastTree(this RegressionContext.Regressi { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FastTreeRegressionTrainer(env, label, features, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); + return new FastTreeRegressionTrainer(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Predict a target using a decision tree binary classification model trained with the . /// /// The . 
- /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The optional weights column. /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -54,8 +54,8 @@ public static FastTreeRegressionTrainer FastTree(this RegressionContext.Regressi /// The learning rate. /// Algorithm advanced settings. public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -65,15 +65,15 @@ public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassifica { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FastTreeBinaryClassificationTrainer(env, label, features, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); + return new FastTreeBinaryClassificationTrainer(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Ranks a series of inputs based on their relevance, training a decision tree ranking model through the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The groupId column. /// The optional weights column. /// Total number of decision trees to create in the ensemble. @@ -82,9 +82,9 @@ public static FastTreeBinaryClassificationTrainer FastTree(this BinaryClassifica /// The learning rate. /// Algorithm advanced settings. 
public static FastTreeRankingTrainer FastTree(this RankingContext.RankingTrainers ctx, - string label, - string features, - string groupId , + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string groupId = DefaultColumnNames.GroupId, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -94,22 +94,22 @@ public static FastTreeRankingTrainer FastTree(this RankingContext.RankingTrainer { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FastTreeRankingTrainer(env, label, features, groupId, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); + return new FastTreeRankingTrainer(env, labelColumn, featureColumn, groupId, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Predict a target using a decision tree regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The optional weights column. /// The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. /// Algorithm advanced settings. 
public static BinaryClassificationGamTrainer GeneralizedAdditiveMethods(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label = DefaultColumnNames.Label, - string features = DefaultColumnNames.Features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves, double learningRate = Defaults.LearningRates, @@ -117,22 +117,22 @@ public static BinaryClassificationGamTrainer GeneralizedAdditiveMethods(this Bin { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new BinaryClassificationGamTrainer(env, label, features, weights, minDatapointsInLeaves, learningRate, advancedSettings); + return new BinaryClassificationGamTrainer(env, labelColumn, featureColumn, weights, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Predict a target using a decision tree binary classification model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The optional weights column. /// The minimal number of datapoints allowed in a leaf of the tree, out of the subsampled data. /// The learning rate. /// Algorithm advanced settings. 
public static RegressionGamTrainer GeneralizedAdditiveMethods(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int minDatapointsInLeaves = Defaults.MinDocumentsInLeaves, double learningRate = Defaults.LearningRates, @@ -140,15 +140,15 @@ public static RegressionGamTrainer GeneralizedAdditiveMethods(this RegressionCon { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new RegressionGamTrainer(env, label, features, weights, minDatapointsInLeaves, learningRate, advancedSettings); + return new RegressionGamTrainer(env, labelColumn, featureColumn, weights, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Predict a target using a decision tree regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The optional weights column. /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -156,8 +156,8 @@ public static RegressionGamTrainer GeneralizedAdditiveMethods(this RegressionCon /// The learning rate. /// Algorithm advanced settings. 
public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -167,15 +167,15 @@ public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionContext.Regr { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FastTreeTweedieTrainer(env, label, features, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); + return new FastTreeTweedieTrainer(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Predict a target using a decision tree regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The optional weights column. /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -183,8 +183,8 @@ public static FastTreeTweedieTrainer FastTreeTweedie(this RegressionContext.Regr /// The learning rate. /// Algorithm advanced settings. 
public static FastForestRegression FastForest(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -194,15 +194,15 @@ public static FastForestRegression FastForest(this RegressionContext.RegressionT { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FastForestRegression(env, label, features, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); + return new FastForestRegression(env, labelColumn, featureColumn, weights, numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); } /// /// Predict a target using a decision tree regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The featureColumn column. /// The optional weights column. /// Total number of decision trees to create in the ensemble. /// The maximum number of leaves per decision tree. @@ -210,8 +210,8 @@ public static FastForestRegression FastForest(this RegressionContext.RegressionT /// The learning rate. /// Algorithm advanced settings. 
public static FastForestClassification FastForest(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int numLeaves = Defaults.NumLeaves, int numTrees = Defaults.NumTrees, @@ -221,7 +221,7 @@ public static FastForestClassification FastForest(this BinaryClassificationConte { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FastForestClassification(env, label, features, weights,numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); + return new FastForestClassification(env, labelColumn, featureColumn, weights,numLeaves, numTrees, minDatapointsInLeaves, learningRate, advancedSettings); } } } diff --git a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs index acb83d11e7..6dab814ab1 100644 --- a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs +++ b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs @@ -19,36 +19,36 @@ public static class HalLearnersCatalog /// Predict a target using a linear regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The features column. /// The weights column. /// Algorithm advanced settings. 
public static OlsLinearRegressionTrainer OrdinaryLeastSquares(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, Action advancedSettings = null) { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new OlsLinearRegressionTrainer(env, label, features, weights, advancedSettings); + return new OlsLinearRegressionTrainer(env, labelColumn, featureColumn, weights, advancedSettings); } /// /// Predict a target using a linear regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The features column. /// Algorithm advanced settings. public static SymSgdClassificationTrainer SymbolicStochasticGradientDescent(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, Action advancedSettings = null) { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new SymSgdClassificationTrainer(env, label, features, advancedSettings); + return new SymSgdClassificationTrainer(env, labelColumn, featureColumn, advancedSettings); } } } diff --git a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs index b03fbfeaec..233f5cfd95 100644 --- a/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs +++ b/src/Microsoft.ML.HalLearners/OlsLinearRegression.cs @@ -68,16 +68,16 @@ public sealed class Arguments : LearnerInputBaseWithWeight /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. + /// The name of the labelColumn column. + /// The name of the feature column. /// The name for the optional example weight column. 
/// A delegate to apply all the advanced arguments to the algorithm. public OlsLinearRegressionTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, Action advancedSettings = null) - : this(env, ArgsInit(features, label, weights, advancedSettings)) + : this(env, ArgsInit(featureColumn, labelColumn, weights, advancedSettings)) { } @@ -94,8 +94,10 @@ internal OlsLinearRegressionTrainer(IHostEnvironment env, Arguments args) _perParameterSignificance = args.PerParameterSignificance; } - private static Arguments ArgsInit(string featureColumn, string labelColumn, - string weightColumn, Action advancedSettings) + private static Arguments ArgsInit(string featureColumn, + string labelColumn, + string weightColumn, + Action advancedSettings) { var args = new Arguments(); @@ -135,12 +137,12 @@ protected override OlsLinearRegressionPredictor TrainModelCore(TrainContext cont ch.CheckValue(context, nameof(context)); var examples = context.TrainingSet; ch.CheckParam(examples.Schema.Feature != null, nameof(examples), "Need a feature column"); - ch.CheckParam(examples.Schema.Label != null, nameof(examples), "Need a label column"); + ch.CheckParam(examples.Schema.Label != null, nameof(examples), "Need a labelColumn column"); - // The label type must be either Float or a key type based on int (if allowKeyLabels is true). + // The labelColumn type must be either Float or a key type based on int (if allowKeyLabels is true). var typeLab = examples.Schema.Label.Type; if (typeLab != NumberType.Float) - throw ch.Except("Incompatible label column type {0}, must be {1}", typeLab, NumberType.Float); + throw ch.Except("Incompatible labelColumn column type {0}, must be {1}", typeLab, NumberType.Float); // The feature type must be a vector of Float. 
var typeFeat = examples.Schema.Feature.Type; @@ -225,7 +227,7 @@ private OlsLinearRegressionPredictor TrainCore(IChannel ch, FloatLabelCursor.Fac } ch.Check(n > 0, "No training examples in dataset."); if (cursor.BadFeaturesRowCount > 0) - ch.Warning("Skipped {0} instances with missing features/label during training", cursor.SkippedRowCount); + ch.Warning("Skipped {0} instances with missing features/labelColumn during training", cursor.SkippedRowCount); if (_l2Weight > 0) { diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs index bb3572f1e2..e18467270b 100644 --- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs +++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs @@ -154,23 +154,23 @@ protected override TPredictor TrainModelCore(TrainContext context) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// A delegate to apply all the advanced arguments to the algorithm. public SymSgdClassificationTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, Action advancedSettings = null) - : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(features), - TrainerUtils.MakeBoolScalarLabel(label)) + : base(Contracts.CheckRef(env, nameof(env)).Register(LoadNameValue), TrainerUtils.MakeR4VecFeature(featureColumn), + TrainerUtils.MakeBoolScalarLabel(labelColumn)) { _args = new Arguments(); // Apply the advanced args, if the user supplied any. 
_args.Check(Host); advancedSettings?.Invoke(_args); - _args.FeatureColumn = features; - _args.LabelColumn = label; + _args.FeatureColumn = featureColumn; + _args.LabelColumn = labelColumn; Info = new TrainerInfo(); } diff --git a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs index 87daf11c73..6562a1b6b6 100644 --- a/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs +++ b/src/Microsoft.ML.KMeansClustering/KMeansPlusPlusTrainer.cs @@ -95,18 +95,18 @@ public class Arguments : UnsupervisedLearnerInputBaseWithWeight /// Initializes a new instance of /// /// The to use. - /// The name of the feature column. + /// The name of the feature column. /// The name for the optional column containing the example weights. /// A delegate to apply all the advanced arguments to the algorithm. /// The number of clusters. public KMeansPlusPlusTrainer(IHostEnvironment env, - string features, + string featureColumn = DefaultColumnNames.Features, int clustersCount = Defaults.K, string weights = null, Action advancedSettings = null) : this(env, new Arguments { - FeatureColumn = features, + FeatureColumn = featureColumn, WeightColumn = weights, K = clustersCount }, advancedSettings) diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index 8709d6a247..85cffa7460 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -103,8 +103,8 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. + /// The name of the labelColumn column. + /// The name of the feature column. /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. 
@@ -115,15 +115,15 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, LightGbmArguments args) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public LightGbmBinaryTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(label), features, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeBoolScalarLabel(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { } diff --git a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs index d2e218e68e..de4b84c3f1 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmCatalog.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML { /// - /// Regression trainer estimators. + /// LightGBM extension methods. /// public static class LightGbmExtensions { @@ -18,8 +18,8 @@ public static class LightGbmExtensions /// Predict a target using a decision tree regression model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The features column. /// The weights column. /// The number of leaves to use. /// Number of iterations. @@ -30,8 +30,8 @@ public static class LightGbmExtensions /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . 
public static LightGbmRegressorTrainer LightGbm(this RegressionContext.RegressionTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, @@ -41,15 +41,15 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionContext.Regressio { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new LightGbmRegressorTrainer(env, label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); + return new LightGbmRegressorTrainer(env, labelColumn, featureColumn, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); } /// /// Predict a target using a decision tree binary classification model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The features column. /// The weights column. /// The number of leaves to use. /// Number of iterations. @@ -60,8 +60,8 @@ public static LightGbmRegressorTrainer LightGbm(this RegressionContext.Regressio /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? 
minDataPerLeaf = null, @@ -71,7 +71,7 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationContext.Bi { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new LightGbmBinaryTrainer(env, label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); + return new LightGbmBinaryTrainer(env, labelColumn, featureColumn, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); } @@ -79,10 +79,10 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationContext.Bi /// Predict a target using a decision tree binary classification model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The features column. /// The weights column. - /// The groupId column. + /// The groupId column. /// The number of leaves to use. /// Number of iterations. /// The minimal number of documents allowed in a leaf of the tree, out of the subsampled data. @@ -92,9 +92,9 @@ public static LightGbmBinaryTrainer LightGbm(this BinaryClassificationContext.Bi /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static LightGbmRankingTrainer LightGbm(this RankingContext.RankingTrainers ctx, - string label, - string features, - string groupId, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string groupIdColumn = DefaultColumnNames.GroupId, string weights = null, int? numLeaves = null, int? 
minDataPerLeaf = null, @@ -104,7 +104,7 @@ public static LightGbmRankingTrainer LightGbm(this RankingContext.RankingTrainer { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new LightGbmRankingTrainer(env, label, features, groupId, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); + return new LightGbmRankingTrainer(env, labelColumn, featureColumn, groupIdColumn, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); } @@ -112,8 +112,8 @@ public static LightGbmRankingTrainer LightGbm(this RankingContext.RankingTrainer /// Predict a target using a decision tree binary classification model trained with the . /// /// The . - /// The label column. - /// The features column. + /// The labelColumn column. + /// The features column. /// The weights column. /// The number of leaves to use. /// Number of iterations. @@ -124,8 +124,8 @@ public static LightGbmRankingTrainer LightGbm(this RankingContext.RankingTrainer /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static LightGbmMulticlassTrainer LightGbm(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? 
minDataPerLeaf = null, @@ -135,7 +135,7 @@ public static LightGbmMulticlassTrainer LightGbm(this MulticlassClassificationCo { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new LightGbmMulticlassTrainer(env, label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); + return new LightGbmMulticlassTrainer(env, labelColumn, featureColumn, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings); } } diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs index 3bf422b1ac..7db6f9eeb8 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs @@ -44,8 +44,8 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, LightGbmArguments args) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. + /// The name of the labelColumn column. + /// The name of the feature column. /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. @@ -56,15 +56,15 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, LightGbmArguments args) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public LightGbmMulticlassTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? 
learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(label), features, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { _numClass = -1; } @@ -124,23 +124,23 @@ protected override void ConvertNaNLabels(IChannel ch, RoleMappedData data, float float minLabel = float.MaxValue; float maxLabel = float.MinValue; bool hasNaNLabel = false; - foreach (var label in labels) + foreach (var labelColumn in labels) { - if (float.IsNaN(label)) + if (float.IsNaN(labelColumn)) hasNaNLabel = true; else { - minLabel = Math.Min(minLabel, label); - maxLabel = Math.Max(maxLabel, label); + minLabel = Math.Min(minLabel, labelColumn); + maxLabel = Math.Max(maxLabel, labelColumn); } } - ch.CheckParam(minLabel >= 0, nameof(data), "min label cannot be negative"); + ch.CheckParam(minLabel >= 0, nameof(data), "min labelColumn cannot be negative"); if (maxLabel >= _maxNumClass) - throw ch.ExceptParam(nameof(data), $"max label cannot exceed {_maxNumClass}"); + throw ch.ExceptParam(nameof(data), $"max labelColumn cannot exceed {_maxNumClass}"); if (data.Schema.Label.Type.IsKey) { - ch.Check(data.Schema.Label.Type.AsKey.Contiguous, "label value should be contiguous"); + ch.Check(data.Schema.Label.Type.AsKey.Contiguous, "labelColumn value should be contiguous"); if (hasNaNLabel) _numClass = data.Schema.Label.Type.AsKey.Count + 1; else diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index b604d9a5c7..56aff6933c 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -90,8 +90,8 @@ internal 
LightGbmRankingTrainer(IHostEnvironment env, LightGbmArguments args) /// Initializes a new instance of /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// The name of the column containing the group ID. /// The name of the optional column containing the initial weights. /// The number of leaves to use. @@ -103,16 +103,16 @@ internal LightGbmRankingTrainer(IHostEnvironment env, LightGbmArguments args) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public LightGbmRankingTrainer(IHostEnvironment env, - string label, - string features, - string groupId, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string groupId = DefaultColumnNames.GroupId, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(label), features, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weights, groupId, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { Host.CheckNonEmpty(groupId, nameof(groupId)); } diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs index 9a769df92d..0d00a14f66 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs @@ -89,8 +89,8 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase /// /// The private instance of . - /// The name of the label column. - /// The name of the feature column. 
+ /// The name of the label column. + /// The name of the feature column. /// The name for the column containing the initial weight. /// The number of leaves to use. /// Number of iterations. @@ -101,15 +101,15 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase. public LightGbmRegressorTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, int? numLeaves = null, int? minDataPerLeaf = null, double? learningRate = null, int numBoostRound = LightGbmArguments.Defaults.NumBoostRound, Action advancedSettings = null) - : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(label), features, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) + : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarLabel(labelColumn), featureColumn, weights, null, numLeaves, minDataPerLeaf, learningRate, numBoostRound, advancedSettings) { } diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs index 5d53c1ec8d..fee3009a6e 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationStatic.cs @@ -56,7 +56,7 @@ public static Scalar MatrixFactorization(this RegressionContext.Regres var rec = new MatrixFactorizationReconciler((env, labelColName, matrixColumnIndexColName, matrixRowIndexColName) => { - var trainer = new MatrixFactorizationTrainer(env, labelColName, matrixColumnIndexColName, matrixRowIndexColName, advancedSettings: + var trainer = new MatrixFactorizationTrainer(env, matrixColumnIndexColName, matrixRowIndexColName, labelColName, advancedSettings: args => { args.Lambda = regularizationCoefficient; diff --git a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs index 
3894007701..eeb01cc166 100644 --- a/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs +++ b/src/Microsoft.ML.Recommender/MatrixFactorizationTrainer.cs @@ -206,13 +206,17 @@ public MatrixFactorizationTrainer(IHostEnvironment env, Arguments args) : base(e /// Initializing a new instance of . /// /// The private instance of . - /// The name of the label column. /// The name of the column hosting the matrix's column IDs. /// The name of the column hosting the matrix's row IDs. + /// The name of the label column. /// A delegate to apply all the advanced arguments to the algorithm. /// The for additional input data to training. - public MatrixFactorizationTrainer(IHostEnvironment env, string labelColumn, string matrixColumnIndexColumnName, string matrixRowIndexColumnName, - TrainerEstimatorContext context = null, Action advancedSettings = null) + public MatrixFactorizationTrainer(IHostEnvironment env, + string matrixColumnIndexColumnName, + string matrixRowIndexColumnName, + string labelColumn = DefaultColumnNames.Label, + TrainerEstimatorContext context = null, + Action advancedSettings = null) : base(env, LoadNameValue) { var args = new Arguments(); diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs index 35bacce86b..cc11b083dd 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineCatalog.cs @@ -18,22 +18,22 @@ public static class FactorizationMachineExtensions /// Predict a target using a field-aware factorization machine algorithm. /// /// The binary classification context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. + /// The features, or independent variables. + /// The label, or dependent variable. /// The optional example weights. 
/// A delegate to set more settings. /// The settings here will override the ones provided in the direct method signature, /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public static FieldAwareFactorizationMachineTrainer FieldAwareFactorizationMachine(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string[] features, - string weights = null, - Action advancedSettings = null) + string[] featureColumns, + string labelColumn = DefaultColumnNames.Label, + string weights = null, + Action advancedSettings = null) { Contracts.CheckValue(ctx, nameof(ctx)); var env = CatalogUtils.GetEnvironment(ctx); - return new FieldAwareFactorizationMachineTrainer(env, label, features, weights, advancedSettings: advancedSettings); + return new FieldAwareFactorizationMachineTrainer(env, featureColumns, labelColumn, weights, advancedSettings: advancedSettings); } } } diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineStatic.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineStatic.cs index 2a95df5dd7..3dbb900326 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineStatic.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineStatic.cs @@ -56,7 +56,7 @@ public static (Scalar score, Scalar predictedLabel) FieldAwareFacto var rec = new CustomReconciler((env, labelCol, featureCols) => { - var trainer = new FieldAwareFactorizationMachineTrainer(env, labelCol, featureCols, advancedSettings: + var trainer = new FieldAwareFactorizationMachineTrainer(env, featureCols, labelCol, advancedSettings: args => { args.LearningRate = learningRate; diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs index 74343e7d50..b730050a10 
100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FactorizationMachineTrainer.cs @@ -134,14 +134,14 @@ public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Arguments arg /// Initializing a new instance of . /// /// The private instance of . - /// The name of the label column. - /// The name of column hosting the features. + /// The name of column hosting the features. + /// The name of the label column. /// A delegate to apply all the advanced arguments to the algorithm. /// The name of the optional weights' column. /// The for additional input data to training. public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, - string label, - string[] features, + string[] featureColumns, + string labelColumn = DefaultColumnNames.Label, string weights = null, TrainerEstimatorContext context = null, Action advancedSettings = null) @@ -155,12 +155,12 @@ public FieldAwareFactorizationMachineTrainer(IHostEnvironment env, Context = context; - FeatureColumns = new SchemaShape.Column[features.Length]; + FeatureColumns = new SchemaShape.Column[featureColumns.Length]; - for (int i = 0; i < features.Length; i++) - FeatureColumns[i] = new SchemaShape.Column(features[i], SchemaShape.Column.VectorKind.Vector, NumberType.R4, false); + for (int i = 0; i < featureColumns.Length; i++) + FeatureColumns[i] = new SchemaShape.Column(featureColumns[i], SchemaShape.Column.VectorKind.Vector, NumberType.R4, false); - LabelColumn = new SchemaShape.Column(label, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false); + LabelColumn = new SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false); WeightColumn = weights != null ? 
new SchemaShape.Column(weights, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false) : null; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs deleted file mode 100644 index 9dc857e934..0000000000 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LbfgsCatalog.cs +++ /dev/null @@ -1,123 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Internal.Calibration; -using Microsoft.ML.Runtime.Learners; -using Microsoft.ML.Trainers; -using System; - -namespace Microsoft.ML -{ - using Arguments = LogisticRegression.Arguments; - - /// - /// Binary Classification trainer estimators. - /// - public static class LbfgsBinaryClassificationExtensions - { - /// - /// Predict a target using a linear binary classification model trained with the trainer. - /// - /// The binary classificaiton context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional example weights. - /// Enforce non-negative weights. - /// Weight of L1 regularization term. - /// Weight of L2 regularization term. - /// Memory size for . Lower=faster, less accurate. - /// Threshold for optimizer convergence. - /// A delegate to apply all the advanced arguments to the algorithm. 
- public static LogisticRegression LogisticRegression(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, - string weights = null, - float l1Weight = Arguments.Defaults.L1Weight, - float l2Weight = Arguments.Defaults.L2Weight, - float optimizationTolerance = Arguments.Defaults.OptTol, - int memorySize = Arguments.Defaults.MemorySize, - bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new LogisticRegression(env, label, features, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); - } - } - - /// - /// Regression trainer estimators. - /// - public static class LbfgsRegressionExtensions - { - - /// - /// Predict a target using a linear regression model trained with the trainer. - /// - /// The regression context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional example weights. - /// Weight of L1 regularization term. - /// Weight of L2 regularization term. - /// Threshold for optimizer convergence. - /// Memory size for . Lower=faster, less accurate. - /// Enforce non-negative weights. - /// A delegate to apply all the advanced arguments to the algorithm. 
- public static PoissonRegression PoissonRegression(this RegressionContext.RegressionTrainers ctx, - string label, - string features, - string weights = null, - float l1Weight = Arguments.Defaults.L1Weight, - float l2Weight = Arguments.Defaults.L2Weight, - float optimizationTolerance = Arguments.Defaults.OptTol, - int memorySize = Arguments.Defaults.MemorySize, - bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new PoissonRegression(env, label, features, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); - } - } - - /// - /// Multiclass Classification trainer estimators. - /// - public static class LbfgsMulticlassExtensions - { - - /// - /// Predict a target using a linear multiclass classification model trained with the trainer. - /// - /// The multiclass classification context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional example weights. - /// Enforce non-negative weights. - /// Weight of L1 regularization term. - /// Weight of L2 regularization term. - /// Memory size for . Lower=faster, less accurate. - /// Threshold for optimizer convergence. - /// A delegate to apply all the advanced arguments to the algorithm. 
- public static MulticlassLogisticRegression LogisticRegression(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, - string label, - string features, - string weights = null, - float l1Weight = Arguments.Defaults.L1Weight, - float l2Weight = Arguments.Defaults.L2Weight, - float optimizationTolerance = Arguments.Defaults.OptTol, - int memorySize = Arguments.Defaults.MemorySize, - bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new MulticlassLogisticRegression(env, label, features, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); - } - - } -} diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index 9ca820fa35..1f5f49fe40 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -51,8 +51,8 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// The name for the example weight column. /// Enforce non-negative weights. /// Weight of L1 regularizer term. @@ -61,8 +61,8 @@ public sealed class Arguments : ArgumentsBase /// Threshold for optimizer convergence. /// A delegate to apply all the advanced arguments to the algorithm. 
public LogisticRegression(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, @@ -70,11 +70,11 @@ public LogisticRegression(IHostEnvironment env, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, features, TrainerUtils.MakeBoolScalarLabel(label), weights, advancedSettings, + : base(env, featureColumn, TrainerUtils.MakeBoolScalarLabel(labelColumn), weights, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(features, nameof(features)); - Host.CheckNonEmpty(label, nameof(label)); + Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); + Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); _posWeight = 0; ShowTrainingStats = Args.ShowTrainingStats; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 455e9a9e7b..a854691c50 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -74,8 +74,8 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// The name for the example weight column. /// Enforce non-negative weights. /// Weight of L1 regularizer term. @@ -84,8 +84,8 @@ public sealed class Arguments : ArgumentsBase /// Threshold for optimizer convergence. 
/// A delegate to apply all the advanced arguments to the algorithm. public MulticlassLogisticRegression(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, @@ -93,11 +93,11 @@ public MulticlassLogisticRegression(IHostEnvironment env, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, features, TrainerUtils.MakeU4ScalarColumn(label), weights, advancedSettings, + : base(env, featureColumn, TrainerUtils.MakeU4ScalarColumn(labelColumn), weights, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(features, nameof(features)); - Host.CheckNonEmpty(label, nameof(label)); + Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); + Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); ShowTrainingStats = Args.ShowTrainingStats; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index fdcb9e4026..1e663de791 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -51,7 +51,9 @@ public sealed class Arguments : LearnerInputBaseWithLabel /// The environment to use. /// The name of the label column. /// The name of the feature column. 
- public MultiClassNaiveBayesTrainer(IHostEnvironment env, string featureColumn, string labelColumn) + public MultiClassNaiveBayesTrainer(IHostEnvironment env, + string featureColumn = DefaultColumnNames.Features, + string labelColumn = DefaultColumnNames.Label) : base(Contracts.CheckRef(env, nameof(env)).Register(LoadName), TrainerUtils.MakeR4VecFeature(featureColumn), TrainerUtils.MakeU4ScalarColumn(labelColumn)) { diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs index d934de4bd8..8b9ab67c02 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs @@ -83,9 +83,13 @@ public Ova(IHostEnvironment env, Arguments args) /// Whether to treat missing labels as having negative labels, instead of keeping them missing. /// Number of instances to train the calibrator. /// Use probabilities (vs. raw outputs) to identify top-score category. 
- public Ova(IHostEnvironment env, TScalarTrainer binaryEstimator, string labelColumn = DefaultColumnNames.Label, - bool imputeMissingLabelsAsNegative = false, ICalibratorTrainer calibrator = null, - int maxCalibrationExamples = 1000000000, bool useProbabilities = true) + public Ova(IHostEnvironment env, + TScalarTrainer binaryEstimator, + string labelColumn = DefaultColumnNames.Label, + bool imputeMissingLabelsAsNegative = false, + ICalibratorTrainer calibrator = null, + int maxCalibrationExamples = 1000000000, + bool useProbabilities = true) : base(env, new Arguments { diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs index c68266b439..3693c1a9bd 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs @@ -87,8 +87,12 @@ internal Pkpd(IHostEnvironment env, Arguments args) /// The name of the label colum. /// Whether to treat missing labels as having negative labels, instead of keeping them missing. /// Number of instances to train the calibrator. 
- public Pkpd(IHostEnvironment env, TScalarTrainer binaryEstimator, string labelColumn = DefaultColumnNames.Label, - bool imputeMissingLabelsAsNegative = false, ICalibratorTrainer calibrator = null, int maxCalibrationExamples = 1000000000) + public Pkpd(IHostEnvironment env, + TScalarTrainer binaryEstimator, + string labelColumn = DefaultColumnNames.Label, + bool imputeMissingLabelsAsNegative = false, + ICalibratorTrainer calibrator = null, + int maxCalibrationExamples = 1000000000) : base(env, new Arguments { diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index 0e86d91c47..ee21496f56 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -96,8 +96,8 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Arguments args) /// /// The local instance of the /// The classification loss function. - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// The optional name of the weights column. /// The learning rate. /// Wheather to decrease learning rate as iterations progress. @@ -105,8 +105,8 @@ internal AveragedPerceptronTrainer(IHostEnvironment env, Arguments args) /// The number of training iteraitons. /// A delegate to supply more advanced arguments to the algorithm. 
public AveragedPerceptronTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, IClassificationLoss lossFunction = null, float learningRate = Arguments.AveragedDefaultArgs.LearningRate, @@ -116,8 +116,8 @@ public AveragedPerceptronTrainer(IHostEnvironment env, Action advancedSettings = null) : this(env, InvokeAdvanced(advancedSettings, new Arguments { - LabelColumn = label, - FeatureColumn = features, + LabelColumn = labelColumn, + FeatureColumn = featureColumn, InitialWeights = weights, LearningRate = learningRate, DecreaseLearningRate = decreaseLearningRate, diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs index 3a7a74f761..9614155412 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineGradientDescent.cs @@ -91,8 +91,8 @@ public override LinearRegressionPredictor CreatePredictor() /// Trains a new . /// /// The pricate instance of . - /// Name of the label column. - /// Name of the feature column. + /// Name of the label column. + /// Name of the feature column. /// The learning Rate. /// Decrease learning rate as iterations progress. /// L2 Regularization Weight. @@ -101,8 +101,8 @@ public override LinearRegressionPredictor CreatePredictor() /// The custom loss functions. Defaults to if not provided. /// A delegate to supply advanced arguments to the algorithm. 
public OnlineGradientDescentTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, float learningRate = Arguments.OgdDefaultArgs.LearningRate, bool decreaseLearningRate = Arguments.OgdDefaultArgs.DecreaseLearningRate, float l2RegularizerWeight = Arguments.OgdDefaultArgs.L2RegularizerWeight, @@ -116,8 +116,8 @@ public OnlineGradientDescentTrainer(IHostEnvironment env, DecreaseLearningRate = decreaseLearningRate, L2RegularizerWeight = l2RegularizerWeight, NumIterations = numIterations, - LabelColumn = label, - FeatureColumn = features, + LabelColumn = labelColumn, + FeatureColumn = featureColumn, InitialWeights = weightsColumn, LossFunction = new TrivialFactory(lossFunction ?? new SquaredLoss()) })) diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs deleted file mode 100644 index bc5aca9015..0000000000 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/OnlineLearnerCatalog.cs +++ /dev/null @@ -1,97 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Trainers.Online; -using System; - -namespace Microsoft.ML -{ - /// - /// Binary Classification trainer estimators. - /// - public static class AveragedPerceptronExtensions - { - /// - /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer, and a custom loss. - /// - /// The binary classification context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The custom loss. - /// The optional example weights. - /// The learning Rate. 
- /// Decrease learning rate as iterations progress. - /// L2 regularization weight. - /// Number of training iterations through the data. - /// A delegate to supply more advanced arguments to the algorithm. - public static AveragedPerceptronTrainer AveragedPerceptron( - this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, - string weights = null, - IClassificationLoss lossFunction = null, - float learningRate = AveragedLinearArguments.AveragedDefaultArgs.LearningRate, - bool decreaseLearningRate = AveragedLinearArguments.AveragedDefaultArgs.DecreaseLearningRate, - float l2RegularizerWeight = AveragedLinearArguments.AveragedDefaultArgs.L2RegularizerWeight, - int numIterations = AveragedLinearArguments.AveragedDefaultArgs.NumIterations, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new AveragedPerceptronTrainer(env, label, features, weights, lossFunction ?? new LogLoss(), learningRate, decreaseLearningRate, l2RegularizerWeight, numIterations, advancedSettings); - } - - private sealed class TrivialClassificationLossFactory : ISupportClassificationLossFactory - { - private readonly IClassificationLoss _loss; - - public TrivialClassificationLossFactory(IClassificationLoss loss) - { - _loss = loss; - } - - public IClassificationLoss CreateComponent(IHostEnvironment env) - { - return _loss; - } - } - } - - /// - /// Regression trainer estimators. - /// - public static class OnlineGradientDescentExtensions - { - /// - /// Predict a target using a linear regression model trained with the trainer. - /// - /// The regression context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional example weights. - /// The custom loss. Defaults to if not provided. - /// The learning Rate. - /// Decrease learning rate as iterations progress. - /// L2 regularization weight. 
- /// Number of training iterations through the data. - /// A delegate to supply more advanced arguments to the algorithm. - public static OnlineGradientDescentTrainer OnlineGradientDescent(this RegressionContext.RegressionTrainers ctx, - string label, - string features, - string weights = null, - IRegressionLoss lossFunction = null, - float learningRate = OnlineGradientDescentTrainer.Arguments.OgdDefaultArgs.LearningRate, - bool decreaseLearningRate = OnlineGradientDescentTrainer.Arguments.OgdDefaultArgs.DecreaseLearningRate, - float l2RegularizerWeight = AveragedLinearArguments.AveragedDefaultArgs.L2RegularizerWeight, - int numIterations = OnlineLinearArguments.OnlineDefaultArgs.NumIterations, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new OnlineGradientDescentTrainer(env, label, features, learningRate, decreaseLearningRate, l2RegularizerWeight, numIterations, weights, lossFunction, advancedSettings); - } - } -} diff --git a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs index bde5fb9a9b..606b09b341 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/PoissonRegression/PoissonRegression.cs @@ -44,8 +44,8 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The name of the label column. - /// The name of the feature column. + /// The name of the label column. + /// The name of the feature column. /// The name for the example weight column. /// Weight of L1 regularizer term. /// Weight of L2 regularizer term. @@ -54,8 +54,8 @@ public sealed class Arguments : ArgumentsBase /// Enforce non-negative weights. /// A delegate to apply all the advanced arguments to the algorithm. 
public PoissonRegression(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, float l1Weight = Arguments.Defaults.L1Weight, float l2Weight = Arguments.Defaults.L2Weight, @@ -63,11 +63,11 @@ public PoissonRegression(IHostEnvironment env, int memorySize = Arguments.Defaults.MemorySize, bool enforceNoNegativity = Arguments.Defaults.EnforceNonNegativity, Action advancedSettings = null) - : base(env, features, TrainerUtils.MakeR4ScalarLabel(label), weights, advancedSettings, + : base(env, featureColumn, TrainerUtils.MakeR4ScalarLabel(labelColumn), weights, advancedSettings, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity) { - Host.CheckNonEmpty(features, nameof(features)); - Host.CheckNonEmpty(label, nameof(label)); + Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); + Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); } /// diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs index a9bf372779..27e4450f7f 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs @@ -1439,8 +1439,8 @@ internal override void Check(IHostEnvironment env) /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaBinaryTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, ISupportSdcaClassificationLoss loss = null, float? l2Const = null, @@ -1676,8 +1676,8 @@ internal static class Defaults /// The loss function to use. /// A delegate to apply all the advanced arguments to the algorithm. 
public StochasticGradientDescentClassificationTrainer(IHostEnvironment env, - string labelColumn, - string featureColumn, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weightColumn = null, int maxIterations = Arguments.Defaults.MaxIterations, double initLearningRate = Arguments.Defaults.InitLearningRate, diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs deleted file mode 100644 index 29a526c13e..0000000000 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaCatalog.cs +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Learners; -using Microsoft.ML.Trainers; -using System; - -namespace Microsoft.ML -{ - /// - /// Extension methods for instantiating SDCA trainer estimators. - /// - public static class SdcaRegressionExtensions - { - /// - /// Predict a target using a linear regression model trained with the SDCA trainer. - /// - /// The regression context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional example weights. - /// The L2 regularization hyperparameter. - /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. - /// The maximum number of passes to perform over the data. - /// The custom loss, if unspecified will be . - /// A delegate to set more settings. - /// The settings here will override the ones provided in the direct method signature, - /// if both are present and have different values. - /// The columns names, however need to be provided directly, not through the . 
- public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this RegressionContext.RegressionTrainers ctx, - string label, - string features, - string weights = null, - ISupportSdcaRegressionLoss loss = null, - float? l2Const = null, - float? l1Threshold = null, - int? maxIterations = null, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new SdcaRegressionTrainer(env, label, features, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); - } - } - - public static class SdcaBinaryClassificationExtensions - { - /// - /// Predict a target using a linear binary classification model trained with the SDCA trainer. - /// - /// The binary classification context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional example weights. - /// The custom loss. Defaults to log-loss if not specified. - /// The L2 regularization hyperparameter. - /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. - /// The maximum number of passes to perform over the data. - /// A delegate to set more settings. - /// The settings here will override the ones provided in the direct method signature, - /// if both are present and have different values. - /// The columns names, however need to be provided directly, not through the . - /// - /// - /// - /// - /// - /// - /// - /// - public static SdcaBinaryTrainer StochasticDualCoordinateAscent( - this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, - string weights = null, - ISupportSdcaClassificationLoss loss = null, - float? l2Const = null, - float? l1Threshold = null, - int? 
maxIterations = null, - Action advancedSettings = null - ) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new SdcaBinaryTrainer(env, label, features, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); - } - } - - public static class SdcaMulticlassExtensions - { - - /// - /// Predict a target using a linear multiclass classification model trained with the SDCA trainer. - /// - /// The multiclass classification context trainer object. - /// The label, or dependent variable. - /// The features, or independent variables. - /// The optional custom loss. - /// The optional example weights. - /// The L2 regularization hyperparameter. - /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. - /// The maximum number of passes to perform over the data. - /// A delegate to set more settings. - /// The settings here will override the ones provided in the direct method signature, - /// if both are present and have different values. - /// The columns names, however need to be provided directly, not through the . - public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, - string label, - string features, - string weights = null, - ISupportSdcaClassificationLoss loss = null, - float? l2Const = null, - float? l1Threshold = null, - int? 
maxIterations = null, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new SdcaMultiClassTrainer(env, label, features, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); - } - } -} diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index 6c3a314be1..48a88d6009 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -51,8 +51,8 @@ public sealed class Arguments : ArgumentsBase /// Initializes a new instance of /// /// The environment to use. - /// The label, or dependent variable. - /// The features, or independent variables. + /// The label, or dependent variable. + /// The features, or independent variables. /// The optional example weights. /// The custom loss. /// The L2 regularization hyperparameter. @@ -63,19 +63,19 @@ public sealed class Arguments : ArgumentsBase /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaMultiClassTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, ISupportSdcaClassificationLoss loss = null, float? l2Const = null, float? l1Threshold = null, int? 
maxIterations = null, Action advancedSettings = null) - : base(env, features, TrainerUtils.MakeU4ScalarColumn(label), TrainerUtils.MakeR4ScalarWeightColumn(weights), advancedSettings, + : base(env, featureColumn, TrainerUtils.MakeU4ScalarColumn(labelColumn), TrainerUtils.MakeR4ScalarWeightColumn(weights), advancedSettings, l2Const, l1Threshold, maxIterations) { - Host.CheckNonEmpty(features, nameof(features)); - Host.CheckNonEmpty(label, nameof(label)); + Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); + Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); _loss = loss ?? Args.LossFunction.CreateComponent(env); Loss = _loss; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs index 7f066be0a7..0fb28424e7 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaRegression.cs @@ -56,8 +56,8 @@ public Arguments() /// Initializes a new instance of /// /// The environment to use. - /// The label, or dependent variable. - /// The features, or independent variables. + /// The label, or dependent variable. + /// The features, or independent variables. /// The optional example weights. /// The custom loss. /// The L2 regularization hyperparameter. @@ -68,19 +68,19 @@ public Arguments() /// if both are present and have different values. /// The columns names, however need to be provided directly, not through the . public SdcaRegressionTrainer(IHostEnvironment env, - string label, - string features, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, string weights = null, ISupportSdcaRegressionLoss loss = null, float? l2Const = null, float? l1Threshold = null, int? 
maxIterations = null, Action advancedSettings = null) - : base(env, features, TrainerUtils.MakeR4ScalarLabel(label), TrainerUtils.MakeR4ScalarWeightColumn(weights), advancedSettings, + : base(env, featureColumn, TrainerUtils.MakeR4ScalarLabel(labelColumn), TrainerUtils.MakeR4ScalarWeightColumn(weights), advancedSettings, l2Const, l1Threshold, maxIterations) { - Host.CheckNonEmpty(features, nameof(features)); - Host.CheckNonEmpty(label, nameof(label)); + Host.CheckNonEmpty(featureColumn, nameof(featureColumn)); + Host.CheckNonEmpty(labelColumn, nameof(labelColumn)); _loss = loss ?? Args.LossFunction.CreateComponent(env); Loss = _loss; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs b/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs deleted file mode 100644 index 407651f12e..0000000000 --- a/src/Microsoft.ML.StandardLearners/Standard/SgdCatalog.cs +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Runtime; -using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Trainers; -using System; - -namespace Microsoft.ML -{ - using Arguments = StochasticGradientDescentClassificationTrainer.Arguments; - - /// - /// Binary Classification trainer estimators. - /// - public static class StochasticGradientDescentCatalog - { - /// - /// Predict a target using a linear binary classification model trained with the trainer. - /// - /// The binary classificaiton context trainer object. - /// The name of the label column. - /// The name of the feature column. - /// The name for the example weight column. - /// The maximum number of iterations; set to 1 to simulate online learning. - /// The initial learning rate used by SGD. - /// The L2 regularization constant. - /// The loss function to use. 
- /// A delegate to apply all the advanced arguments to the algorithm. - public static StochasticGradientDescentClassificationTrainer StochasticGradientDescent(this BinaryClassificationContext.BinaryClassificationTrainers ctx, - string label, - string features, - string weights = null, - int maxIterations = Arguments.Defaults.MaxIterations, - double initLearningRate = Arguments.Defaults.InitLearningRate, - float l2Weight = Arguments.Defaults.L2Weight, - ISupportClassificationLossFactory loss = null, - Action advancedSettings = null) - { - Contracts.CheckValue(ctx, nameof(ctx)); - var env = CatalogUtils.GetEnvironment(ctx); - return new StochasticGradientDescentClassificationTrainer(env, label, features, weights, maxIterations, initLearningRate, l2Weight, loss, advancedSettings); - } - } -} diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs new file mode 100644 index 0000000000..17d88a3ab5 --- /dev/null +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -0,0 +1,314 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Learners; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.Online; +using System; + +namespace Microsoft.ML +{ + using SgdArguments = StochasticGradientDescentClassificationTrainer.Arguments; + using LRArguments = LogisticRegression.Arguments; + + /// + /// TrainerEstimator extension methods. + /// + public static class StandardLearnersCatalog + { + /// + /// Predict a target using a linear binary classification model trained with the trainer. + /// + /// The binary classificaiton context trainer object. + /// The name of the label column. + /// The name of the feature column. 
+ /// The name for the example weight column. + /// The maximum number of iterations; set to 1 to simulate online learning. + /// The initial learning rate used by SGD. + /// The L2 regularization constant. + /// The loss function to use. + /// A delegate to apply all the advanced arguments to the algorithm. + public static StochasticGradientDescentClassificationTrainer StochasticGradientDescent(this BinaryClassificationContext.BinaryClassificationTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + int maxIterations = SgdArguments.Defaults.MaxIterations, + double initLearningRate = SgdArguments.Defaults.InitLearningRate, + float l2Weight = SgdArguments.Defaults.L2Weight, + ISupportClassificationLossFactory loss = null, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new StochasticGradientDescentClassificationTrainer(env, labelColumn, featureColumn, weights, maxIterations, initLearningRate, l2Weight, loss, advancedSettings); + } + + /// + /// Predict a target using a linear regression model trained with the SDCA trainer. + /// + /// The regression context trainer object. + /// The labelColumn, or dependent variable. + /// The features, or independent variables. + /// The optional example weights. + /// The L2 regularization hyperparameter. + /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. + /// The maximum number of passes to perform over the data. + /// The custom loss, if unspecified will be . + /// A delegate to set more settings. + /// The settings here will override the ones provided in the direct method signature, + /// if both are present and have different values. + /// The columns names, however need to be provided directly, not through the . 
+ public static SdcaRegressionTrainer StochasticDualCoordinateAscent(this RegressionContext.RegressionTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + ISupportSdcaRegressionLoss loss = null, + float? l2Const = null, + float? l1Threshold = null, + int? maxIterations = null, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new SdcaRegressionTrainer(env, labelColumn, featureColumn, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + } + + /// + /// Predict a target using a linear binary classification model trained with the SDCA trainer. + /// + /// The binary classification context trainer object. + /// The labelColumn, or dependent variable. + /// The features, or independent variables. + /// The optional example weights. + /// The custom loss. Defaults to log-loss if not specified. + /// The L2 regularization hyperparameter. + /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. + /// The maximum number of passes to perform over the data. + /// A delegate to set more settings. + /// The settings here will override the ones provided in the direct method signature, + /// if both are present and have different values. + /// The column names, however, need to be provided directly, not through the . + /// + /// + /// + /// + /// + /// + /// + /// + public static SdcaBinaryTrainer StochasticDualCoordinateAscent( + this BinaryClassificationContext.BinaryClassificationTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + ISupportSdcaClassificationLoss loss = null, + float? l2Const = null, + float? l1Threshold = null, + int? 
maxIterations = null, + Action advancedSettings = null + ) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new SdcaBinaryTrainer(env, labelColumn, featureColumn, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + } + + /// + /// Predict a target using a linear multiclass classification model trained with the SDCA trainer. + /// + /// The multiclass classification context trainer object. + /// The labelColumn, or dependent variable. + /// The features, or independent variables. + /// The optional custom loss. + /// The optional example weights. + /// The L2 regularization hyperparameter. + /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. + /// The maximum number of passes to perform over the data. + /// A delegate to set more settings. + /// The settings here will override the ones provided in the direct method signature, + /// if both are present and have different values. + /// The column names, however, need to be provided directly, not through the . + public static SdcaMultiClassTrainer StochasticDualCoordinateAscent(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + ISupportSdcaClassificationLoss loss = null, + float? l2Const = null, + float? l1Threshold = null, + int? maxIterations = null, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new SdcaMultiClassTrainer(env, labelColumn, featureColumn, weights, loss, l2Const, l1Threshold, maxIterations, advancedSettings); + } + + /// + /// Predict a target using a linear binary classification model trained with the AveragedPerceptron trainer, and a custom loss. + /// + /// The binary classification context trainer object. 
+ /// The name of the label column, or dependent variable. + /// The features, or independent variables. + /// The custom loss. + /// The optional example weights. + /// The learning rate. + /// Decrease learning rate as iterations progress. + /// L2 regularization weight. + /// Number of training iterations through the data. + /// A delegate to supply more advanced arguments to the algorithm. + public static AveragedPerceptronTrainer AveragedPerceptron( + this BinaryClassificationContext.BinaryClassificationTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + IClassificationLoss lossFunction = null, + float learningRate = AveragedLinearArguments.AveragedDefaultArgs.LearningRate, + bool decreaseLearningRate = AveragedLinearArguments.AveragedDefaultArgs.DecreaseLearningRate, + float l2RegularizerWeight = AveragedLinearArguments.AveragedDefaultArgs.L2RegularizerWeight, + int numIterations = AveragedLinearArguments.AveragedDefaultArgs.NumIterations, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new AveragedPerceptronTrainer(env, labelColumn, featureColumn, weights, lossFunction ?? new LogLoss(), learningRate, decreaseLearningRate, l2RegularizerWeight, numIterations, advancedSettings); + } + + private sealed class TrivialClassificationLossFactory : ISupportClassificationLossFactory + { + private readonly IClassificationLoss _loss; + + public TrivialClassificationLossFactory(IClassificationLoss loss) + { + _loss = loss; + } + + public IClassificationLoss CreateComponent(IHostEnvironment env) + { + return _loss; + } + } + + /// + /// Predict a target using a linear regression model trained with the trainer. + /// + /// The regression context trainer object. + /// The name of the label, or dependent variable. + /// The features, or independent variables. + /// The optional example weights. 
+ /// The custom loss. Defaults to if not provided. + /// The learning rate. + /// Decrease learning rate as iterations progress. + /// L2 regularization weight. + /// Number of training iterations through the data. + /// A delegate to supply more advanced arguments to the algorithm. + public static OnlineGradientDescentTrainer OnlineGradientDescent(this RegressionContext.RegressionTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + IRegressionLoss lossFunction = null, + float learningRate = OnlineGradientDescentTrainer.Arguments.OgdDefaultArgs.LearningRate, + bool decreaseLearningRate = OnlineGradientDescentTrainer.Arguments.OgdDefaultArgs.DecreaseLearningRate, + float l2RegularizerWeight = AveragedLinearArguments.AveragedDefaultArgs.L2RegularizerWeight, + int numIterations = OnlineLinearArguments.OnlineDefaultArgs.NumIterations, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new OnlineGradientDescentTrainer(env, labelColumn, featureColumn, learningRate, decreaseLearningRate, l2RegularizerWeight, numIterations, weights, lossFunction, advancedSettings); + } + + /// + /// Predict a target using a linear binary classification model trained with the trainer. + /// + /// The binary classification context trainer object. + /// The label column name, or dependent variable. + /// The features, or independent variables. + /// The optional example weights. + /// Enforce non-negative weights. + /// Weight of L1 regularization term. + /// Weight of L2 regularization term. + /// Memory size for . Lower=faster, less accurate. + /// Threshold for optimizer convergence. + /// A delegate to apply all the advanced arguments to the algorithm. 
+ public static LogisticRegression LogisticRegression(this BinaryClassificationContext.BinaryClassificationTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + float l1Weight = LRArguments.Defaults.L1Weight, + float l2Weight = LRArguments.Defaults.L2Weight, + float optimizationTolerance = LRArguments.Defaults.OptTol, + int memorySize = LRArguments.Defaults.MemorySize, + bool enforceNoNegativity = LRArguments.Defaults.EnforceNonNegativity, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new LogisticRegression(env, labelColumn, featureColumn, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); + } + + /// + /// Predict a target using a linear regression model trained with the trainer. + /// + /// The regression context trainer object. + /// The labelColumn, or dependent variable. + /// The features, or independent variables. + /// The optional example weights. + /// Weight of L1 regularization term. + /// Weight of L2 regularization term. + /// Threshold for optimizer convergence. + /// Memory size for . Lower=faster, less accurate. + /// Enforce non-negative weights. + /// A delegate to apply all the advanced arguments to the algorithm. 
+ public static PoissonRegression PoissonRegression(this RegressionContext.RegressionTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + float l1Weight = LRArguments.Defaults.L1Weight, + float l2Weight = LRArguments.Defaults.L2Weight, + float optimizationTolerance = LRArguments.Defaults.OptTol, + int memorySize = LRArguments.Defaults.MemorySize, + bool enforceNoNegativity = LRArguments.Defaults.EnforceNonNegativity, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new PoissonRegression(env, labelColumn, featureColumn, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); + } + + /// + /// Predict a target using a linear multiclass classification model trained with the trainer. + /// + /// The multiclass classification context trainer object. + /// The labelColumn, or dependent variable. + /// The features, or independent variables. + /// The optional example weights. + /// Enforce non-negative weights. + /// Weight of L1 regularization term. + /// Weight of L2 regularization term. + /// Memory size for . Lower=faster, less accurate. + /// Threshold for optimizer convergence. + /// A delegate to apply all the advanced arguments to the algorithm. 
+ public static MulticlassLogisticRegression LogisticRegression(this MulticlassClassificationContext.MulticlassClassificationTrainers ctx, + string labelColumn = DefaultColumnNames.Label, + string featureColumn = DefaultColumnNames.Features, + string weights = null, + float l1Weight = LRArguments.Defaults.L1Weight, + float l2Weight = LRArguments.Defaults.L2Weight, + float optimizationTolerance = LRArguments.Defaults.OptTol, + int memorySize = LRArguments.Defaults.MemorySize, + bool enforceNoNegativity = LRArguments.Defaults.EnforceNonNegativity, + Action advancedSettings = null) + { + Contracts.CheckValue(ctx, nameof(ctx)); + var env = CatalogUtils.GetEnvironment(ctx); + return new MulticlassLogisticRegression(env, labelColumn, featureColumn, weights, l1Weight, l2Weight, optimizationTolerance, memorySize, enforceNoNegativity, advancedSettings); + } + } +} diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs index 6d4a20cd15..7ccd12667c 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/FAFMEstimator.cs @@ -20,7 +20,7 @@ public void FieldAwareFactorizationMachine_Estimator() var data = new TextLoader(Env, GetFafmBCLoaderArgs()) .Read(GetDataPath(TestDatasets.breastCancer.trainFilename)); - var est = new FieldAwareFactorizationMachineTrainer(Env, "Label", new[] { "Feature1", "Feature2", "Feature3", "Feature4" }, + var est = new FieldAwareFactorizationMachineTrainer(Env, new[] { "Feature1", "Feature2", "Feature3", "Feature4" }, "Label", advancedSettings:s=> { s.Shuffle = false; diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs index cb5a8b2748..3fd11ec6f2 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MatrixFactorizationTests.cs @@ 
-31,7 +31,7 @@ public void MatrixFactorization_Estimator() var invalidData = new TextLoader(Env, GetLoaderArgs(labelColumnName, matrixColumnIndexColumnName + "Renamed", matrixRowIndexColumnName + "Renamed")) .Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.testFilename))); - var est = new MatrixFactorizationTrainer(Env, labelColumnName, matrixColumnIndexColumnName, matrixRowIndexColumnName, + var est = new MatrixFactorizationTrainer(Env, matrixColumnIndexColumnName, matrixRowIndexColumnName, labelColumnName, advancedSettings: s => { s.NumIterations = 3; @@ -62,7 +62,7 @@ public void MatrixFactorizationSimpleTrainAndPredict() var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename))); // Create a pipeline with a single operator. - var pipeline = new MatrixFactorizationTrainer(mlContext, labelColumnName, userColumnName, itemColumnName, + var pipeline = new MatrixFactorizationTrainer(mlContext, userColumnName, itemColumnName, labelColumnName, advancedSettings: s => { s.NumIterations = 3; @@ -179,8 +179,10 @@ public void MatrixFactorizationInMemoryData() // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the // matrix's column index, and "MatrixRowIndex" as the matrix's row index. 
var mlContext = new MLContext(seed: 1, conc: 1); - var pipeline = new MatrixFactorizationTrainer(mlContext, nameof(MatrixElement.Value), - nameof(MatrixElement.MatrixColumnIndex), nameof(MatrixElement.MatrixRowIndex), + var pipeline = new MatrixFactorizationTrainer(mlContext, + nameof(MatrixElement.MatrixColumnIndex), + nameof(MatrixElement.MatrixRowIndex), + nameof(MatrixElement.Value), advancedSettings: s => { s.NumIterations = 10; @@ -269,8 +271,10 @@ public void MatrixFactorizationInMemoryDataZeroBaseIndex() // Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the // matrix's column index, and "MatrixRowIndex" as the matrix's row index. var mlContext = new MLContext(seed: 1, conc: 1); - var pipeline = new MatrixFactorizationTrainer(mlContext, nameof(MatrixElementZeroBased.Value), - nameof(MatrixElementZeroBased.MatrixColumnIndex), nameof(MatrixElementZeroBased.MatrixRowIndex), + var pipeline = new MatrixFactorizationTrainer(mlContext, + nameof(MatrixElementZeroBased.MatrixColumnIndex), + nameof(MatrixElementZeroBased.MatrixRowIndex), + nameof(MatrixElementZeroBased.Value), advancedSettings: s => { s.NumIterations = 100; From b2631b7b465dbc2f47e0fd990fff6efd96ad6c60 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Fri, 9 Nov 2018 12:33:31 -0800 Subject: [PATCH 12/12] post merge XML comment change. 
--- src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs | 4 ++-- .../Api/CookbookSamples/CookbookSamplesDynamicApi.cs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 17d88a3ab5..465a39a9ea 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -50,13 +50,13 @@ public static StochasticGradientDescentClassificationTrainer StochasticGradientD /// Predict a target using a linear regression model trained with the SDCA trainer. /// /// The regression context trainer object. - /// The labelColumn, or dependent variable. + /// The label column, or dependent variable. /// The features, or independent variables. /// The optional example weights. /// The L2 regularization hyperparameter. /// The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model. /// The maximum number of passes to perform over the data. - /// The custom loss, if unspecified will be . + /// The custom loss, if unspecified will be . /// A delegate to set more settings. /// The settings here will override the ones provided in the direct method signature, /// if both are present and have different values. diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index d2393d77b4..231ef9d876 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -117,7 +117,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m // between -1 and 1 for all examples), and then train the model. 
mlContext.Transforms.Normalize("FeatureVector") // Add the SDCA regression trainer. - .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent(label: "Target", features: "FeatureVector")); + .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumn: "Target", featureColumn: "FeatureVector")); // Step three. Fit the pipeline to the training data. var model = dynamicPipeline.Fit(trainData);