Skip to content

Fix missing ExampleWeightColumnName in the advanced Options for some trainers #3104

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public static void Example()
var mlContext = new MLContext();

// Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
IEnumerable<SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorSample> enumerableOfData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(5);
IEnumerable<SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorFloatWeightSample> enumerableOfData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(5);
Copy link
Member

@wschin wschin Mar 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you examine the effect of having weight? Should it be somehow checked in a test? I feel we need two trainers w/wo weight column and make sure the two trained models are different. #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's what I did in the test SdcaLogisticRegressionWithWeight. I added two trainers w/wo weights and verified they produced different metrics. Is that sufficient?


In reply to: 270204851 [](ancestors = 270204851)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Their scores are similar. I'd like to have a more strict criterion. As you heard from Zeeshan S, tiny changes induced large SDCA regression this morning.


In reply to: 270205745 [](ancestors = 270205745,270204851)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added checks in the other tests where we test thoroughly for this


In reply to: 270207991 [](ancestors = 270207991,270205745,270204851)

var data = mlContext.Data.LoadFromEnumerable(enumerableOfData);

// Look at the original dataset
Expand Down Expand Up @@ -43,7 +43,7 @@ public static void Example()
{
var resample = mlContext.Data.BootstrapSample(data, seed: i);

var enumerable = mlContext.Data.CreateEnumerable<SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorSample>(resample, reuseRowObject: false);
var enumerable = mlContext.Data.CreateEnumerable<SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorFloatWeightSample>(resample, reuseRowObject: false);
Console.WriteLine($"Label\tFeatures[0]");
foreach (var row in enumerable)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ public static class StochasticDualCoordinateAscentNonCalibrated
public static void Example()
{
// Generate IEnumerable<BinaryLabelFloatFeatureVectorFloatWeightSample> as training examples.
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100);

// Information in first example.
// Label: true
Expand Down
21 changes: 14 additions & 7 deletions src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -508,18 +508,20 @@ public static IEnumerable<SampleVectorOfNumbersData> GetVectorOfNumbersData()
private const int _simpleBinaryClassSampleFeatureLength = 10;

/// <summary>
/// Example with one binary label and 10 feature values.
/// Example with one binary label, 10 feature values and a weight (float).
/// </summary>
public class BinaryLabelFloatFeatureVectorSample
public class BinaryLabelFloatFeatureVectorFloatWeightSample
{
// Binary class label for the example.
public bool Label;

// Feature vector of fixed length; the VectorType attribute pins the vector
// size to _simpleBinaryClassSampleFeatureLength (10) in the IDataView schema.
[VectorType(_simpleBinaryClassSampleFeatureLength)]
public float[] Features;

// Per-example weight; intended to be bound to a trainer's
// ExampleWeightColumnName option (see the SDCA tests in this PR).
public float Weight;
}

/// <summary>
/// Class used to capture prediction of <see cref="BinaryLabelFloatFeatureVectorSample"/> when
/// Class used to capture prediction of <see cref="BinaryLabelFloatFeatureVectorFloatWeightSample"/> when
/// calling <see cref="DataOperationsCatalog.CreateEnumerable{TRow}(IDataView, bool, bool, SchemaDefinition)"/> on <see cref="MLContext"/>.
/// </summary>
public class CalibratedBinaryClassifierOutput
Expand All @@ -530,7 +532,7 @@ public class CalibratedBinaryClassifierOutput
}

/// <summary>
/// Class used to capture prediction of <see cref="BinaryLabelFloatFeatureVectorSample"/> when
/// Class used to capture prediction of <see cref="BinaryLabelFloatFeatureVectorFloatWeightSample"/> when
/// calling <see cref="DataOperationsCatalog.CreateEnumerable{TRow}(IDataView, bool, bool, SchemaDefinition)"/> on <see cref="MLContext"/>.
/// </summary>
public class NonCalibratedBinaryClassifierOutput
Expand All @@ -539,14 +541,19 @@ public class NonCalibratedBinaryClassifierOutput
public float Score;
}

public static IEnumerable<BinaryLabelFloatFeatureVectorSample> GenerateBinaryLabelFloatFeatureVectorSamples(int exampleCount)
public static IEnumerable<BinaryLabelFloatFeatureVectorFloatWeightSample> GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(int exampleCount)
{
var rnd = new Random(0);
var data = new List<BinaryLabelFloatFeatureVectorSample>();
var data = new List<BinaryLabelFloatFeatureVectorFloatWeightSample>();
for (int i = 0; i < exampleCount; ++i)
{
// Initialize an example with a random label and an empty feature vector.
var sample = new BinaryLabelFloatFeatureVectorSample() { Label = rnd.Next() % 2 == 0, Features = new float[_simpleBinaryClassSampleFeatureLength] };
var sample = new BinaryLabelFloatFeatureVectorFloatWeightSample() {
Label = rnd.Next() % 2 == 0,
Features = new float[_simpleBinaryClassSampleFeatureLength],
Weight = (float)rnd.NextDouble()
};

// Fill feature vector according to the assigned label.
for (int j = 0; j < _simpleBinaryClassSampleFeatureLength; ++j)
{
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.StandardTrainers/Standard/SdcaBinary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public abstract class SdcaTrainerBase<TOptions, TTransformer, TModel> : Stochast
/// <summary>
/// Options for the SDCA-based trainers.
/// </summary>
public abstract class OptionsBase : TrainerInputBaseWithLabel
public abstract class OptionsBase : TrainerInputBaseWithWeight
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Mar 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TrainerInputBaseWithWeight [](start = 44, length = 26)

I would assume you check all mlContext.*Catalog extensions for SDCA trainers to have exampleWeightColumnName in it? #Resolved

Copy link
Member Author

@abgoswam abgoswam Mar 27, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeap. i did verify all of the "simple" SDCA trainer extensions have the exampleWeightColumnName parameter


In reply to: 269666016 [](ancestors = 269666016)

{
/// <summary>
/// The L2 <a href='tmpurl_regularization'>regularization</a> hyperparameter.
Expand Down Expand Up @@ -1505,7 +1505,7 @@ private protected SdcaBinaryTrainerBase(IHostEnvironment env,
}

private protected SdcaBinaryTrainerBase(IHostEnvironment env, BinaryOptionsBase options, ISupportSdcaClassificationLoss loss = null, bool doCalibration = false)
: base(env, options, TrainerUtils.MakeBoolScalarLabel(options.LabelColumnName))
: base(env, options, TrainerUtils.MakeBoolScalarLabel(options.LabelColumnName), TrainerUtils.MakeR4ScalarWeightColumn(options.ExampleWeightColumnName))
{
_loss = loss ?? new LogLossFactory().CreateComponent(env);
Loss = _loss;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ internal SdcaMulticlassTrainerBase(IHostEnvironment env, MulticlassOptions optio
}

internal SdcaMulticlassTrainerBase(IHostEnvironment env, MulticlassOptions options)
: this(env, options, options.FeatureColumnName, options.LabelColumnName)
: this(env, options, options.FeatureColumnName, options.LabelColumnName, options.ExampleWeightColumnName)
{
}

Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.StandardTrainers/Standard/SdcaRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,14 @@ internal SdcaRegressionTrainer(IHostEnvironment env, Options options, string fea
{
Host.CheckValue(labelColumn, nameof(labelColumn));
Host.CheckValue(featureColumn, nameof(featureColumn));
Host.CheckValueOrNull(weightColumn);

_loss = options.LossFunction ?? options.LossFunctionFactory.CreateComponent(env);
Loss = _loss;
}

internal SdcaRegressionTrainer(IHostEnvironment env, Options options)
: this(env, options, options.FeatureColumnName, options.LabelColumnName)
: this(env, options, options.FeatureColumnName, options.LabelColumnName, options.ExampleWeightColumnName)
{
}

Expand Down
5 changes: 5 additions & 0 deletions src/Microsoft.ML.StaticPipe/SdcaStaticExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ public static Scalar<float> Sdca(this RegressionCatalog.RegressionTrainers catal
{
options.LabelColumnName = labelName;
options.FeatureColumnName = featuresName;
options.ExampleWeightColumnName = weightsName;

var trainer = new SdcaRegressionTrainer(env, options);
if (onFit != null)
Expand Down Expand Up @@ -206,6 +207,7 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
{
options.LabelColumnName = labelName;
options.FeatureColumnName = featuresName;
options.ExampleWeightColumnName = weightsName;

var trainer = new SdcaLogisticRegressionBinaryTrainer(env, options);
if (onFit != null)
Expand Down Expand Up @@ -313,6 +315,7 @@ public static (Scalar<float> score, Scalar<bool> predictedLabel) SdcaNonCalibrat
{
options.FeatureColumnName = featuresName;
options.LabelColumnName = labelName;
options.ExampleWeightColumnName = weightsName;

var trainer = new SdcaNonCalibratedBinaryTrainer(env, options);
if (onFit != null)
Expand Down Expand Up @@ -407,6 +410,7 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel) Sdca<TVal>(
{
options.LabelColumnName = labelName;
options.FeatureColumnName = featuresName;
options.ExampleWeightColumnName = weightsName;

var trainer = new SdcaMaximumEntropyMulticlassTrainer(env, options);
if (onFit != null)
Expand Down Expand Up @@ -499,6 +503,7 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel) SdcaNonCalib
{
options.LabelColumnName = labelName;
options.FeatureColumnName = featuresName;
options.ExampleWeightColumnName = weightsName;

var trainer = new SdcaNonCalibratedMulticlassTrainer(env, options);
if (onFit != null)
Expand Down
39 changes: 39 additions & 0 deletions test/BaselineOutput/Common/EntryPoints/core_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -15006,6 +15006,18 @@
"IsNullable": false,
"Default": "Label"
},
{
"Name": "ExampleWeightColumnName",
"Type": "String",
"Desc": "Column to use for example weight",
"Aliases": [
"weight"
],
"Required": false,
"SortOrder": 4.0,
"IsNullable": false,
"Default": null
},
{
"Name": "NormalizeFeatures",
"Type": {
Expand Down Expand Up @@ -15218,6 +15230,7 @@
}
],
"InputKind": [
"ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
Expand Down Expand Up @@ -15315,6 +15328,18 @@
"IsNullable": false,
"Default": "Label"
},
{
"Name": "ExampleWeightColumnName",
"Type": "String",
"Desc": "Column to use for example weight",
"Aliases": [
"weight"
],
"Required": false,
"SortOrder": 4.0,
"IsNullable": false,
"Default": null
},
{
"Name": "NormalizeFeatures",
"Type": {
Expand Down Expand Up @@ -15492,6 +15517,7 @@
}
],
"InputKind": [
"ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
Expand Down Expand Up @@ -15589,6 +15615,18 @@
"IsNullable": false,
"Default": "Label"
},
{
"Name": "ExampleWeightColumnName",
"Type": "String",
"Desc": "Column to use for example weight",
"Aliases": [
"weight"
],
"Required": false,
"SortOrder": 4.0,
"IsNullable": false,
"Default": null
},
{
"Name": "NormalizeFeatures",
"Type": {
Expand Down Expand Up @@ -15766,6 +15804,7 @@
}
],
"InputKind": [
"ITrainerInputWithWeight",
"ITrainerInputWithLabel",
"ITrainerInput"
],
Expand Down
115 changes: 113 additions & 2 deletions test/Microsoft.ML.Tests/TrainerEstimators/SdcaTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void SdcaWorkout()
public void SdcaLogisticRegression()
{
// Generate C# objects as training examples.
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100);

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
Expand Down Expand Up @@ -88,11 +88,122 @@ public void SdcaLogisticRegression()
Assert.InRange(first.Probability, 0.8, 1);
}

[Fact]
public void SdcaLogisticRegressionWithWeight()
Copy link
Member

@wschin wschin Mar 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LogisticRegression [](start = 24, length = 18)

This is called LogisticRegression but contains MaximumEntropy trainers. #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

WIll fix by separating into 2 tests one each for binary and multiclass


In reply to: 270208235 [](ancestors = 270208235)

{
// Generate C# objects as training examples.
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100);

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext(0);

// Read the data as an IDataView.
var data = mlContext.Data.LoadFromEnumerable(rawData);

// ML.NET doesn't cache the data set by default. Caching is very helpful when working with iterative
// algorithms which need many data passes. Since SDCA is such an algorithm, we cache.
data = mlContext.Data.Cache(data);

// SdcaLogisticRegression with and without weights.
var sdcaWithoutWeightBinary = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
new SdcaLogisticRegressionBinaryTrainer.Options { NumberOfThreads = 1 });
var sdcaWithWeightBinary = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
new SdcaLogisticRegressionBinaryTrainer.Options { ExampleWeightColumnName = "Weight", NumberOfThreads = 1 });

var modelWithoutWeights = sdcaWithoutWeightBinary.Fit(data);
var modelWithWeights = sdcaWithWeightBinary.Fit(data);

var prediction1 = modelWithoutWeights.Transform(data);
var prediction2 = modelWithWeights.Transform(data);

// Verify the metrics produced are different.
var metrics1 = mlContext.BinaryClassification.Evaluate(prediction1);
var metrics2 = mlContext.BinaryClassification.Evaluate(prediction2);
Assert.Equal(0.9658, metrics1.AreaUnderRocCurve, 4);
Assert.Equal(0.3488, metrics1.LogLoss, 4);
Assert.Equal(0.9596, metrics2.AreaUnderRocCurve, 4);
Assert.Equal(0.3591, metrics2.LogLoss, 4);

// Verify the raw scores are different.
var scores1 = prediction1.GetColumn<float>(prediction1.Schema["Score"]).ToArray();
var scores2 = prediction2.GetColumn<float>(prediction2.Schema["Score"]).ToArray();
Assert.True(scores1.Length == scores2.Length);

bool sameScores = true;
for (int i = 0; i < scores1.Length; i++)
{
if(!CompareNumbersWithTolerance(scores1[i], scores2[i]))
{
sameScores = false;
break;
}
}
Assert.False(sameScores);
}

[Fact]
public void SdcaMaximumEntropyWithWeight()
{
// Generate C# objects as training examples.
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100);

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext(0);

// Read the data as an IDataView.
var data = mlContext.Data.LoadFromEnumerable(rawData);

// ML.NET doesn't cache the data set by default. Caching is very helpful when working with iterative
// algorithms which need many data passes. Since SDCA is such an algorithm, we cache.
data = mlContext.Data.Cache(data);

// SdcaMaximumEntropy with and without weights.
var sdcaWithoutWeightMulticlass = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label").
Copy link
Member

@wschin wschin Mar 28, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May we put multi-class test to another independent test (to make test small)? #Resolved

Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
new SdcaMaximumEntropyMulticlassTrainer.Options { LabelColumnName = "LabelIndex", NumberOfThreads = 1 }));

var sdcaWithWeightMulticlass = mlContext.Transforms.Conversion.MapValueToKey("LabelIndex", "Label").
Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
new SdcaMaximumEntropyMulticlassTrainer.Options { LabelColumnName = "LabelIndex", ExampleWeightColumnName = "Weight", NumberOfThreads = 1 }));

var modelWithoutWeights = sdcaWithoutWeightMulticlass.Fit(data);
var modelWithWeights = sdcaWithWeightMulticlass.Fit(data);

var prediction1 = modelWithoutWeights.Transform(data);
var prediction2 = modelWithWeights.Transform(data);

// Verify the metrics produced are different.
var metrics1 = mlContext.MulticlassClassification.Evaluate(prediction1, labelColumnName: "LabelIndex", topKPredictionCount: 1);
var metrics2 = mlContext.MulticlassClassification.Evaluate(prediction2, labelColumnName: "LabelIndex", topKPredictionCount: 1);
Assert.Equal(0.9100, metrics1.TopKAccuracy, 4);
Assert.Equal(0.2411, metrics1.LogLoss, 4);
Assert.Equal(0.8800, metrics2.TopKAccuracy, 4);
Assert.Equal(0.2464, metrics2.LogLoss, 4);

// Verify the raw scores are different.
var scores1 = prediction1.GetColumn<float[]>(prediction1.Schema["Score"]).ToArray();
var scores2 = prediction2.GetColumn<float[]>(prediction2.Schema["Score"]).ToArray();
Assert.True(scores1.Length == scores2.Length);

bool sameScores = true;
for (int i = 0; i < scores1.Length; i++)
{
if (!CompareNumbersWithTolerance(scores1[i][0], scores2[i][0]))
{
sameScores = false;
break;
}
}
Assert.False(sameScores);
}

[Fact]
public void SdcaSupportVectorMachine()
{
// Generate C# objects as training examples.
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(100);
var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100);

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public partial class TrainerEstimators
public void TreeEnsembleFeaturizerOutputSchemaTest()
{
// Create data set
var data = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorSamples(1000).ToList();
var data = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(1000).ToList();
var dataView = ML.Data.LoadFromEnumerable(data);

// Define a tree model whose trees will be extracted to construct a tree featurizer.
Expand All @@ -36,8 +36,8 @@ public void TreeEnsembleFeaturizerOutputSchemaTest()

// To get output schema, we need to create RoleMappedSchema for calling Bind(...).
var roleMappedSchema = new RoleMappedSchema(dataView.Schema,
label: nameof(SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorSample.Label),
feature: nameof(SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorSample.Features));
label: nameof(SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorFloatWeightSample.Label),
feature: nameof(SamplesUtils.DatasetUtils.BinaryLabelFloatFeatureVectorFloatWeightSample.Features));

// Retrieve output schema.
var boundMapper = (treeFeaturizer as ISchemaBindableMapper).Bind(Env, roleMappedSchema);
Expand Down