dotnet · TomFinley · Sep 6, 2018 · Sep 5, 2018 · Sep 5, 2018 · Sep 5, 2018
diff --git a/src/Microsoft.ML.Data/StaticPipe/Estimator.cs b/src/Microsoft.ML.Data/StaticPipe/Estimator.cs
@@ -79,20 +79,4 @@ string NameMap(PipelineColumn col)
             }
         }
     }
-
-    public static class Estimator
-    {
-        /// <summary>
-        /// Create an object that can be used as the start of a new pipeline, that assumes it uses
-        /// something with the sahape of <typeparamref name="TTupleShape"/> as its input schema shape.
-        /// The returned object is an empty estimator.
-        /// </summary>
-        /// <param name="fromSchema">Creates a new empty head of a pipeline</param>
-        /// <returns>The empty esitmator, to which new items may be appended to create a pipeline</returns>
-        public static Estimator<TTupleShape, TTupleShape, ITransformer> MakeNew<TTupleShape>(SchemaBearing<TTupleShape> fromSchema)
-        {
-            Contracts.CheckValue(fromSchema, nameof(fromSchema));
-            return fromSchema.MakeNewEstimator();
-        }
-    }
 }
diff --git a/src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs b/src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs
@@ -37,11 +37,12 @@ private protected SchemaBearing(IHostEnvironment env, StaticSchemaShape shape)
         }
 
         /// <summary>
-        /// Create an object that can be used as the start of a new pipeline, that assumes it uses
-        /// something with the sahape of <typeparamref name="TTupleShape"/> as its input schema shape.
-        /// The returned object is an empty estimator.
+        /// Starts a new pipeline, using the output schema of this object. Note that the returned
+        /// estimator does not contain this object, but it has its schema informed by <typeparamref name="TTupleShape"/>.
+        /// The returned object is an empty estimator, on which a new segment of the pipeline can be created.
         /// </summary>
-        internal Estimator<TTupleShape, TTupleShape, ITransformer> MakeNewEstimator()
+        /// <returns>An empty estimator with the same shape as the object on which it was created</returns>
+        public Estimator<TTupleShape, TTupleShape, ITransformer> MakeNewEstimator()
         {
             var est = new EstimatorChain<ITransformer>();
             return new Estimator<TTupleShape, TTupleShape, ITransformer>(Env, est, Shape, Shape);

diff --git a/src/Microsoft.ML.Data/StaticPipe/TrainerEstimatorReconciler.cs b/src/Microsoft.ML.Data/StaticPipe/TrainerEstimatorReconciler.cs
diff --git a/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/LinearClassificationTrainer.cs
@@ -1405,11 +1405,25 @@ public LinearClassificationTrainer(IHostEnvironment env, Arguments args,
             Info = new TrainerInfo(calibration: !(_loss is LogLoss));
             _args = args;
             _positiveInstanceWeight = _args.PositiveInstanceWeight;
-            OutputColumns = new[]
+
+            if (Info.NeedCalibration)
             {
-                new SchemaShape.Column(DefaultColumnNames.Score, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false),
-                new SchemaShape.Column(DefaultColumnNames.PredictedLabel, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)
-            };
+                OutputColumns = new[]
+                {
+                    new SchemaShape.Column(DefaultColumnNames.Score, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false),
+                    new SchemaShape.Column(DefaultColumnNames.PredictedLabel, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)
+                };
+            }
+            else
+            {
+                OutputColumns = new[]
+                {
+                    new SchemaShape.Column(DefaultColumnNames.Score, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false),
+                    new SchemaShape.Column(DefaultColumnNames.Probability, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false),
+                    new SchemaShape.Column(DefaultColumnNames.PredictedLabel, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false)
+                };
+            }
+
         }
 
         public LinearClassificationTrainer(IHostEnvironment env, Arguments args)

diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs
@@ -0,0 +1,230 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using Microsoft.ML.Data.StaticPipe;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Runtime.Internal.Calibration;
+
+namespace Microsoft.ML.Runtime.Learners
+{
+    /// <summary>
+    /// Extension methods and utilities for instantiating SDCA trainer estimators inside statically typed pipelines.
+    /// </summary>
+    public static class SdcaStatic
+    {
+        /// <summary>
+        /// Trains a linear regression model with the SDCA trainer and yields its prediction of the target.
+        /// </summary>
+        /// <param name="label">The column holding the label, or dependent variable.</param>
+        /// <param name="features">The column holding the features, or independent variables.</param>
+        /// <param name="weights">The column holding the example weights; may be omitted.</param>
+        /// <param name="l2Const">The L2 regularization hyperparameter. Must not be negative if specified.</param>
+        /// <param name="l1Threshold">The L1 regularization hyperparameter. Must not be negative if specified. Higher values will tend to lead to more sparse model.</param>
+        /// <param name="maxIterations">The maximum number of passes to perform over the data. Must be positive if specified.</param>
+        /// <param name="loss">The custom loss, if unspecified will be <see cref="SquaredLossSDCARegressionLossFunction"/>.</param>
+        /// <param name="onFit">A delegate invoked each time the
+        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
+        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}"/> instance built from this call. It is handed
+        /// the linear model that was trained. The delegate cannot alter the result in any way; it only exists so that the caller
+        /// can observe what was learnt.</param>
+        /// <returns>The predicted output.</returns>
+        public static Scalar<float> PredictSdcaRegression(this Scalar<float> label, Vector<float> features, Scalar<float> weights = null,
+            float? l2Const = null,
+            float? l1Threshold = null,
+            int? maxIterations = null,
+            ISupportSdcaRegressionLoss loss = null,
+            Action<LinearRegressionPredictor> onFit = null)
+        {
+            Contracts.CheckValue(label, nameof(label));
+            Contracts.CheckValue(features, nameof(features));
+            Contracts.CheckValueOrNull(weights);
+            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative");
+            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative");
+            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckValueOrNull(loss);
+            Contracts.CheckValueOrNull(onFit);
+
+            var options = new SdcaRegressionTrainer.Arguments()
+            {
+                L2Const = l2Const,
+                L1Threshold = l1Threshold,
+                MaxIterations = maxIterations
+            };
+            if (loss != null)
+                options.LossFunction = new TrivialRegressionLossFactory(loss);
+
+            var reconciler = new TrainerEstimatorReconciler.Regression(
+                (env, labelName, featuresName, weightsName) =>
+                {
+                    var sdca = new SdcaRegressionTrainer(env, options, featuresName, labelName, weightsName);
+                    if (onFit == null)
+                        return sdca;
+                    return sdca.WithOnFitDelegate(fitted => onFit(fitted.Model));
+                }, label, features, weights);
+
+            return reconciler.Score;
+        }
+
+        /// <summary>
+        /// Trains a linear binary classification model with the SDCA trainer under log-loss and yields its predictions.
+        /// </summary>
+        /// <param name="label">The column holding the label, or dependent variable.</param>
+        /// <param name="features">The column holding the features, or independent variables.</param>
+        /// <param name="weights">The column holding the example weights; may be omitted.</param>
+        /// <param name="l2Const">The L2 regularization hyperparameter. Must not be negative if specified.</param>
+        /// <param name="l1Threshold">The L1 regularization hyperparameter. Must not be negative if specified. Higher values will tend to lead to more sparse model.</param>
+        /// <param name="maxIterations">The maximum number of passes to perform over the data. Must be positive if specified.</param>
+        /// <param name="onFit">A delegate invoked each time the
+        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
+        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}"/> instance built from this call. It is handed
+        /// the linear model that was trained, together with the calibrator wrapped around that model. The delegate cannot alter the
+        /// result in any way; it only exists so that the caller can observe what was learnt.</param>
+        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
+        /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
+        public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel)
+            PredictSdcaBinaryClassification(this Scalar<bool> label, Vector<float> features, Scalar<float> weights = null,
+                float? l2Const = null,
+                float? l1Threshold = null,
+                int? maxIterations = null,
+                Action<LinearBinaryPredictor, ParameterMixingCalibratedPredictor> onFit = null)
+        {
+            Contracts.CheckValue(label, nameof(label));
+            Contracts.CheckValue(features, nameof(features));
+            Contracts.CheckValueOrNull(weights);
+            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative");
+            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative");
+            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckValueOrNull(onFit);
+
+            var options = new LinearClassificationTrainer.Arguments()
+            {
+                L2Const = l2Const,
+                L1Threshold = l1Threshold,
+                MaxIterations = maxIterations,
+            };
+
+            var reconciler = new TrainerEstimatorReconciler.BinaryClassifier(
+                (env, labelName, featuresName, weightsName) =>
+                {
+                    var sdca = new LinearClassificationTrainer(env, options, featuresName, labelName, weightsName);
+                    if (onFit == null)
+                        return sdca;
+
+                    return sdca.WithOnFitDelegate(fitted =>
+                    {
+                        // No loss is set here, so the default log-loss applies and the trained model is assumed
+                        // to be a calibrated predictor wrapping the linear one; the casts below rely on that.
+                        var fittedModel = fitted.Model;
+                        var calibrated = (ParameterMixingCalibratedPredictor)fittedModel;
+                        var linear = (LinearBinaryPredictor)calibrated.SubPredictor;
+                        onFit(linear, calibrated);
+                    });
+                }, label, features, weights);
+
+            return reconciler.Output;
+        }
+
+        /// <summary>
+        /// Predict a target using a linear binary classification model trained with the SDCA trainer, and a custom loss.
+        /// Note that because we cannot be sure that all loss functions will produce naturally calibrated outputs, setting
+        /// a custom loss function will not produce a calibrated probability column.
+        /// </summary>
+        /// <param name="label">The label, or dependent variable.</param>
+        /// <param name="features">The features, or independent variables.</param>
+        /// <param name="loss">The custom loss. Must not be null.</param>
+        /// <param name="weights">The optional example weights.</param>
+        /// <param name="l2Const">The L2 regularization hyperparameter.</param>
+        /// <param name="l1Threshold">The L1 regularization hyperparameter. Higher values will tend to lead to more sparse model.</param>
+        /// <param name="maxIterations">The maximum number of passes to perform over the data.</param>
+        /// <param name="onFit">A delegate that is called every time the
+        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}.Fit(DataView{TTupleInShape})"/> method is called on the
+        /// <see cref="Estimator{TTupleInShape, TTupleOutShape, TTransformer}"/> instance created out of this. This delegate will receive
+        /// only the linear model that was trained (unwrapped from its calibrator, if the trained model has one). Note that this action
+        /// cannot change the result in any way; it is only a way for the caller to be informed about what was learnt.</param>
+        /// <returns>The set of output columns including in order the predicted binary classification score (which will range
+        /// from negative to positive infinity), and the predicted label.</returns>
+        /// <seealso cref="PredictSdcaBinaryClassification(Scalar{bool}, Vector{float}, Scalar{float}, float?, float?, int?, Action{LinearBinaryPredictor, ParameterMixingCalibratedPredictor})"/>
+        public static (Scalar<float> score, Scalar<bool> predictedLabel)
+            PredictSdcaBinaryClassification(this Scalar<bool> label, Vector<float> features,
+                ISupportSdcaClassificationLoss loss,
+                Scalar<float> weights = null,
+                float? l2Const = null,
+                float? l1Threshold = null,
+                int? maxIterations = null,
+                Action<LinearBinaryPredictor> onFit = null
+            )
+        {
+            Contracts.CheckValue(label, nameof(label));
+            Contracts.CheckValue(features, nameof(features));
+            Contracts.CheckValue(loss, nameof(loss));
+            Contracts.CheckValueOrNull(weights);
+            Contracts.CheckParam(!(l2Const < 0), nameof(l2Const), "Must not be negative");
+            Contracts.CheckParam(!(l1Threshold < 0), nameof(l1Threshold), "Must not be negative");
+            Contracts.CheckParam(!(maxIterations < 1), nameof(maxIterations), "Must be positive if specified");
+            Contracts.CheckValueOrNull(onFit);
+
+            // The trainer declares a probability column only when the loss is log-loss; pass that fact on to the reconciler.
+            bool hasProbs = loss is LogLoss;
+            var args = new LinearClassificationTrainer.Arguments()
+            {
+                L2Const = l2Const,
+                L1Threshold = l1Threshold,
+                MaxIterations = maxIterations,
+                LossFunction = new TrivialClassificationLossFactory(loss)
+            };
+
+            var rec = new TrainerEstimatorReconciler.BinaryClassifierNoCalibration(
+                (env, labelName, featuresName, weightsName) =>
+                {
+                    var trainer = new LinearClassificationTrainer(env, args, featuresName, labelName, weightsName);
+                    if (onFit != null)
+                    {
+                        return trainer.WithOnFitDelegate(trans =>
+                        {
+                            var model = trans.Model;
+                            if (model is ParameterMixingCalibratedPredictor cali)
+                                onFit((LinearBinaryPredictor)cali.SubPredictor);
+                            else
+                                onFit((LinearBinaryPredictor)model);
+                        });
+                    }
+                    return trainer;
+                }, label, features, weights, hasProbs);
+
+            return rec.Output;
+        }
+
+        /// <summary>
+        /// Adapts an already constructed regression loss to the loss factory interface used by the trainer arguments.
+        /// </summary>
+        private sealed class TrivialRegressionLossFactory : ISupportSdcaRegressionLossFactory
+        {
+            private readonly ISupportSdcaRegressionLoss _loss;
+
+            public TrivialRegressionLossFactory(ISupportSdcaRegressionLoss loss) => _loss = loss;
+
+            /// <summary>
+            /// Returns the wrapped loss instance as-is; <paramref name="env"/> is not used.
+            /// </summary>
+            public ISupportSdcaRegressionLoss CreateComponent(IHostEnvironment env) => _loss;
+        }
+
+        /// <summary>
+        /// Adapts an already constructed classification loss to the loss factory interface used by the trainer arguments.
+        /// </summary>
+        private sealed class TrivialClassificationLossFactory : ISupportSdcaClassificationLossFactory
+        {
+            private readonly ISupportSdcaClassificationLoss _loss;
+
+            public TrivialClassificationLossFactory(ISupportSdcaClassificationLoss loss) => _loss = loss;
+
+            /// <summary>
+            /// Returns the wrapped loss instance as-is; <paramref name="env"/> is not used.
+            /// </summary>
+            public ISupportSdcaClassificationLoss CreateComponent(IHostEnvironment env) => _loss;
+        }
+    }
+}
diff --git a/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResource.cs b/test/Microsoft.ML.CodeAnalyzer.Tests/Resources/TypeIsSchemaShapeResource.cs
@@ -16,7 +16,7 @@ public static void Bar()
                 text: ctx.LoadText(1),
                 numericFeatures: ctx.LoadFloat(2, 5)));
 
-            var est = Estimator.MakeNew(text);
+            var est = text.MakeNewEstimator();
             // This should work.
             est.Append(r => r.text);
             // These should not.

diff --git a/test/Microsoft.ML.StaticPipelineTesting/ImageAnalyticsTests.cs b/test/Microsoft.ML.StaticPipelineTesting/ImageAnalyticsTests.cs
@@ -10,7 +10,7 @@
 
 namespace Microsoft.ML.StaticPipelineTesting
 {
-    public sealed class ImageAnalyticsTests : MakeConsoleWork
+    public sealed class ImageAnalyticsTests : BaseTestClassWithConsole
     {
         public ImageAnalyticsTests(ITestOutputHelper output)
             : base(output)

diff --git a/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj b/test/Microsoft.ML.StaticPipelineTesting/Microsoft.ML.StaticPipelineTesting.csproj
@@ -5,8 +5,11 @@
   <ItemGroup>
     <ProjectReference Include="..\..\src\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
     <ProjectReference Include="..\..\src\Microsoft.ML.ImageAnalytics\Microsoft.ML.ImageAnalytics.csproj" />
+    <ProjectReference Include="..\..\src\Microsoft.ML.StandardLearners\Microsoft.ML.StandardLearners.csproj" />
     <ProjectReference Include="..\Microsoft.ML.TestFramework\Microsoft.ML.TestFramework.csproj" />
 
     <ProjectReference Include="..\..\src\Microsoft.ML.Analyzer\Microsoft.ML.Analyzer.csproj" />
+
+    <NativeAssemblyReference Include="CpuMathNative" />
   </ItemGroup>
 </Project>
diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
@@ -3,7 +3,6 @@
 // See the LICENSE file in the project root for more information.
 
 using Microsoft.ML.Data.StaticPipe;
-using Microsoft.ML.Data.StaticPipe.Runtime;
 using Microsoft.ML.Runtime;
 using Microsoft.ML.Runtime.Data;
 using Microsoft.ML.TestFramework;
@@ -15,28 +14,27 @@
 
 namespace Microsoft.ML.StaticPipelineTesting
 {
-    public abstract class MakeConsoleWork : IDisposable
+    public abstract class BaseTestClassWithConsole : BaseTestClass, IDisposable
     {
-        private readonly ITestOutputHelper _output;
         private readonly TextWriter _originalOut;
         private readonly TextWriter _textWriter;
 
-        public MakeConsoleWork(ITestOutputHelper output)
+        public BaseTestClassWithConsole(ITestOutputHelper output)
+            : base(output)
         {
-            _output = output;
             _originalOut = Console.Out;
             _textWriter = new StringWriter();
             Console.SetOut(_textWriter);
         }
 
         public void Dispose()
         {
-            _output.WriteLine(_textWriter.ToString());
+            Output.WriteLine(_textWriter.ToString());
             Console.SetOut(_originalOut);
         }
     }
 
-    public sealed class StaticPipeTests : MakeConsoleWork
+    public sealed class StaticPipeTests : BaseTestClassWithConsole
     {
         public StaticPipeTests(ITestOutputHelper output)
             : base(output)
@@ -110,7 +108,7 @@ void CheckValuesSame(bool bl, string tx, float v0, float v1, float v2)
             // The next step where we shuffle the names around a little bit is one where we are
             // testing out the implicit usage of copy columns.
 
-            var est = Estimator.MakeNew(text).Append(r => (text: r.label, label: r.numericFeatures));
+            var est = text.MakeNewEstimator().Append(r => (text: r.label, label: r.numericFeatures));
             var newText = text.Append(est);
             var newTextData = newText.Fit(dataSource).Read(dataSource);