diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index 362347afbc..87949500c4 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -127,6 +127,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Benchmarks.Tes EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TimeSeries.Tests", "test\Microsoft.ML.TimeSeries.Tests\Microsoft.ML.TimeSeries.Tests.csproj", "{4B101D58-E7E4-4877-A536-A9B41E2E82A3}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "samples", "samples", "{DA452A53-2E94-4433-B08C-041EDEC729E6}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Samples", "docs\samples\Microsoft.ML.Samples\Microsoft.ML.Samples.csproj", "{ECB71297-9DF1-48CE-B93A-CD969221F9B6}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -471,6 +475,14 @@ Global {B6C83F04-A04B-4F00-9E68-1EC411F9317C}.Release|Any CPU.Build.0 = Release|Any CPU {B6C83F04-A04B-4F00-9E68-1EC411F9317C}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU {B6C83F04-A04B-4F00-9E68-1EC411F9317C}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Debug|Any CPU.Build.0 = Debug|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Release|Any CPU.ActiveCfg = Release|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Release|Any CPU.Build.0 = Release|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU + {ECB71297-9DF1-48CE-B93A-CD969221F9B6}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU {4B101D58-E7E4-4877-A536-A9B41E2E82A3}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU {4B101D58-E7E4-4877-A536-A9B41E2E82A3}.Debug|Any CPU.Build.0 = Debug|Any CPU {4B101D58-E7E4-4877-A536-A9B41E2E82A3}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU @@ -531,6 +543,7 @@ Global {73DAAC82-D308-48CC-8FFE-3B037F8BBCCA} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {49D03292-8AFE-4B82-823C-D047BF8420F7} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {B6C83F04-A04B-4F00-9E68-1EC411F9317C} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} + {ECB71297-9DF1-48CE-B93A-CD969221F9B6} = {DA452A53-2E94-4433-B08C-041EDEC729E6} {4B101D58-E7E4-4877-A536-A9B41E2E82A3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution diff --git a/docs/samples/Microsoft.ML.Samples/DatasetCreator.cs b/docs/samples/Microsoft.ML.Samples/DatasetCreator.cs new file mode 100644 index 0000000000..92f46b9fa8 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/DatasetCreator.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
using System;
using System.IO;
using System.Text;

namespace Microsoft.ML.Samples.StaticPipe
{
    /// <summary>
    /// Generates the small synthetic datasets consumed by the samples in this project.
    /// </summary>
    public static class DatasetCreator
    {
        /// <summary>
        /// Builds 1000 synthetic regression rows where <c>target = 2 * feature_a + feature_b</c>,
        /// splits them into a train part and a test part (every 15th row goes to the test part),
        /// and writes each part as comma-separated text, with a header line, to a file in the
        /// current working directory.
        /// </summary>
        /// <remarks>
        /// A file that already exists is left untouched and its path is still returned, so the
        /// sample can be run repeatedly from the same directory.
        /// </remarks>
        /// <returns>The paths of the train file and of the test file, in that order.</returns>
        public static (string trainPath, string testPath) CreateRegressionDataset()
        {
            const string trainDataPath = "RegressionTrainDataset.txt";
            const string testDataPath = "RegressionTestDataset.txt";
            const string header = "feature_a, feature_b, target";
            const int rowCount = 1000;
            const int testRowInterval = 15;

            var csvTrain = new StringBuilder().AppendLine(header);
            var csvTest = new StringBuilder().AppendLine(header);

            var rnd = new Random();
            for (int i = 0; i < rowCount; i++)
            {
                // feature_a drifts upwards with the row index, feature_b is a single digit.
                int a = rnd.Next(i - 5, i + 5);
                int b = rnd.Next(0, 10);
                int target = 2 * a + b;

                // Rows are formatted with the invariant culture: feature_a can be negative for
                // the first few rows, and the minus sign must not depend on the machine's locale.
                if (i % testRowInterval == 0)
                {
                    csvTest.AppendLine(FormattableString.Invariant($"{a}, {b}, {target}"));
                }
                else
                {
                    // The extra blank before the second comma is kept as in the original data format.
                    csvTrain.AppendLine(FormattableString.Invariant($"{a}, {b} , {target}"));
                }
            }

            WriteIfMissing(trainDataPath, csvTrain.ToString());
            WriteIfMissing(testDataPath, csvTest.ToString());

            return (trainDataPath, testDataPath);
        }

        /// <summary>
        /// Writes <paramref name="contents"/> to <paramref name="path"/> unless a file already exists there.
        /// </summary>
        private static void WriteIfMissing(string path, string contents)
        {
            if (!File.Exists(path))
            {
                File.WriteAllText(path, contents);
            }
        }
    }
}

// ---------------------------------------------------------------------------------------------
// docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj (new file in the same change).
// Only its values are visible in this view, without the surrounding markup:
//   netcoreapp2.1, Exe, false, Analyzer
// ---------------------------------------------------------------------------------------------
// docs/samples/Microsoft.ML.Samples/Program.cs (new file in the same change) starts here:
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Learners;
using Microsoft.ML.Samples.StaticPipe;
using Microsoft.ML.StaticPipe;

namespace Microsoft.ML.Samples
{
    /// <summary>
    /// Entry point of the samples executable.
    /// </summary>
    internal static class Program
    {
        /// <summary>
        /// Runs the SDCA regression sample.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            Trainers.SdcaRegression();
        }
    }
}

// ---------------------------------------------------------------------------------------------
// docs/samples/Microsoft.ML.Samples/Trainers.cs (new file in the same change).
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// ---------------------------------------------------------------------------------------------

namespace Microsoft.ML.Samples.StaticPipe
{
    /// <summary>
    /// Samples that show how to train models with the statically-typed pipeline API.
    /// </summary>
    public static class Trainers
    {
        /// <summary>
        /// Trains an SDCA regression model on the generated train file, prints the two learned
        /// feature weights, then evaluates the model on the generated test file and prints the
        /// regression metrics to the console.
        /// </summary>
        public static void SdcaRegression()
        {
            var (trainDataPath, testDataPath) = DatasetCreator.CreateRegressionDataset();

            // Create the ML.NET IHostEnvironment object needed by the pipeline.
            var env = new LocalEnvironment(seed: 0);

            // Create the ML context that matches the task being performed.
            var regressionContext = new RegressionContext(env);

            // Create a data reader matching the file format: column 2 is the label,
            // columns 0 and 1 are the features.
            var reader = TextLoader.CreateReader(env, c => (
                    label: c.LoadFloat(2),
                    features: c.LoadFloat(0, 1)
                ),
                separator: ',', hasHeader: true);

            // Read the training data.
            var trainData = reader.Read(new MultiFileSource(trainDataPath));

            // Receives the predictor produced by training, through the onFit callback below.
            LinearRegressionPredictor pred = null;

            // Create the estimator.
            var learningPipeline = reader.MakeNewEstimator()
                .Append(r => (r.label, score: regressionContext.Trainers.Sdca(
                    r.label,
                    r.features,
                    l1Threshold: 0f,
                    maxIterations: 100,
                    onFit: p => pred = p)));

            // Fit the pipeline to the training data.
            var model = learningPipeline.Fit(trainData);

            // Inspect the weights that the model learned.
            VBuffer<float> weights = default;
            pred.GetFeatureWeights(ref weights);

            // NOTE(review): this indexes the raw Values array; presumably the learned weight
            // vector is dense and has at least two entries - confirm.
            Console.WriteLine($"weight 0 - {weights.Values[0]}");
            Console.WriteLine($"weight 1 - {weights.Values[1]}");

            // Score the test file with the model that was just trained.
            var testData = reader.Read(new MultiFileSource(testDataPath));
            var data = model.Transform(testData);

            // Evaluate how the model is doing on the test data.
            var metrics = regressionContext.Evaluate(data, r => r.label, r => r.score);

            Console.WriteLine($"L1 - {metrics.L1}");
            Console.WriteLine($"L2 - {metrics.L2}");
            Console.WriteLine($"LossFunction - {metrics.LossFn}");
            Console.WriteLine($"RMS - {metrics.Rms}");
            Console.WriteLine($"RSquared - {metrics.RSquared}");
        }
    }
}

// ---------------------------------------------------------------------------------------------
// src/Microsoft.ML.StandardLearners/Standard/SdcaStatic.cs (modified in the same change,
// hunk @@ -33,6 +33,12 @@ inside "public static partial class RegressionTrainers").
// The doc-comment context of that hunk starts here and continues on the following line:
// /// the linear model that was trained.
Note that this action cannot change the result in any way; it is only a way for the caller to /// be informed about what was learnt. /// The predicted output. + /// + /// + /// + /// public static Scalar Sdca(this RegressionContext.RegressionTrainers ctx, Scalar label, Vector features, Scalar weights = null, float? l2Const = null,