diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index 778dd888df..ae6f0a7ec3 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -42,4 +42,27 @@ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +License notice for SharpZipLib +------------------------------ + +https://github.com/icsharpcode/SharpZipLib + +Copyright © 2000-2018 SharpZipLib Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs index c49a0f35dc..a9fa08a4dc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/BootstrapSample.cs @@ -1,7 +1,8 @@ using System; using System.Collections.Generic; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class BootstrapSample { @@ -12,7 +13,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and them read it as ML.NET's data type. 
- IEnumerable enumerableOfData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(5); + IEnumerable enumerableOfData = Microsoft.ML.SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(5); var data = mlContext.Data.LoadFromEnumerable(enumerableOfData); // Look at the original dataset @@ -43,7 +44,7 @@ public static void Example() { var resample = mlContext.Data.BootstrapSample(data, seed: i); - var enumerable = mlContext.Data.CreateEnumerable(resample, reuseRowObject: false); + var enumerable = mlContext.Data.CreateEnumerable(resample, reuseRowObject: false); Console.WriteLine($"Label\tFeatures[0]"); foreach (var row in enumerable) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs index acc47e8cb0..2519ca86f7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/Cache.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class Cache { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs index 4ae6b103a8..eff2cd6638 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/CrossValidationSplit.cs @@ -1,8 +1,9 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using static Microsoft.ML.DataOperationsCatalog; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { /// /// Sample class showing how to use CrossValidationSplit. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs index 65864bc5f6..43c717699f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/DataViewEnumerable.cs @@ -1,8 +1,9 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { /// /// Sample class showing how to use ShuffleRows. @@ -53,8 +54,6 @@ public static void Example() // 1/4/2012 34 0 // 1/5/2012 35 0 // 1/6/2012 35 0 - - Console.ReadLine(); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs index 40a801dda5..2be2983e01 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByColumn.cs @@ -1,7 +1,8 @@ using System; using System.Collections.Generic; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { /// /// Sample class showing how to use FilterRowsByColumn. @@ -15,7 +16,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. 
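Several of the samples above round-trip in-memory data through LoadFromEnumerable and the generic CreateEnumerable call. A minimal sketch of that pattern, using a hypothetical TemperatureRow class (not the SamplesUtils type), might look like this:

using System;
using System.Collections.Generic;
using Microsoft.ML;

namespace Samples.Dynamic.Sketches
{
    public static class EnumerableRoundTrip
    {
        // Hypothetical row type for this sketch; the real samples use Microsoft.ML.SamplesUtils.DatasetUtils types.
        private class TemperatureRow
        {
            public string Date { get; set; }
            public float Temperature { get; set; }
        }

        public static void Example()
        {
            var mlContext = new MLContext();

            // In-memory data becomes an IDataView...
            var rows = new List<TemperatureRow>
            {
                new TemperatureRow { Date = "1/1/2012", Temperature = 36 },
                new TemperatureRow { Date = "1/2/2012", Temperature = 35 }
            };
            IDataView data = mlContext.Data.LoadFromEnumerable(rows);

            // ...and an IDataView becomes a strongly typed enumerable again.
            IEnumerable<TemperatureRow> back = mlContext.Data.CreateEnumerable<TemperatureRow>(data, reuseRowObject: false);
            foreach (var row in back)
                Console.WriteLine($"{row.Date}\t{row.Temperature}");
        }
    }
}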
- IEnumerable enumerableOfData = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10); + IEnumerable enumerableOfData = Microsoft.ML.SamplesUtils.DatasetUtils.GetSampleTemperatureData(10); var data = mlContext.Data.LoadFromEnumerable(enumerableOfData); // Before we apply a filter, examine all the records in the dataset. @@ -42,7 +43,7 @@ public static void Example() var filteredData = mlContext.Data.FilterRowsByColumn(data, columnName: "Temperature", lowerBound: 34, upperBound: 37); // Look at the filtered data and observe that values outside [34,37) have been dropped. - var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true); + var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true); Console.WriteLine($"Date\tTemperature"); foreach (var row in enumerable) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs index 1a418cb8f6..0766e77f12 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByKeyColumnFraction.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { using MulticlassClassificationExample = DatasetUtils.MulticlassClassificationExample; diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs index f1f9c4a21d..a624928a96 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/FilterRowsByMissingValues.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public class FilterRowsByMissingValues { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs index 6e88e89cd2..14baddaa6c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/ShuffleRows.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { /// /// Sample class showing how to use ShuffleRows. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs index 194fff12d1..2e38b77c88 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/SkipRows.cs @@ -1,5 +1,7 @@ using System; -namespace Microsoft.ML.Samples.Dynamic +using Microsoft.ML; + +namespace Samples.Dynamic { /// /// Sample class showing how to use Skip. @@ -13,7 +15,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. 
- var enumerableOfData = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10); + var enumerableOfData = Microsoft.ML.SamplesUtils.DatasetUtils.GetSampleTemperatureData(10); var data = mlContext.Data.LoadFromEnumerable(enumerableOfData); // Before we apply a filter, examine all the records in the dataset. @@ -40,7 +42,7 @@ public static void Example() var filteredData = mlContext.Data.SkipRows(data, 5); // Look at the filtered data and observe that the first 5 rows have been dropped - var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true); + var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true); Console.WriteLine($"Date\tTemperature"); foreach (var row in enumerable) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs index 24b69de0a8..9653a67904 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TakeRows.cs @@ -1,5 +1,7 @@ using System; -namespace Microsoft.ML.Samples.Dynamic +using Microsoft.ML; + +namespace Samples.Dynamic { /// /// Sample class showing how to use Take. @@ -13,7 +15,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. - var enumerableOfData = SamplesUtils.DatasetUtils.GetSampleTemperatureData(10); + var enumerableOfData = Microsoft.ML.SamplesUtils.DatasetUtils.GetSampleTemperatureData(10); var data = mlContext.Data.LoadFromEnumerable(enumerableOfData); // Before we apply a filter, examine all the records in the dataset. @@ -40,7 +42,7 @@ public static void Example() var filteredData = mlContext.Data.TakeRows(data, 5); // Look at the filtered data and observe that only the first 5 rows are in the resulting dataset. - var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true); + var enumerable = mlContext.Data.CreateEnumerable(filteredData, reuseRowObject: true); Console.WriteLine($"Date\tTemperature"); foreach (var row in enumerable) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs index eb3bdd5a5a..d02651c006 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs @@ -1,11 +1,9 @@ using System; using System.Collections.Generic; -using System.Collections.Immutable; -using System.Linq; -using Microsoft.ML.Data; +using Microsoft.ML; using static Microsoft.ML.DataOperationsCatalog; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { /// /// Sample class showing how to use TrainTestSplit. 
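For reference, the TrainTestSplit API that this sample (and several later ones) exercises can be used roughly as below; GetSomeRows() and the "GroupId" column are placeholders for this sketch, not part of the sample.

var mlContext = new MLContext(seed: 0);
IDataView data = mlContext.Data.LoadFromEnumerable(GetSomeRows()); // GetSomeRows() is a placeholder.

// Hold out 10% of the rows for testing.
var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1);
IDataView trainSet = split.TrainSet;
IDataView testSet = split.TestSet;

// Keep rows that share a sampling key (e.g. a group id column) on the same side of the split.
var groupedSplit = mlContext.Data.TrainTestSplit(data, testFraction: 0.1, samplingKeyColumnName: "GroupId");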
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs index 1101de6d51..d167f3f9e3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs @@ -1,9 +1,9 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; -using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class FeatureContributionCalculationTransform { @@ -76,7 +76,6 @@ public static void Example() index++; } - Console.ReadLine(); // The output of the above code is: // Label Score BiggestFeature Value Weight Contribution diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs index 45f89fd80b..c1124c8425 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs @@ -1,19 +1,20 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static partial class TransformSamples { - public static void NgramTransform() + public static void Example() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert to IDataView. - IEnumerable data = SamplesUtils.DatasetUtils.GetSentimentData(); + IEnumerable data = Microsoft.ML.SamplesUtils.DatasetUtils.GetSentimentData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 2c0fcce6bb..dc92f50e40 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -1,8 +1,9 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class NormalizerTransform { @@ -13,7 +14,7 @@ public static void Example() var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + IEnumerable data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. @@ -56,8 +57,8 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced") - .Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous")); + var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance(new[] { new InputOutputColumnPair("LogInduced", "Induced"), new InputOutputColumnPair("LogSpontaneous", "Spontaneous") }); + // The transformed data. 
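The multi-column overload used in the Normalizer change above takes an array of InputOutputColumnPair, so several columns are normalized by a single estimator instead of chained Append calls. A minimal sketch mirroring the sample's column names:

// One estimator normalizing two columns at once; "Induced" and "Spontaneous" are the sample's input columns.
var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance(new[]
{
    new InputOutputColumnPair("LogInduced", "Induced"),
    new InputOutputColumnPair("LogSpontaneous", "Spontaneous")
});
var normalized = multiColPipeline.Fit(trainData).Transform(trainData);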
var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs index bbb66b0e35..62a7802ec1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/OnnxTransform.cs @@ -1,9 +1,10 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.OnnxRuntime; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class OnnxTransformExample { @@ -13,8 +14,9 @@ public static class OnnxTransformExample public static void Example() { // Download the squeeznet image model from ONNX model zoo, version 1.2 - // https://github.com/onnx/models/tree/master/squeezenet - var modelPath = @"squeezenet\model.onnx"; + // https://github.com/onnx/models/tree/master/squeezenet or use + // Microsoft.ML.Onnx.TestModels nuget. + var modelPath = @"squeezenet\00000001\model.onnx"; // Inspect the model's inputs and outputs var session = new InferenceSession(modelPath); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs index 95c64e629c..6cf402bb6f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIHelper.cs @@ -2,8 +2,9 @@ using System.Linq; using Microsoft.ML.Trainers; using Microsoft.ML.SamplesUtils; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance +namespace Samples.Dynamic.PermutationFeatureImportance { public static class PfiHelper { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs index 46b5bc65a6..3b7f3d112b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs @@ -1,7 +1,8 @@ using System; using System.Linq; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance +namespace Samples.Dynamic.PermutationFeatureImportance { public static class PfiRegression { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs index 8e109890e1..b3b646c35f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs @@ -1,8 +1,9 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance +namespace Samples.Dynamic.PermutationFeatureImportance { public static class PfiBinaryClassification { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs index 67d4680796..8fb934bf5c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class ProjectionTransforms { @@ -14,7 +15,7 @@ public static void Example() var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. - IEnumerable data = SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); + IEnumerable data = Microsoft.ML.SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. @@ -37,13 +38,13 @@ public static void Example() }; // A pipeline to project Features column into Random fourier space. - var rffPipeline = ml.Transforms.ApproximatedKernelMap(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4); + var rffPipeline = ml.Transforms.ApproximatedKernelMap(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4); // The transformed (projected) data. var transformedData = rffPipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. - var randomFourier = transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); + var randomFourier = transformedData.GetColumn>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier); + printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier); // Features column obtained post-transformation. // @@ -55,13 +56,15 @@ public static void Example() //0.165 0.117 -0.547 0.014 // A pipeline to project Features column into L-p normalized vector. - var lpNormalizePipeline = ml.Transforms.NormalizeLpNorm(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), norm: Transforms.LpNormNormalizingEstimatorBase.NormFunction.L1); + var lpNormalizePipeline = ml.Transforms.NormalizeLpNorm(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), + norm: Microsoft.ML.Transforms.LpNormNormalizingEstimatorBase.NormFunction.L1); + // The transformed (projected) data. transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. - var lpNormalize= transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); + var lpNormalize= transformedData.GetColumn>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize); + printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize); // Features column obtained post-transformation. // @@ -73,13 +76,13 @@ public static void Example() // 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 0.111 // A pipeline to project Features column into L-p normalized vector. 
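The GetColumn calls in this sample read the transformed vector column back for previewing. Spelled out with explicit type arguments (using the file's existing System.Linq and Microsoft.ML.Data usings), the pattern looks roughly like this:

// Pull the transformed float-vector column out of the IDataView for inspection.
var features = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema["Features"]);
foreach (VBuffer<float> row in features)
    Console.WriteLine(string.Join(" ", row.DenseValues().Select(v => v.ToString("F3"))));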
- var gcNormalizePipeline = ml.Transforms.NormalizeGlobalContrast(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean:false); + var gcNormalizePipeline = ml.Transforms.NormalizeGlobalContrast(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean:false); // The transformed (projected) data. transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. - var gcNormalize = transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); + var gcNormalize = transformedData.GetColumn>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize); + printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize); // Features column obtained post-transformation. // diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs index 6b22c5816c..3fa852fb44 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs @@ -1,8 +1,13 @@ using System; +using System.IO; using System.Linq; +using System.Net; +using ICSharpCode.SharpZipLib.GZip; +using ICSharpCode.SharpZipLib.Tar; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class ImageClassification { @@ -13,7 +18,14 @@ public static void Example() { // Download the ResNet 101 model from the location below. // https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz - var modelLocation = @"resnet_v2_101/resnet_v2_101_299_frozen.pb"; + + string modelLocation = "resnet_v2_101_299_frozen.pb"; + if (!File.Exists(modelLocation)) + { + modelLocation = Download(@"https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz", @"resnet_v2_101_299_frozen.tgz"); + Unzip(Path.Join(Directory.GetCurrentDirectory(), modelLocation), Directory.GetCurrentDirectory()); + modelLocation = "resnet_v2_101_299_frozen.pb"; + } var mlContext = new MLContext(); var data = GetTensorData(); @@ -22,7 +34,7 @@ public static void Example() // Create a ML pipeline. var pipeline = mlContext.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel( new[] { nameof(OutputScores.output) }, - new[] { nameof(TensorData.input) }); + new[] { nameof(TensorData.input) }, addBatchDimensionInput: true); // Run the pipeline and get the transformed values. var estimator = pipeline.Fit(idv); @@ -86,5 +98,31 @@ class OutputScores { public float[] output { get; set; } } + + private static string Download(string baseGitPath, string dataFile) + { + using (WebClient client = new WebClient()) + { + client.DownloadFile(new Uri($"{baseGitPath}"), dataFile); + } + + return dataFile; + } + + /// + /// Taken from https://github.com/icsharpcode/SharpZipLib/wiki/GZip-and-Tar-Samples. 
+ /// + private static void Unzip(string path, string targetDir) + { + Stream inStream = File.OpenRead(path); + Stream gzipStream = new GZipInputStream(inStream); + + TarArchive tarArchive = TarArchive.CreateInputTarArchive(gzipStream); + tarArchive.ExtractContents(targetDir); + tarArchive.Close(); + + gzipStream.Close(); + inStream.Close(); + } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index efc428a731..1fa1f9541a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -1,8 +1,9 @@ using System; using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class TextClassification { @@ -12,7 +13,7 @@ public static class TextClassification /// public static void Example() { - string modelLocation = SamplesUtils.DatasetUtils.DownloadTensorFlowSentimentModel(); + string modelLocation = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadTensorFlowSentimentModel(); var mlContext = new MLContext(); var data = new[] { new IMDBSentiment() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs index 06f7a34193..6b8c507b07 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TextTransform.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Transforms.Text; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class TextTransform { @@ -14,7 +15,7 @@ public static void Example() var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert to IDataView. - var data = SamplesUtils.DatasetUtils.GetSentimentData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetSentimentData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. 
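The Unzip helper above follows the SharpZipLib wiki sample. An equivalent sketch that leans on using blocks for disposal (same GZipInputStream/TarArchive APIs, nothing new assumed) could be:

private static void ExtractTarGz(string archivePath, string targetDir)
{
    using (var inStream = File.OpenRead(archivePath))
    using (var gzipStream = new GZipInputStream(inStream))
    {
        var tarArchive = TarArchive.CreateInputTarArchive(gzipStream);
        tarArchive.ExtractContents(targetDir);
        // Close() finishes reading the archive; the using blocks then dispose the streams.
        tarArchive.Close();
    }
}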
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs index a42c760da0..62287c4ba1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSample.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.AnomalyDetection +namespace Samples.Dynamic.Trainers.AnomalyDetection { public static class RandomizedPcaSample { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs index f9160570c9..02b725f7ce 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/AnomalyDetection/RandomizedPcaSampleWithOptions.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.AnomalyDetection +namespace Samples.Dynamic.Trainers.AnomalyDetection { public static class RandomizedPcaSampleWithOptions { @@ -28,7 +29,7 @@ public static void Example() // Convert the List to IDataView, a consumble format to ML.NET functions. var data = mlContext.Data.LoadFromEnumerable(samples); - var options = new ML.Trainers.RandomizedPcaTrainer.Options() + var options = new Microsoft.ML.Trainers.RandomizedPcaTrainer.Options() { FeatureColumnName = nameof(DataPoint.Features), Rank = 1, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs index b0dd1613fb..1a9635a3ad 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptron.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +using Microsoft.ML; + +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class AveragedPerceptron { @@ -13,7 +15,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -27,7 +29,7 @@ public static void Example() // Evaluate how the model is doing on the test data. 
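The metrics printing in the trainer samples below goes through Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics. If that package is not referenced, a sketch that writes a similar summary directly from BinaryClassificationMetrics (which fields the real helper prints is an assumption here) would be:

// Roughly the kind of summary the samples' ConsoleUtils.PrintMetrics helper reports for binary classification.
private static void PrintMetrics(Microsoft.ML.Data.BinaryClassificationMetrics metrics)
{
    Console.WriteLine($"Accuracy: {metrics.Accuracy:F2}");
    Console.WriteLine($"AUC: {metrics.AreaUnderRocCurve:F2}");
    Console.WriteLine($"F1 Score: {metrics.F1Score:F2}");
    Console.WriteLine($"Negative Precision: {metrics.NegativePrecision:F2}");
    Console.WriteLine($"Negative Recall: {metrics.NegativeRecall:F2}");
    Console.WriteLine($"Positive Precision: {metrics.PositivePrecision:F2}");
    Console.WriteLine($"Positive Recall: {metrics.PositiveRecall:F2}");
}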
var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.86 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs index b34926f658..4165b7943a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/AveragedPerceptronWithOptions.cs @@ -1,6 +1,7 @@ -using Microsoft.ML.Trainers; +using Microsoft.ML; +using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class AveragedPerceptronWithOptions { @@ -15,7 +16,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -39,7 +40,7 @@ public static void Example() // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.86 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude new file mode 100644 index 0000000000..3e24e61f58 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/BinaryClassification.ttinclude @@ -0,0 +1,98 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +<# if (TrainerOptions != null) { #> +<#=OptionsInclude#> +<# } #> + +namespace Samples.Dynamic.Trainers.BinaryClassification +{ + public static class <#=ClassName#> + {<#=Comments#> + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of training data points. + var dataPoints = GenerateRandomDataPoints(1000); + + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); + +<# if (TrainerOptions == null) { #> + // Define the trainer. + var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>(); +<# } else { #> + // Define trainer options. + var options = new <#=TrainerOptions#>; + + // Define the trainer. 
+ var pipeline = mlContext.BinaryClassification.Trainers.<#=Trainer#>(options); +<# } #> + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Create testing data. Use different random seed to make it different from training data. + var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + + // Run the model on test data set. + var transformedTestData = model.Transform(testData); + + // Convert IDataView object to a list. + var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + + // Look at 5 predictions + foreach (var p in predictions.Take(5)) + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + + <#=ExpectedOutputPerInstance#> + <# string Evaluator = IsCalibrated ? "Evaluate" : "EvaluateNonCalibrated"; #> + + // Evaluate the overall metrics + var metrics = mlContext.BinaryClassification.<#=Evaluator#>(transformedTestData); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + <#=ExpectedOutput#> + } + + private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + var label = randomFloat() > 0.5f; + yield return new DataPoint + { + Label = label, + // Create random features that are correlated with the label. + // For data points with false label, the feature values are slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + <#=DataSepValue#>).ToArray() + }; + } + } + + // Example with label and 50 feature values. A data set is a collection of such examples. + private class DataPoint + { + public bool Label { get; set; } + [VectorType(50)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. + public bool Label { get; set; } + // Predicted label from the trainer. + public bool PredictedLabel { get; set; } + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs index 52fe41cc4b..75b25dcb86 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/FixedPlatt.cs @@ -1,7 +1,8 @@ using System; using System.Linq; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators +namespace Samples.Dynamic.Trainers.BinaryClassification.Calibrators { public static class FixedPlatt { @@ -13,7 +14,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. 
var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); @@ -53,11 +54,11 @@ public static void Example() // Score 5.36571 Probability 0.9955735 } - private static void PrintRowViewValues(Data.DataDebuggerPreview data) + private static void PrintRowViewValues(Microsoft.ML.Data.DataDebuggerPreview data) { var firstRows = data.RowView.Take(5); - foreach (Data.DataDebuggerPreview.RowInfo row in firstRows) + foreach (Microsoft.ML.Data.DataDebuggerPreview.RowInfo row in firstRows) { foreach (var kvPair in row.Values) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs index 9c856d1455..1bddd25d94 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Isotonic.cs @@ -1,7 +1,8 @@ using System; using System.Linq; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators +namespace Samples.Dynamic.Trainers.BinaryClassification.Calibrators { public static class Isotonic { @@ -13,7 +14,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); @@ -53,11 +54,11 @@ public static void Example() // Score 5.36571 Probability 0.8958333 } - private static void PrintRowViewValues(Data.DataDebuggerPreview data) + private static void PrintRowViewValues(Microsoft.ML.Data.DataDebuggerPreview data) { var firstRows = data.RowView.Take(5); - foreach (Data.DataDebuggerPreview.RowInfo row in firstRows) + foreach (Microsoft.ML.Data.DataDebuggerPreview.RowInfo row in firstRows) { foreach (var kvPair in row.Values) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs index edb38b5cc5..81f23a0974 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Naive.cs @@ -1,7 +1,8 @@ using System; using System.Linq; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators +namespace Samples.Dynamic.Trainers.BinaryClassification.Calibrators { public static class Naive { @@ -13,7 +14,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. 
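These calibrator samples score with a non-calibrated trainer first and then map raw scores to probabilities. A minimal sketch of wiring a calibrator into a pipeline (the trainer choice and column names are illustrative, not the sample's exact flow):

// Train a non-calibrated scorer, then append a Platt calibrator to turn scores into probabilities.
var pipeline = mlContext.BinaryClassification.Trainers
        .AveragedPerceptron(labelColumnName: "Label", featureColumnName: "Features")
    .Append(mlContext.BinaryClassification.Calibrators.Platt(labelColumnName: "Label"));

var model = pipeline.Fit(trainTestData.TrainSet);
var scored = model.Transform(trainTestData.TestSet);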
var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); @@ -53,11 +54,11 @@ public static void Example() // Score 5.36571 Probability 0.9117647 } - private static void PrintRowViewValues(Data.DataDebuggerPreview data) + private static void PrintRowViewValues(Microsoft.ML.Data.DataDebuggerPreview data) { var firstRows = data.RowView.Take(5); - foreach (Data.DataDebuggerPreview.RowInfo row in firstRows) + foreach (Microsoft.ML.Data.DataDebuggerPreview.RowInfo row in firstRows) { foreach (var kvPair in row.Values) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs index 12ff762d14..f78f61de22 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/Calibrators/Platt.cs @@ -1,7 +1,8 @@ using System; using System.Linq; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification.Calibrators +namespace Samples.Dynamic.Trainers.BinaryClassification.Calibrators { public static class Platt { @@ -13,7 +14,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.3); @@ -53,11 +54,11 @@ public static void Example() // Score 5.36571 Probability 0.9065308 } - private static void PrintRowViewValues(Data.DataDebuggerPreview data) + private static void PrintRowViewValues(Microsoft.ML.Data.DataDebuggerPreview data) { var firstRows = data.RowView.Take(5); - foreach (Data.DataDebuggerPreview.RowInfo row in firstRows) + foreach (Microsoft.ML.Data.DataDebuggerPreview.RowInfo row in firstRows) { foreach (var kvPair in row.Values) { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs index 587499997d..f280209107 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForest.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastForest { @@ -50,7 +51,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.74 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs index d243c54c69..dc7152ebd4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs +++ 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastForestWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastForestWithOptions { @@ -62,7 +63,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.73 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs index eae52f11ab..d5308822dd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTree.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastTree { @@ -50,7 +51,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.81 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs index 26493f0b12..0b33ac7277 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FastTreeWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class FastTreeWithOptions { @@ -62,7 +63,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.78 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs index 8c87c899a2..839feed672 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachine.cs @@ -1,8 +1,9 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class FFMBinaryClassification { @@ 
-13,7 +14,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. - var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var dataviews = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); var trainData = dataviews[0]; var testData = dataviews[1]; @@ -61,7 +62,7 @@ public static void Example() var dataWithPredictions = model.Transform(testData); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Accuracy: 0.72 // AUC: 0.75 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithoutArguments.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithoutArguments.cs index 1f4d6bd5be..45bfcc1550 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithoutArguments.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachineWithoutArguments.cs @@ -1,8 +1,8 @@ using System; using System.Linq; -using Microsoft.ML.Data; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class FFMBinaryClassificationWithoutArguments { @@ -13,7 +13,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. - var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var dataviews = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); var trainData = dataviews[0]; var testData = dataviews[1]; @@ -62,7 +62,7 @@ public static void Example() var dataWithPredictions = model.Transform(testData); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.61 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs index 5e7c2ae3ef..ab63c51441 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/FieldAwareFactorizationMachinewWithOptions.cs @@ -1,9 +1,10 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class FFMBinaryClassificationWithOptions { @@ -14,7 +15,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. 
- var dataviews = SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); + var dataviews = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedSentimentDataset(mlContext); var trainData = dataviews[0]; var testData = dataviews[1]; @@ -69,7 +70,7 @@ public static void Example() var dataWithPredictions = model.Transform(testData); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "Sentiment"); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Accuracy: 0.78 // AUC: 0.81 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs new file mode 100644 index 0000000000..2d5dd25b78 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegression.cs @@ -0,0 +1,101 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic.Trainers.BinaryClassification +{ + public static class LbfgsLogisticRegression + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of training data points. + var dataPoints = GenerateRandomDataPoints(1000); + + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); + + // Define the trainer. + var pipeline = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(); + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Create testing data. Use different random seed to make it different from training data. + var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + + // Run the model on test data set. + var transformedTestData = model.Transform(testData); + + // Convert IDataView object to a list. 
+ var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + + // Look at 5 predictions + foreach (var p in predictions.Take(5)) + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + + // Expected output: + // Label: True, Prediction: True + // Label: False, Prediction: True + // Label: True, Prediction: True + // Label: True, Prediction: True + // Label: False, Prediction: False + + // Evaluate the overall metrics + var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Expected output: + // Accuracy: 0.88 + // AUC: 0.96 + // F1 Score: 0.87 + // Negative Precision: 0.90 + // Negative Recall: 0.87 + // Positive Precision: 0.86 + // Positive Recall: 0.89 + // Log Loss: 0.38 + // Log Loss Reduction: 0.62 + // Entropy: 1.00 + } + + private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + var label = randomFloat() > 0.5f; + yield return new DataPoint + { + Label = label, + // Create random features that are correlated with the label. + // For data points with false label, the feature values are slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + }; + } + } + + // Example with label and 50 feature values. A data set is a collection of such examples. + private class DataPoint + { + public bool Label { get; set; } + [VectorType(50)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. + public bool Label { get; set; } + // Predicted label from the trainer. + public bool PredictedLabel { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs new file mode 100644 index 0000000000..8286527c30 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LbfgsLogisticRegressionWithOptions.cs @@ -0,0 +1,110 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers; + +namespace Samples.Dynamic.Trainers.BinaryClassification +{ + public static class LbfgsLogisticRegressionWithOptions + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of training data points. + var dataPoints = GenerateRandomDataPoints(1000); + + // Convert the list of data points to an IDataView object, which is consumable by ML.NET API. + var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); + + // Define trainer options. + var options = new LbfgsLogisticRegressionBinaryTrainer.Options() + { + MaximumNumberOfIterations = 100, + OptmizationTolerance = 1e-8f, + L2Regularization = 0.01f + }; + + // Define the trainer. 
+ var pipeline = mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(options); + + // Train the model. + var model = pipeline.Fit(trainingData); + + // Create testing data. Use different random seed to make it different from training data. + var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed:123)); + + // Run the model on test data set. + var transformedTestData = model.Transform(testData); + + // Convert IDataView object to a list. + var predictions = mlContext.Data.CreateEnumerable(transformedTestData, reuseRowObject: false).ToList(); + + // Look at 5 predictions + foreach (var p in predictions.Take(5)) + Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}"); + + // Expected output: + // Label: True, Prediction: True + // Label: False, Prediction: True + // Label: True, Prediction: True + // Label: True, Prediction: True + // Label: False, Prediction: False + + // Evaluate the overall metrics + var metrics = mlContext.BinaryClassification.Evaluate(transformedTestData); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + + // Expected output: + // Accuracy: 0.87 + // AUC: 0.96 + // F1 Score: 0.87 + // Negative Precision: 0.89 + // Negative Recall: 0.87 + // Positive Precision: 0.86 + // Positive Recall: 0.88 + // Log Loss: 0.37 + // Log Loss Reduction: 0.63 + // Entropy: 1.00 + } + + private static IEnumerable GenerateRandomDataPoints(int count, int seed=0) + { + var random = new Random(seed); + float randomFloat() => (float)random.NextDouble(); + for (int i = 0; i < count; i++) + { + var label = randomFloat() > 0.5f; + yield return new DataPoint + { + Label = label, + // Create random features that are correlated with the label. + // For data points with false label, the feature values are slightly increased by adding a constant. + Features = Enumerable.Repeat(label, 50).Select(x => x ? randomFloat() : randomFloat() + 0.1f).ToArray() + }; + } + } + + // Example with label and 50 feature values. A data set is a collection of such examples. + private class DataPoint + { + public bool Label { get; set; } + [VectorType(50)] + public float[] Features { get; set; } + } + + // Class used to capture predictions. + private class Prediction + { + // Original label. + public bool Label { get; set; } + // Predicted label from the trainer. + public bool PredictedLabel { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs index 64498940d9..574b226df2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbm.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +using Microsoft.ML; + +namespace Samples.Dynamic.Trainers.BinaryClassification { public class LightGbm { @@ -9,7 +11,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. - var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var dataview = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. 
var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1); @@ -24,7 +26,7 @@ public static void Example() var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.88 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs index 41f85c327c..e38011dcba 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/LightGbmWithOptions.cs @@ -1,6 +1,7 @@ -using Microsoft.ML.Trainers.LightGbm; +using Microsoft.ML; +using Microsoft.ML.Trainers.LightGbm; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { class LightGbmWithOptions { @@ -11,7 +12,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. - var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var dataview = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var split = mlContext.Data.TrainTestSplit(dataview, testFraction: 0.1); @@ -34,7 +35,7 @@ public static void Example() var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.88 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs index 08819c8a01..7382e679c1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/PriorTrainerSample.cs @@ -1,6 +1,7 @@ -using Microsoft.ML.Data; +using Microsoft.ML; +using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public class PriorTrainer { @@ -11,7 +12,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. 
- var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + var dataFiles = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadSentimentDataset(); var trainFile = dataFiles[0]; var testFile = dataFiles[1]; @@ -25,7 +26,7 @@ public static void Example() var loader = mlContext.Data.CreateTextLoader( columns: new[] { - new TextLoader.Column("Sentiment", DataKind.Single, 0), + new TextLoader.Column("Sentiment", DataKind.Boolean, 0), new TextLoader.Column("SentimentText", DataKind.String, 1) }, hasHeader: true @@ -48,7 +49,7 @@ public static void Example() // Step 4: Evaluate on the test set var transformedData = trainedPipeline.Transform(loader.Load(testFile)); var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, labelColumnName: "Sentiment"); - SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics); // The Prior trainer outputs the proportion of a label in the dataset as the probability of that label. // In this case 'Accuracy: 0.50' means that there is a split of around 50%-50% of positive and negative labels in the test dataset. @@ -62,7 +63,7 @@ public static void Example() // Positive Precision: 0.50 // Positive Recall: 1.00 // LogLoss: 1.05 - // LogLossReduction: -4.89 + // LogLossReduction: -0.05 // Entropy: 1.00 } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs index d4a366ceb1..2d82383924 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscent.cs @@ -1,9 +1,10 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class StochasticDualCoordinateAscent { @@ -12,7 +13,7 @@ public static void Example() // Downloading the dataset from github.com/dotnet/machinelearning. // This will create a sentiment.tsv file in the filesystem. // You can open this file, if you want to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0]; // A preview of the data. // Sentiment SentimentText @@ -47,9 +48,9 @@ public static void Example() // Featurize the text column through the FeaturizeText API. // Then append a binary classifier, setting the "Label" column as the label of the dataset, and // the "Features" column produced by FeaturizeText as the features column. - var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features") + var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. - .Append(mlContext.BinaryClassification.Trainers.SdcaNonCalibrated(labelColumnName: "Sentiment", featureColumnName: "Features", l2Regularization: 0.001f)); + .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Sentiment", featureColumnName: "Features", l2Regularization: 0.001f)); // Step 3: Run Cross-Validation on this pipeline. 
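            // (Illustrative aside, not part of the original sample.) Note the argument order in the
            // FeaturizeText call above: ML.NET transform factory methods take the output column name
            // first, followed by the input column name. With named arguments the same call reads:
            //
            //     mlContext.Transforms.Text.FeaturizeText(
            //         outputColumnName: "Features", inputColumnName: "SentimentText")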
var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumnName: "Sentiment"); @@ -59,7 +60,7 @@ public static void Example() // If we wanted to specify more advanced parameters for the algorithm, // we could do so by tweaking the 'advancedSetting'. - var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features") + var advancedPipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") .Append(mlContext.BinaryClassification.Trainers.SdcaLogisticRegression( new SdcaLogisticRegressionBinaryTrainer.Options { LabelColumnName = "Sentiment", @@ -69,7 +70,7 @@ public static void Example() })); // Run Cross-Validation on this second pipeline. - var cvResults_advancedPipeline = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumnName: "Sentiment", numberOfFolds: 3); + var cvResults_advancedPipeline = mlContext.BinaryClassification.CrossValidate(data, advancedPipeline, labelColumnName: "Sentiment", numberOfFolds: 3); accuracies = cvResults_advancedPipeline.Select(r => r.Metrics.Accuracy); Console.WriteLine(accuracies.Average()); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs index b73f9e6867..db5b697623 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentNonCalibrated.cs @@ -1,15 +1,16 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class StochasticDualCoordinateAscentNonCalibrated { public static void Example() { // Generate IEnumerable as training examples. - var rawData = SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100); + var rawData = Microsoft.ML.SamplesUtils.DatasetUtils.GenerateBinaryLabelFloatFeatureVectorFloatWeightSamples(100); // Information in first example. // Label: true @@ -49,7 +50,7 @@ public static void Example() // Step 4: Make prediction and evaluate its quality (on training set). var prediction = model.Transform(data); - var rawPrediction = mlContext.Data.CreateEnumerable(prediction, false); + var rawPrediction = mlContext.Data.CreateEnumerable(prediction, false); // Step 5: Inspect the prediction of the first example. 
// Note that positive/negative label may be associated with positive/negative score diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentWithOptions.cs index c08e9c56fa..2f1bbbb8a1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticDualCoordinateAscentWithOptions.cs @@ -1,7 +1,7 @@ using Microsoft.ML; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class StochasticDualCoordinateAscentWithOptions { @@ -16,7 +16,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -41,7 +41,7 @@ public static void Example() // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.85 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs index b0e6f1b6c7..e3423c61d2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescent.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +using Microsoft.ML; + +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class StochasticGradientDescent { @@ -13,7 +15,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -27,7 +29,7 @@ public static void Example() // Evaluate how the model is doing on the test data. 
var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.85 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibrated.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibrated.cs index 2d3dd293df..e28921b75b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibrated.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibrated.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic +using Microsoft.ML; + +namespace Samples.Dynamic { public static class StochasticGradientDescentNonCalibrated { @@ -13,7 +15,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -27,7 +29,7 @@ public static void Example() // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.85 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs index 826f0a6bc7..9ae4269b51 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentNonCalibratedWithOptions.cs @@ -1,6 +1,7 @@ -using Microsoft.ML.Trainers; +using Microsoft.ML; +using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class StochasticGradientDescentNonCalibratedWithOptions { @@ -15,7 +16,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -37,7 +38,7 @@ public static void Example() // Evaluate how the model is doing on the test data. 
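            // (Illustrative aside, not part of the original sample.) EvaluateNonCalibrated, used
            // below, computes metrics from the raw Score and PredictedLabel columns only, whereas
            // Evaluate additionally expects a calibrated Probability column so that it can report
            // log-loss. With the default column names spelled out, the call below is roughly
            // equivalent to:
            //
            //     mlContext.BinaryClassification.EvaluateNonCalibrated(
            //         dataWithPredictions, labelColumnName: "Label", scoreColumnName: "Score")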
var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.85 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs index f03f620bde..891c17576f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/StochasticGradientDescentWithOptions.cs @@ -1,6 +1,7 @@ -using Microsoft.ML.Trainers; +using Microsoft.ML; +using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class StochasticGradientDescentWithOptions { @@ -15,7 +16,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var trainTestData = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -40,7 +41,7 @@ public static void Example() // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(trainTestData.TestSet); var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.85 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs index 8d35c621a1..3a9a842206 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +using Microsoft.ML; + +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class SymbolicStochasticGradientDescent { @@ -14,7 +16,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); @@ -25,7 +27,7 @@ public static void Example() // Evaluate how the model is doing on the test data. 
var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.85 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs index bb6c74bebb..234d075551 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification +using Microsoft.ML; + +namespace Samples.Dynamic.Trainers.BinaryClassification { public static class SymbolicStochasticGradientDescentWithOptions { @@ -14,13 +16,13 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Download and featurize the dataset. - var data = SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); // Leave out 10% of data for testing. var split = mlContext.Data.TrainTestSplit(data, testFraction: 0.1); // Create data training pipeline var pipeline = mlContext.BinaryClassification.Trainers.SymbolicSgdLogisticRegression( - new ML.Trainers.SymbolicSgdLogisticRegressionBinaryTrainer.Options() + new Microsoft.ML.Trainers.SymbolicSgdLogisticRegressionBinaryTrainer.Options() { LearningRate = 0.2f, NumberOfIterations = 10, @@ -33,7 +35,7 @@ public static void Example() // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Accuracy: 0.84 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs index fb9c867e95..438a7bb5e7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public class KMeans { @@ -12,7 +13,7 @@ public static void Example() var ml = new MLContext(seed: 1); // Get a small dataset as an IEnumerable and convert it to an IDataView. - var data = SamplesUtils.DatasetUtils.GetInfertData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. 
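            // (Illustrative aside, not part of the original sample.) After previewing the data, this
            // sample goes on to fit a k-means trainer. A minimal sketch, with the feature column and
            // parameter names assumed rather than taken from this file:
            //
            //     var kmeansSketch = ml.Clustering.Trainers.KMeans(
            //         featureColumnName: "Features", numberOfClusters: 2);
            //     var kmeansModel = kmeansSketch.Fit(trainData);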
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs index 4aa171b7c3..88c2652c94 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeansWithOptions.cs @@ -1,8 +1,9 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public class KMeansWithOptions { @@ -13,7 +14,7 @@ public static void Example() var ml = new MLContext(seed: 1); // Get a small dataset as an IEnumerable and convert it to an IDataView. - var data = SamplesUtils.DatasetUtils.GetInfertData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs index 367e50ff47..0528be95e9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbm.cs @@ -1,9 +1,10 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification +namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class LightGbm { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs index 2b24423e5e..84e206ac99 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/LightGbmWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; using Microsoft.ML.Trainers.LightGbm; -namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification +namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class LightGbmWithOptions { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs index ce43ab733f..40f7c2223a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs @@ -1,7 +1,7 @@ -using Microsoft.ML.Data; +using Microsoft.ML; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification +namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class StochasticDualCoordinateAscent { @@ -44,7 +44,7 @@ public static void Example() // Evaluate the trained model using the test set. 
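            // (Illustrative aside, not part of the original sample.) In the multiclass metrics printed
            // below, micro accuracy weights every example equally (total correct / total examples),
            // while macro accuracy averages the per-class accuracies, weighting every class equally:
            //
            //     microAccuracy = correctPredictions / totalExamples
            //     macroAccuracy = average of per-class accuracies
            //
            // The two diverge when the class distribution is imbalanced.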
var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Micro Accuracy: 0.82 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscentWithOptions.cs index 10bc9c7918..f1ca1b4f56 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscentWithOptions.cs @@ -1,7 +1,8 @@ -using Microsoft.ML.SamplesUtils; +using Microsoft.ML; +using Microsoft.ML.SamplesUtils; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification +namespace Samples.Dynamic.Trainers.MulticlassClassification { public static class StochasticDualCoordinateAscentWithOptions { @@ -54,7 +55,7 @@ public static void Example() // Evaluate the trained model using the test set. var metrics = mlContext.MulticlassClassification.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Micro Accuracy: 0.82 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs index 76cabc36ee..9afc6872fa 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbm.cs @@ -1,4 +1,6 @@ -namespace Microsoft.ML.Samples.Dynamic.Trainers.Ranking +using Microsoft.ML; + +namespace Samples.Dynamic.Trainers.Ranking { public class LightGbm { @@ -9,7 +11,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the dataset. - var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext); + var dataview = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext); // Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split // respects the GroupId column, i.e. 
rows with the same GroupId are either all in the train split or all in @@ -30,7 +32,7 @@ public static void Example() var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Ranking.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // DCG: @1:1.71, @2:3.88, @3:7.93 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs index 1cb039bb18..312d9e7e56 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Ranking/LightGbmWithOptions.cs @@ -1,6 +1,7 @@ -using Microsoft.ML.Trainers.LightGbm; +using Microsoft.ML; +using Microsoft.ML.Trainers.LightGbm; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Ranking +namespace Samples.Dynamic.Trainers.Ranking { public class LightGbmWithOptions { @@ -11,7 +12,7 @@ public static void Example() var mlContext = new MLContext(); // Download and featurize the train and validation datasets. - var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext); + var dataview = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext); // Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split // respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in @@ -29,7 +30,8 @@ public static void Example() Booster = new GradientBooster.Options { FeatureFraction = 0.9 - } + }, + RowGroupColumnName = "GroupId" }); // Fit this pipeline to the training Data. @@ -39,11 +41,11 @@ public static void Example() var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Ranking.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // DCG: @1:1.71, @2:3.88, @3:7.93 - // NDCG: @1:7.98, @2:12.14, @3:16.62 + // NDCG: @1:0.08, @2:0.12, @3:0.17 } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs index 05b1b1553d..1ec86a25c1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorization.cs @@ -1,8 +1,9 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using static Microsoft.ML.SamplesUtils.DatasetUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Recommendation +namespace Samples.Dynamic.Trainers.Recommendation { public static class MatrixFactorization { @@ -38,7 +39,7 @@ public static void Example() var metrics = mlContext.Recommendation().Evaluate(prediction, labelColumnName: nameof(MatrixElement.Value), scoreColumnName: nameof(MatrixElementForScore.Score)); // Print out some metrics for checking the model's quality. 
- SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // L1: 0.17 // L2: 0.05 // LossFunction: 0.05 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs index d48f231015..d91032df4f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Recommendation/MatrixFactorizationWithOptions.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Trainers; using static Microsoft.ML.SamplesUtils.DatasetUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Recommendation +namespace Samples.Dynamic.Trainers.Recommendation { public static class MatrixFactorizationWithOptions { @@ -48,7 +49,7 @@ public static void Example() var metrics = mlContext.Recommendation().Evaluate(prediction, labelColumnName: nameof(MatrixElement.Value), scoreColumnName: nameof(MatrixElementForScore.Score)); // Print out some metrics for checking the model's quality. - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // L1: 0.16 // L2: 0.04 // LossFunction: 0.04 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs index 7263aef771..172a24e874 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForest.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class FastForest { @@ -50,7 +51,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.06 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs index 4629d882e1..e09bbdf9fd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastForestWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class FastForestWithOptions { @@ -62,7 +63,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.06 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs index 
082bc340f3..51e9ec2e3a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTree.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class FastTree { @@ -50,7 +51,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.05 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs index ead29a678f..7fcfb974c6 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedie.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeTweedie { @@ -50,7 +51,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.05 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs index dd75a9c4f4..7f582e88c4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeTweedieWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeTweedieWithOptions { @@ -62,7 +63,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.05 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs index 594c80868c..4efd915ed5 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/FastTreeWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class FastTreeWithOptions { @@ -62,7 +63,7 @@ 
public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.05 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs index 67e57697f2..dd8107452a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/Gam.cs @@ -1,8 +1,9 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class Gam { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs index 6545e27022..33617b2d94 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/GamWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class GamWithOptions { @@ -60,7 +61,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // Mean Absolute Error: 0.06 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs index c30d980650..f721aed215 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbm.cs @@ -1,8 +1,9 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { class LightGbm { @@ -14,7 +15,7 @@ public static void Example() var mlContext = new MLContext(); // Download and load the housing dataset into an IDataView. - var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); + var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); //////////////////// Data Preview //////////////////// /// Only 6 columns are displayed here. @@ -52,7 +53,7 @@ public static void Example() // Evaluate how the model is doing on the test data. 
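            // (Illustrative aside, not part of the original sample.) In the regression metrics printed
            // below, L1 is the mean absolute error, L2 is the mean squared error, RMS is the root of
            // L2, and RSquared is the coefficient of determination (1.0 being a perfect fit):
            //
            //     L1  = average(|label - score|)
            //     L2  = average((label - score)^2)
            //     RMS = sqrt(L2)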
var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Regression.Evaluate(dataWithPredictions, labelColumnName: labelName); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output // L1: 4.97 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs index f6d3eeb1f9..05cf8386b1 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/LightGbmWithOptions.cs @@ -1,9 +1,10 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers.LightGbm; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { class LightGbmWithOptions { @@ -15,7 +16,7 @@ public static void Example() var mlContext = new MLContext(); // Download and load the housing dataset into an IDataView. - var dataView = SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); + var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadHousingRegressionDataset(mlContext); //////////////////// Data Preview //////////////////// /// Only 6 columns are displayed here. @@ -61,7 +62,7 @@ public static void Example() // Evaluate how the model is doing on the test data. var dataWithPredictions = model.Transform(split.TestSet); var metrics = mlContext.Regression.Evaluate(dataWithPredictions, labelColumnName: labelName); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output // L1: 4.97 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs index 1b7110f13e..42a2e2ef93 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescent.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class OnlineGradientDescent { @@ -43,7 +44,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // TODO #2425: OGD is missing baseline tests and seems numerically unstable } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs index 1e3d9b1b32..4789b457bb 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OnlineGradientDescentWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static 
class OnlineGradientDescentWithOptions { @@ -57,7 +58,7 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // TODO #2425: OGD is missing baseline tests and seems numerically unstable } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs index 2d43970d17..b1485b7d75 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquares.cs @@ -1,8 +1,9 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class OrdinaryLeastSquares { @@ -12,7 +13,7 @@ public static class OrdinaryLeastSquares public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs index 3d1af5555d..97daa181bc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs @@ -1,9 +1,10 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.SamplesUtils; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class OrdinaryLeastSquaresWithOptions { @@ -13,7 +14,7 @@ public static class OrdinaryLeastSquaresWithOptions public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + string dataFile = DatasetUtils.DownloadHousingRegressionDataset(); // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. 
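            // (Illustrative aside, not part of the original sample.) Ordinary least squares fits the
            // linear model with a closed-form solution rather than iterative optimization, minimizing
            // the sum of squared residuals; in matrix form the fitted weights satisfy the normal
            // equations:
            //
            //     w = (X^T X)^-1 X^T y
            //
            // Note that the OLS trainer ships in a separate NuGet package (Microsoft.ML.Mkl.Components
            // in recent releases), which this samples project is assumed to reference.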
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegression.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegression.cs index 44565c783e..31f4ad93a5 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegression.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class PoissonRegression { @@ -48,8 +49,8 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); - + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + // Expected output: // Mean Absolute Error: 0.07 // Mean Squared Error: 0.01 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegressionWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegressionWithOptions.cs index 53c3422b07..c996634241 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegressionWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/PoissonRegressionWithOptions.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class PoissonRegressionWithOptions { @@ -60,8 +61,8 @@ public static void Example() // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); - + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + // Expected output: // Mean Absolute Error: 0.07 // Mean Squared Error: 0.01 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude index 10b58238c8..d3a32c94c8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/RegressionSamplesTemplate.ttinclude @@ -1,12 +1,13 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; <# if (TrainerOptions != null) { #> using Microsoft.ML.Trainers; <# } #> -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class <#=ClassName#> { @@ -54,7 +55,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression // Evaluate the overall metrics var metrics = mlContext.Regression.Evaluate(transformedTestData); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); <#=ExpectedOutput#> } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscent.cs index e674bec00c..54c440cc96 100644 --- 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscent.cs @@ -1,8 +1,6 @@ -using System; -using System.Linq; -using Microsoft.ML.Data; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class StochasticDualCoordinateAscent { @@ -14,7 +12,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Create in-memory examples as C# native class and convert to IDataView - var data = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000); var dataView = mlContext.Data.LoadFromEnumerable(data); // Split the data into training and test sets. Only training set is used in fitting @@ -30,7 +28,7 @@ public static void Example() // Evaluate the trained model using the test set. var metrics = mlContext.Regression.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // L1: 0.27 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscentWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscentWithOptions.cs index c0110454a4..cfe96f1a4f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscentWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/StochasticDualCoordinateAscentWithOptions.cs @@ -1,7 +1,7 @@ -using Microsoft.ML.Data; +using Microsoft.ML; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Dynamic.Trainers.Regression +namespace Samples.Dynamic.Trainers.Regression { public static class StochasticDualCoordinateAscentWithOptions { @@ -13,7 +13,7 @@ public static void Example() var mlContext = new MLContext(seed: 0); // Create in-memory examples as C# native class and convert to IDataView - var data = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(1000); var dataView = mlContext.Data.LoadFromEnumerable(data); // Split the data into training and test sets. Only training set is used in fitting @@ -40,7 +40,7 @@ public static void Example() // Evaluate the trained model using the test set. var metrics = mlContext.Regression.Evaluate(dataWithPredictions); - SamplesUtils.ConsoleUtils.PrintMetrics(metrics); + Microsoft.ML.SamplesUtils.ConsoleUtils.PrintMetrics(metrics); // Expected output: // L1: 0.26 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs index 22f6b7e321..e25d1e0d94 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Concatenate.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class ConcatTransform { @@ -12,7 +13,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and them read it as ML.NET's data type. 
- var data = SamplesUtils.DatasetUtils.GetInfertData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = mlContext.Data.LoadFromEnumerable(data); // Preview of the data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs new file mode 100644 index 0000000000..9efdcb1417 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs @@ -0,0 +1,74 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + class MapKeyToVector + { + /// This example demonstrates the use of MapKeyToVector by mapping keys to floats[]. + /// Because the ML.NET KeyType maps the missing value to zero, counting starts at 1, so the uint values + /// converted to KeyTypes will appear skewed by one. + /// See https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Get a small dataset as an IEnumerable. + var rawData = new[] { + new DataPoint() { Timeframe = 9, Category = 5 }, + new DataPoint() { Timeframe = 8, Category = 4 }, + new DataPoint() { Timeframe = 8, Category = 4 }, + new DataPoint() { Timeframe = 9, Category = 3 }, + new DataPoint() { Timeframe = 2, Category = 3 }, + new DataPoint() { Timeframe = 3, Category = 5 } + }; + + var data = mlContext.Data.LoadFromEnumerable(rawData); + + // Constructs the ML.net pipeline + var pipeline = mlContext.Transforms.Conversion.MapKeyToVector("TimeframeVector", "Timeframe") + .Append(mlContext.Transforms.Conversion.MapKeyToVector("CategoryVector", "Category", outputCountVector: true)); + + // Fits the pipeline to the data. + IDataView transformedData = pipeline.Fit(data).Transform(data); + + // Getting the resulting data as an IEnumerable. + // This will contain the newly created columns. + IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($" Timeframe TimeframeVector Category CategoryVector"); + foreach (var featureRow in features) + Console.WriteLine($"{featureRow.Timeframe}\t\t\t{string.Join(',', featureRow.TimeframeVector)}\t\t\t{featureRow.Category}\t\t{string.Join(',', featureRow.CategoryVector)}"); + + // TransformedData obtained post-transformation. 
+ // + // Timeframe TimeframeVector Category CategoryVector + // 10 0,0,0,0,0,0,0,0,0,1 6 0,0,0,0,0 + // 9 0,0,0,0,0,0,0,0,1,0 5 0,0,0,0,1 + // 9 0,0,0,0,0,0,0,0,1,0 5 0,0,0,0,1 + // 10 0,0,0,0,0,0,0,0,0,1 4 0,0,0,1,0 + // 3 0,0,1,0,0,0,0,0,0,0 4 0,0,0,1,0 + // 4 0,0,0,1,0,0,0,0,0,0 6 0,0,0,0,0 + } + + private class DataPoint + { + [KeyType(10)] + public uint Timeframe { get; set; } + + [KeyType(6)] + public uint Category { get; set; } + + } + + private class TransformedData : DataPoint + { + public float[] TimeframeVector { get; set; } + public float[] CategoryVector { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs index 5296c5bd78..08f9f48fb8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs @@ -1,7 +1,8 @@ using System; using System.Collections.Generic; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class CopyColumns { @@ -12,7 +13,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and them read it as ML.NET's data type. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + IEnumerable data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = mlContext.Data.LoadFromEnumerable(data); // Preview of the data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs index d4b5b5904e..4214451924 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs @@ -1,5 +1,7 @@ using System; -namespace Microsoft.ML.Samples.Dynamic +using Microsoft.ML; + +namespace Samples.Dynamic { public static class CustomMapping { @@ -10,7 +12,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. - var data = SamplesUtils.DatasetUtils.GetInfertData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = mlContext.Data.LoadFromEnumerable(data); // Preview of the data. @@ -22,7 +24,7 @@ public static void Example() // 35 4 6-11yrs ... // We define the custom mapping between input and output rows that will be applied by the transformation. - Action mapping = + Action mapping = (input, output) => output.IsUnderThirty = input.Age < 30; // Custom transformations can be used to transform data directly, or as part of a pipeline. Below we transform data directly. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs index bcc812bee3..ff3e2cd5dc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/DropColumns.cs @@ -1,7 +1,8 @@ using System; using System.Collections.Generic; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class DropColumns { @@ -12,7 +13,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and them read it as ML.NET's data type. 
- IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + IEnumerable data = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var trainData = mlContext.Data.LoadFromEnumerable(data); // Preview of the data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs index 15d448deee..9bda79b6d8 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValues.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class IndicateMissingValues { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs index 6076d40a39..0f0f1d5370 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhiten.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public sealed class VectorWhiten { @@ -16,7 +17,7 @@ public static void Example() var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. - var data = SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. @@ -39,14 +40,14 @@ public static void Example() }; // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), - kind: Transforms.WhiteningKind.ZeroPhaseComponentAnalysis); + var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), + kind: Microsoft.ML.Transforms.WhiteningKind.ZeroPhaseComponentAnalysis); // The transformed (projected) data. var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. - var whitening = transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); + var whitening = transformedData.GetColumn>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening); + printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening); // Features column obtained post-transformation. 
// diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs index bf314064e1..4349140c62 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithOptions.cs @@ -1,9 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public sealed class VectorWhitenWithOptions { @@ -15,7 +16,7 @@ public static void Example() var ml = new MLContext(); // Get a small dataset as an IEnumerable and convert it to an IDataView. - var data = SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); + var data = Microsoft.ML.SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of the data. @@ -39,13 +40,13 @@ public static void Example() // A pipeline to project Features column into white noise vector. - var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4); + var whiteningPipeline = ml.Transforms.VectorWhiten(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Microsoft.ML.Transforms.WhiteningKind.PrincipalComponentAnalysis, rank: 4); // The transformed (projected) data. var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. - var whitening = transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); + var whitening = transformedData.GetColumn>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening); + printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), whitening); // Features column obtained post-transformation. 
// -0.979 0.867 1.449 1.236 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs index 01fce1ad06..757242acfe 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ReplaceMissingValues.cs @@ -1,10 +1,11 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Transforms; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { class ReplaceMissingValues { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs index 9f365694cc..8747bc45ae 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/SelectColumns.cs @@ -1,5 +1,7 @@ using System; -namespace Microsoft.ML.Samples.Dynamic +using Microsoft.ML; + +namespace Samples.Dynamic { public static class SelectColumns { @@ -10,7 +12,7 @@ public static void Example() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable and them read it as ML.NET's data type. - var enumerableData = SamplesUtils.DatasetUtils.GetInfertData(); + var enumerableData = Microsoft.ML.SamplesUtils.DatasetUtils.GetInfertData(); var data = mlContext.Data.LoadFromEnumerable(enumerableData); // Before transformation, take a look at the dataset diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs index 268c6865e7..833fdceda7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsa.cs @@ -1,28 +1,16 @@ using System; using System.Collections.Generic; +using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class DetectChangePointBySsa { - class ChangePointPrediction - { - [VectorType(4)] - public double[] Prediction { get; set; } - } - - class SsaChangePointData - { - public float Value; - - public SsaChangePointData(float value) - { - Value = value; - } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // It demostrates stateful prediction engine that updates the state of the model and allows for saving/reloading. // The estimator is applied then to identify points where data distribution changed. // This estimator can account for temporal seasonality in the data. public static void Example() @@ -31,60 +19,119 @@ public static void Example() // as well as the source of randomness. 
var ml = new MLContext(); - // Generate sample series data with a recurring pattern and then a change in trend + // Generate sample series data with a recurring pattern const int SeasonalitySize = 5; const int TrainingSeasons = 3; const int TrainingSize = SeasonalitySize * TrainingSeasons; - var data = new List(); - for (int i = 0; i < TrainingSeasons; i++) - for (int j = 0; j < SeasonalitySize; j++) - data.Add(new SsaChangePointData(j)); - // This is a change point - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new SsaChangePointData(i * 100)); + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup estimator arguments - var inputColumnName = nameof(SsaChangePointData.Value); + // Setup SsaChangePointDetector arguments + var inputColumnName = nameof(TimeSeriesData.Value); var outputColumnName = nameof(ChangePointPrediction.Prediction); - // The transformed data. - var transformedData = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + // Train the change point detector. + ITransformer model = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Create a prediction engine from the model for feeding new data. + var engine = model.CreateTimeSeriesPredictionFunction(ml); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + // Start streaming new data points with no change point to the prediction engine. + Console.WriteLine($"Output from ChangePoint predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); - int k = 0; - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); - Console.WriteLine(""); - - // Prediction column obtained post-transformation. 
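            // The batch Fit(...).Transform(...) flow removed here is preserved in the separate
            // DetectChangePointBySsaBatchPrediction sample added elsewhere in this patch; this sample instead
            // streams points one at a time through the stateful engine, roughly:
            //
            //     foreach (var value in newValues)   // newValues: hypothetical stream of floats to score
            //         PrintPrediction(value, engine.Predict(new TimeSeriesData(value)));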
+ + // Output from ChangePoint predictions on new data: // Data Alert Score P-Value Martingale value - // 0 0 - 2.53 0.50 0.00 - // 1 0 - 0.01 0.01 0.00 - // 2 0 0.76 0.14 0.00 - // 3 0 0.69 0.28 0.00 - // 4 0 1.44 0.18 0.00 - // 0 0 - 1.84 0.17 0.00 - // 1 0 0.22 0.44 0.00 - // 2 0 0.20 0.45 0.00 - // 3 0 0.16 0.47 0.00 - // 4 0 1.33 0.18 0.00 - // 0 0 - 1.79 0.07 0.00 - // 1 0 0.16 0.50 0.00 - // 2 0 0.09 0.50 0.00 - // 3 0 0.08 0.45 0.00 - // 4 0 1.31 0.12 0.00 - // 0 0 - 1.79 0.07 0.00 - // 100 1 99.16 0.00 4031.94 <-- alert is on, predicted changepoint - // 200 0 185.23 0.00 731260.87 - // 300 0 270.40 0.01 3578470.47 - // 400 0 357.11 0.03 45298370.86 + + for (int i = 0; i < 5; i++) + PrintPrediction(i, engine.Predict(new TimeSeriesData(i))); + + // 0 0 -1.01 0.50 0.00 + // 1 0 -0.24 0.22 0.00 + // 2 0 -0.31 0.30 0.00 + // 3 0 0.44 0.01 0.00 + // 4 0 2.16 0.00 0.24 + + // Now stream data points that reflect a change in trend. + for (int i = 0; i < 5; i++) + { + int value = (i + 1) * 100; + PrintPrediction(value, engine.Predict(new TimeSeriesData(value))); + } + // 100 0 86.23 0.00 2076098.24 + // 200 0 171.38 0.00 809668524.21 + // 300 1 256.83 0.01 22130423541.93 <-- alert is on, note that delay is expected + // 400 0 326.55 0.04 241162710263.29 + // 500 0 364.82 0.08 597660527041.45 <-- saved to disk + + // Now we demonstrate saving and loading the model. + + // Save the model that exists within the prediction engine. + // The engine has been updating this model with every new data point. + var modelPath = "model.zip"; + engine.CheckPoint(ml, modelPath); + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + engine = model.CreateTimeSeriesPredictionFunction(ml); + + // Run predictions on the loaded model. + for (int i = 0; i < 5; i++) + { + int value = (i + 1) * 100; + PrintPrediction(value, engine.Predict(new TimeSeriesData(value))); + } + + // 100 0 -58.58 0.15 1096021098844.34 <-- loaded from disk and running new predictions + // 200 0 -41.24 0.20 97579154688.98 + // 300 0 -30.61 0.24 95319753.87 + // 400 0 58.87 0.38 14.24 + // 500 0 219.28 0.36 0.05 + + } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs new file mode 100644 index 0000000000..2e0df19007 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectChangePointBySsaBatchPrediction.cs @@ -0,0 +1,114 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectChangePointBySsaBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify points where data distribution changed. 
+ // This estimator can account for temporal seasonality in the data. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a recurring pattern and then a change in trend + const int SeasonalitySize = 5; + const int TrainingSeasons = 3; + const int TrainingSize = SeasonalitySize * TrainingSeasons; + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + //This is a change point + new TimeSeriesData(0), + new TimeSeriesData(100), + new TimeSeriesData(200), + new TimeSeriesData(300), + new TimeSeriesData(400), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup estimator arguments + var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(ChangePointPrediction.Prediction); + + // The transformed data. + var transformedData = ml.Transforms.DetectChangePointBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. + var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. 
+ // Data Alert Score P-Value Martingale value + // 0 0 -2.53 0.50 0.00 + // 1 0 -0.01 0.01 0.00 + // 2 0 0.76 0.14 0.00 + // 3 0 0.69 0.28 0.00 + // 4 0 1.44 0.18 0.00 + // 0 0 -1.84 0.17 0.00 + // 1 0 0.22 0.44 0.00 + // 2 0 0.20 0.45 0.00 + // 3 0 0.16 0.47 0.00 + // 4 0 1.33 0.18 0.00 + // 0 0 -1.79 0.07 0.00 + // 1 0 0.16 0.50 0.00 + // 2 0 0.09 0.50 0.00 + // 3 0 0.08 0.45 0.00 + // 4 0 1.31 0.12 0.00 + // 0 0 -1.79 0.07 0.00 + // 100 1 99.16 0.00 4031.94 <-- alert is on, predicted changepoint + // 200 0 185.23 0.00 731260.87 + // 300 0 270.40 0.01 3578470.47 + // 400 0 357.11 0.03 45298370.86 + } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs index d19b1be2e1..2890c4ec57 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePoint.cs @@ -4,28 +4,15 @@ using System; using System.Collections.Generic; +using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class DetectIidChangePoint { - class ChangePointPrediction - { - [VectorType(4)] - public double[] Prediction { get; set; } - } - - class IidChangePointData - { - public float Value; - - public IidChangePointData(float value) - { - Value = value; - } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). // The estimator is applied then to identify points where data distribution changed. public static void Example() @@ -36,35 +23,54 @@ public static void Example() // Generate sample series data with a change const int Size = 16; - var data = new List(Size); - for (int i = 0; i < Size / 2; i++) - data.Add(new IidChangePointData(5)); - // This is a change point - for (int i = 0; i < Size / 2; i++) - data.Add(new IidChangePointData(7)); + var data = new List(Size) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + //Change point data. + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + }; // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup estimator arguments + // Setup IidSpikeDetector arguments string outputColumnName = nameof(ChangePointPrediction.Prediction); - string inputColumnName = nameof(IidChangePointData.Value); + string inputColumnName = nameof(TimeSeriesData.Value); - // The transformed data. 
- var transformedData = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + // Time Series model. + ITransformer model = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Create a time series prediction engine from the model. + var engine = model.CreateTimeSeriesPredictionFunction(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); - int k = 0; - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); - Console.WriteLine(""); - - // Prediction column obtained post-transformation. + // Data Alert Score P-Value Martingale value + + // Create non-anomalous data and check for change point. + for (int index = 0; index < 8; index++) + { + // Anomaly change point detection. + PrintPrediction(5, engine.Predict(new TimeSeriesData(5))); + } + + // 5 0 5.00 0.50 0.00 <-- Time Series 1. // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 @@ -72,9 +78,50 @@ public static void Example() // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 // 5 0 5.00 0.50 0.00 - // 5 0 5.00 0.50 0.00 - // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint - // 7 0 7.00 0.13 33950.16 + + // Change point + PrintPrediction(7, engine.Predict(new TimeSeriesData(7))); + + // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint (and model is checkpointed). + + // Checkpoint the model. + var modelPath = "temp.zip"; + engine.CheckPoint(ml, modelPath); + + // Reference to current time series engine because in the next step "engine" will point to the + // checkpointed model being loaded from disk. + var timeseries1 = engine; + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // Create a time series prediction engine from the checkpointed model. + engine = model.CreateTimeSeriesPredictionFunction(ml); + for (int index = 0; index < 8; index++) + { + // Anomaly change point detection. + PrintPrediction(7, engine.Predict(new TimeSeriesData(7))); + } + + // 7 0 7.00 0.13 33950.16 <-- Time Series 2 : Model loaded back from disk and prediction is made. + // 7 0 7.00 0.26 60866.34 + // 7 0 7.00 0.38 78362.04 + // 7 0 7.00 0.50 0.01 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + + // Prediction from the original time series engine should match the prediction from + // check pointed model. + engine = timeseries1; + for (int index = 0; index < 8; index++) + { + // Anomaly change point detection. + PrintPrediction(7, engine.Predict(new TimeSeriesData(7))); + } + + // 7 0 7.00 0.13 33950.16 <-- Time Series 1 and prediction is made. 
// 7 0 7.00 0.26 60866.34 // 7 0 7.00 0.38 78362.04 // 7 0 7.00 0.50 0.01 @@ -82,5 +129,25 @@ public static void Example() // 7 0 7.00 0.50 0.00 // 7 0 7.00 0.50 0.00 } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs new file mode 100644 index 0000000000..760305df33 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidChangePointBatchPrediction.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectIidChangePointBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify points where data distribution changed. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a change + const int Size = 16; + var data = new List(Size) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + //Change point data. + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + new TimeSeriesData(7), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup estimator arguments + string outputColumnName = nameof(ChangePointPrediction.Prediction); + string inputColumnName = nameof(TimeSeriesData.Value); + + // The transformed data. + var transformedData = ml.Transforms.DetectIidChangePoint(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of ChangePointPrediction. + var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value"); + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. 
+ // Data Alert Score P-Value Martingale value + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 5 0 5.00 0.50 0.00 + // 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint + // 7 0 7.00 0.13 33950.16 + // 7 0 7.00 0.26 60866.34 + // 7 0 7.00 0.38 78362.04 + // 7 0 7.00 0.50 0.01 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + // 7 0 7.00 0.50 0.00 + } + + private static void PrintPrediction(float value, ChangePointPrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); + + class ChangePointPrediction + { + [VectorType(4)] + public double[] Prediction { get; set; } + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs index d43b581842..439006a7ec 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpike.cs @@ -1,29 +1,14 @@ using System; using System.Collections.Generic; using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Transforms.TimeSeries; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class DetectIidSpike { - class IidSpikeData - { - public float Value; - - public IidSpikeData(float value) - { - Value = value; - } - } - - class IidSpikePrediction - { - [VectorType(3)] - public double[] Prediction { get; set; } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). // The estimator is applied then to identify spiking points in the series. public static void Example() @@ -34,46 +19,101 @@ public static void Example() // Generate sample series data with a spike const int Size = 10; - var data = new List(Size); - for (int i = 0; i < Size / 2; i++) - data.Add(new IidSpikeData(5)); - // This is a spike - data.Add(new IidSpikeData(10)); - for (int i = 0; i < Size / 2; i++) - data.Add(new IidSpikeData(5)); + var data = new List(Size + 1) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + // This is a spike. + new TimeSeriesData(10), + + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + }; // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup the estimator arguments + // Setup IidSpikeDetector arguments string outputColumnName = nameof(IidSpikePrediction.Prediction); - string inputColumnName = nameof(IidSpikeData.Value); + string inputColumnName = nameof(TimeSeriesData.Value); - // The transformed data. - var transformedData = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + // The transformed model. + ITransformer model = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of IidSpikePrediction. 
- var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Create a time series prediction engine from the model. + var engine = model.CreateTimeSeriesPredictionFunction(ml); Console.WriteLine($"{outputColumnName} column obtained post-transformation."); - Console.WriteLine("Alert\tScore\tP-Value"); - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1:0.00}\t{2:0.00}", prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); - Console.WriteLine(""); - + Console.WriteLine("Data\tAlert\tScore\tP-Value"); + // Prediction column obtained post-transformation. - // Alert Score P-Value - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 1 10.00 0.00 <-- alert is on, predicted spike - // 0 5.00 0.26 - // 0 5.00 0.26 - // 0 5.00 0.50 - // 0 5.00 0.50 - // 0 5.00 0.50 + // Data Alert Score P-Value + + // Create non-anomalous data and check for anomaly. + for (int index = 0; index < 5; index++) + { + // Anomaly spike detection. + PrintPrediction(5, engine.Predict(new TimeSeriesData(5))); + } + + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + + // Spike. + PrintPrediction(10, engine.Predict(new TimeSeriesData(10))); + + // 10 1 10.00 0.00 <-- alert is on, predicted spike (check-point model) + + // Checkpoint the model. + var modelPath = "temp.zip"; + engine.CheckPoint(ml, modelPath); + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + for (int index = 0; index < 5; index++) + { + // Anomaly spike detection. + PrintPrediction(5, engine.Predict(new TimeSeriesData(5))); + } + + // 5 0 5.00 0.26 <-- load model from disk. + // 5 0 5.00 0.26 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + + } + + private static void PrintPrediction(float value, IidSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class IidSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs new file mode 100644 index 0000000000..4145214918 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectIidSpikeBatchPrediction.cs @@ -0,0 +1,93 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectIidSpikeBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify spiking points in the series. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a spike + const int Size = 10; + var data = new List(Size + 1) + { + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + + // This is a spike. 
+ new TimeSeriesData(10), + + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + new TimeSeriesData(5), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup the estimator arguments + string outputColumnName = nameof(IidSpikePrediction.Prediction); + string inputColumnName = nameof(TimeSeriesData.Value); + + // The transformed data. + var transformedData = ml.Transforms.DetectIidSpike(outputColumnName, inputColumnName, 95, Size / 4).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of IidSpikePrediction. + var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value"); + + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. + // Data Alert Score P-Value + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 10 1 10.00 0.00 <-- alert is on, predicted spike + // 5 0 5.00 0.26 + // 5 0 5.00 0.26 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + // 5 0 5.00 0.50 + } + + private static void PrintPrediction(float value, IidSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class IidSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs index 9c6f89307f..38094bb672 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsa.cs @@ -1,92 +1,130 @@ using System; using System.Collections.Generic; +using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class DetectSpikeBySsa { - class SsaSpikeData - { - public float Value; - - public SsaSpikeData(float value) - { - Value = value; - } - } - - class SsaSpikePrediction - { - [VectorType(3)] - public double[] Prediction { get; set; } - } - // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). // The estimator is applied then to identify spiking points in the series. // This estimator can account for temporal seasonality in the data. - public static void SsaSpikeDetectorTransform() + public static void Example() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. 
var ml = new MLContext(); - // Generate sample series data with a recurring pattern and a spike within the pattern + // Generate sample series data with a recurring pattern const int SeasonalitySize = 5; const int TrainingSeasons = 3; const int TrainingSize = SeasonalitySize * TrainingSeasons; - var data = new List(); - for (int i = 0; i < TrainingSeasons; i++) - for (int j = 0; j < SeasonalitySize; j++) - data.Add(new SsaSpikeData(j)); - // This is a spike - data.Add(new SsaSpikeData(100)); - for (int i = 0; i < SeasonalitySize; i++) - data.Add(new SsaSpikeData(i)); + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; // Convert data to IDataView. var dataView = ml.Data.LoadFromEnumerable(data); - // Setup estimator arguments - var inputColumnName = nameof(SsaSpikeData.Value); + // Setup IidSpikeDetector arguments + var inputColumnName = nameof(TimeSeriesData.Value); var outputColumnName = nameof(SsaSpikePrediction.Prediction); - // The transformed data. - var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + // Train the change point detector. + ITransformer model = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView); - // Getting the data of the newly created column as an IEnumerable of SsaSpikePrediction. - var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + // Create a prediction engine from the model for feeding new data. + var engine = model.CreateTimeSeriesPredictionFunction(ml); - Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + // Start streaming new data points with no change point to the prediction engine. + Console.WriteLine($"Output from spike predictions on new data:"); Console.WriteLine("Data\tAlert\tScore\tP-Value"); - int k = 0; - foreach (var prediction in predictionColumn) - Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", data[k++].Value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2]); - Console.WriteLine(""); - // Prediction column obtained post-transformation. + // Output from spike predictions on new data: // Data Alert Score P-Value - // 0 0 - 2.53 0.50 - // 1 0 - 0.01 0.01 - // 2 0 0.76 0.14 - // 3 0 0.69 0.28 - // 4 0 1.44 0.18 - // 0 0 - 1.84 0.17 - // 1 0 0.22 0.44 - // 2 0 0.20 0.45 - // 3 0 0.16 0.47 - // 4 0 1.33 0.18 - // 0 0 - 1.79 0.07 - // 1 0 0.16 0.50 - // 2 0 0.09 0.50 - // 3 0 0.08 0.45 - // 4 0 1.31 0.12 - // 100 1 98.21 0.00 <-- alert is on, predicted spike - // 0 0 - 13.83 0.29 - // 1 0 - 1.74 0.44 - // 2 0 - 0.47 0.46 - // 3 0 - 16.50 0.29 - // 4 0 - 29.82 0.21 + + for (int j = 0; j < 2; j++) + for (int i = 0; i < 5; i++) + PrintPrediction(i, engine.Predict(new TimeSeriesData(i))); + + // 0 0 -1.01 0.50 + // 1 0 -0.24 0.22 + // 2 0 -0.31 0.30 + // 3 0 0.44 0.01 + // 4 0 2.16 0.00 + // 0 0 -0.78 0.27 + // 1 0 -0.80 0.30 + // 2 0 -0.84 0.31 + // 3 0 0.33 0.31 + // 4 0 2.21 0.07 + + // Now send a data point that reflects a spike. 
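            // With confidence set to 95 above, the alert bit should fire once the p-value of an observation
            // drops below roughly 1 - 95/100 = 0.05; a value of 100 falls far outside the learned 0..4 seasonal
            // pattern, so its p-value collapses toward zero and the alert turns on, while the in-pattern points
            // above stay under the threshold. (Approximate description; the exact thresholding is internal to
            // the detector.)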
+ PrintPrediction(100, engine.Predict(new TimeSeriesData(100))); + + // 100 1 86.17 0.00 <-- alert is on, predicted spike + + // Now we demonstrate saving and loading the model. + // Save the model that exists within the prediction engine. + // The engine has been updating this model with every new data point. + var modelPath = "model.zip"; + engine.CheckPoint(ml, modelPath); + + // Load the model. + using (var file = File.OpenRead(modelPath)) + model = ml.Model.Load(file, out DataViewSchema schema); + + // We must create a new prediction engine from the persisted model. + engine = model.CreateTimeSeriesPredictionFunction(ml); + + // Run predictions on the loaded model. + for (int i = 0; i < 5; i++) + PrintPrediction(i, engine.Predict(new TimeSeriesData(i))); + + // 0 0 -2.74 0.40 <-- saved to disk, re-loaded, and running new predictions + // 1 0 -1.47 0.42 + // 2 0 -17.50 0.24 + // 3 0 -30.82 0.16 + // 4 0 -23.24 0.28 + } + + private static void PrintPrediction(float value, SsaSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class SsaSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs new file mode 100644 index 0000000000..cc6a798dcf --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSpikeBySsaBatchPrediction.cs @@ -0,0 +1,117 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + public static class DetectSpikeBySsaBatchPrediction + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). + // The estimator is applied then to identify spiking points in the series. + // This estimator can account for temporal seasonality in the data. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a recurring pattern and a spike within the pattern + const int SeasonalitySize = 5; + const int TrainingSeasons = 3; + const int TrainingSize = SeasonalitySize * TrainingSeasons; + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + //This is a spike. + new TimeSeriesData(100), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup estimator arguments + var inputColumnName = nameof(TimeSeriesData.Value); + var outputColumnName = nameof(SsaSpikePrediction.Prediction); + + // The transformed data. 
+ var transformedData = ml.Transforms.DetectSpikeBySsa(outputColumnName, inputColumnName, 95, 8, TrainingSize, SeasonalitySize + 1).Fit(dataView).Transform(dataView); + + // Getting the data of the newly created column as an IEnumerable of SsaSpikePrediction. + var predictionColumn = ml.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"{outputColumnName} column obtained post-transformation."); + Console.WriteLine("Data\tAlert\tScore\tP-Value"); + int k = 0; + foreach (var prediction in predictionColumn) + PrintPrediction(data[k++].Value, prediction); + + // Prediction column obtained post-transformation. + // Data Alert Score P-Value + // 0 0 -2.53 0.50 + // 1 0 -0.01 0.01 + // 2 0 0.76 0.14 + // 3 0 0.69 0.28 + // 4 0 1.44 0.18 + // 0 0 -1.84 0.17 + // 1 0 0.22 0.44 + // 2 0 0.20 0.45 + // 3 0 0.16 0.47 + // 4 0 1.33 0.18 + // 0 0 -1.79 0.07 + // 1 0 0.16 0.50 + // 2 0 0.09 0.50 + // 3 0 0.08 0.45 + // 4 0 1.31 0.12 + // 100 1 98.21 0.00 <-- alert is on, predicted spike + // 0 0 -13.83 0.29 + // 1 0 -1.74 0.44 + // 2 0 -0.47 0.46 + // 3 0 -16.50 0.29 + // 4 0 -29.82 0.21 + } + + private static void PrintPrediction(float value, SsaSpikePrediction prediction) => + Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", value, prediction.Prediction[0], + prediction.Prediction[1], prediction.Prediction[2]); + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + + class SsaSpikePrediction + { + [VectorType(3)] + public double[] Prediction { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 9772e70704..3f0f7d9d69 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -137,4 +137,22 @@ + + + + + + + DnnImageModels\ResNet18Onnx\ResNet18.onnx + PreserveNewest + + + + + + DnnImageModels\ResNetPrepOnnx\ResNetPreprocess.onnx + PreserveNewest + + + diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index ef67739045..03b6ce19b6 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -1,4 +1,4 @@ -using Microsoft.ML.Samples.Dynamic; +using Samples.Dynamic; namespace Microsoft.ML.Samples { diff --git a/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs index 34b380d086..c5eb417fdd 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/AveragedPerceptronBinaryClassification.cs @@ -1,15 +1,16 @@ using System; +using Microsoft.ML; using Microsoft.ML.StaticPipe; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class AveragedPerceptronBinaryClassificationExample { - public static void AveragedPerceptronBinaryClassification() + public static void Example() { // Downloading a classification dataset from github.com/dotnet/machinelearning. // It will be stored in the same path as the executable - string dataFilePath = SamplesUtils.DatasetUtils.DownloadAdultDataset(); + string dataFilePath = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadAdultDataset(); // Data Preview // 1. 
Column [Label]: IsOver50K (boolean) diff --git a/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs index f34c6c4145..cda4a54ef8 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/FastTreeBinaryClassification.cs @@ -1,16 +1,17 @@ using System; +using Microsoft.ML; using Microsoft.ML.StaticPipe; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class FastTreeBinaryClassificationExample { // This example requires installation of additional nuget package Microsoft.ML.FastTree. - public static void FastTreeBinaryClassification() + public static void Example() { // Downloading a classification dataset from github.com/dotnet/machinelearning. // It will be stored in the same path as the executable - string dataFilePath = SamplesUtils.DatasetUtils.DownloadAdultDataset(); + string dataFilePath = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadAdultDataset(); // Data Preview // 1. Column [Label]: IsOver50K (boolean) diff --git a/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs index b2fc20047d..66ca7176d4 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/FastTreeRegression.cs @@ -1,19 +1,20 @@ using System; using System.Linq; +using Microsoft.ML; using Microsoft.ML.StaticPipe; using Microsoft.ML.Trainers.FastTree; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class FastTreeRegressionExample { // This example requires installation of additional nuget package Microsoft.ML.FastTree. - public static void FastTreeRegression() + public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning // this will create a housing.txt file in the filsystem this code will run // you can open the file to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. diff --git a/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs b/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs index 88b6c6839d..aa52b3a468 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/FeatureSelectionTransform.cs @@ -1,17 +1,18 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.StaticPipe; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Static { public class FeatureSelectionTransformStaticExample { - public static void FeatureSelectionTransform() + public static void Example() { // Downloading a classification dataset from github.com/dotnet/machinelearning. // It will be stored in the same path as the executable - string dataFilePath = SamplesUtils.DatasetUtils.DownloadBreastCancerDataset(); + string dataFilePath = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadBreastCancerDataset(); // Data Preview // 1. 
Label 0=benign, 1=malignant diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs index e042db89c9..4723d3e1f8 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMBinaryClassification.cs @@ -1,16 +1,17 @@ using System; using Microsoft.ML.Trainers.LightGbm.StaticPipe; using Microsoft.ML.StaticPipe; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class LightGbmBinaryClassificationExample { - public static void LightGbmBinaryClassification() + public static void Example() { // Downloading a classification dataset from github.com/dotnet/machinelearning. // It will be stored in the same path as the executable - string dataFilePath = SamplesUtils.DatasetUtils.DownloadAdultDataset(); + string dataFilePath = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadAdultDataset(); // Data Preview // 1. Column [Label]: IsOver50K (boolean) diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMMulticlassWithInMemoryData.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMMulticlassWithInMemoryData.cs index 48191ab5e7..8f08f86889 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMMulticlassWithInMemoryData.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMMulticlassWithInMemoryData.cs @@ -4,12 +4,13 @@ using Microsoft.ML.Trainers.LightGbm.StaticPipe; using Microsoft.ML.SamplesUtils; using Microsoft.ML.StaticPipe; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { class LightGBMMulticlassWithInMemoryData { - public void MulticlassLightGbmStaticPipelineWithInMemoryData() + public void Example() { // Create a general context for ML.NET operations. It can be used for exception tracking and logging, // as a catalog of available operations and as the source of randomness. diff --git a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs index 3a5bfa1d11..a7c1d7bdae 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/LightGBMRegression.cs @@ -3,17 +3,18 @@ using Microsoft.ML.Trainers.LightGbm; using Microsoft.ML.Trainers.LightGbm.StaticPipe; using Microsoft.ML.StaticPipe; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class LightGbmRegressionExample { - public static void LightGbmRegression() + public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning // this will create a housing.txt file in the filsystem. // You can open the file to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. 
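The static samples above have all been renamed to a uniform Example() entry point and moved under the Samples.Static namespace, so a small driver can run them in one place. A minimal sketch follows; the RunStaticSamples class is illustrative only and not part of this patch, and the FastTree/LightGBM calls still assume the additional trainer packages (such as Microsoft.ML.FastTree) noted in the samples themselves.

using Samples.Static;

namespace Microsoft.ML.Samples
{
    internal static class RunStaticSamples
    {
        internal static void Run()
        {
            // Each call downloads its dataset next to the executable and trains the corresponding model.
            AveragedPerceptronBinaryClassificationExample.Example();
            FastTreeRegressionExample.Example();   // requires the Microsoft.ML.FastTree package
            LightGbmRegressionExample.Example();   // requires the LightGBM trainer package
        }
    }
}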
diff --git a/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs b/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs index 3e9933838d..e43d1cfcb9 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/SDCABinaryClassification.cs @@ -1,17 +1,18 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.StaticPipe; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class SdcaBinaryClassificationExample { - public static void SdcaBinaryClassification() + public static void Example() { // Downloading a classification dataset from github.com/dotnet/machinelearning. // It will be stored in the same path as the executable - string dataFilePath = SamplesUtils.DatasetUtils.DownloadAdultDataset(); + string dataFilePath = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadAdultDataset(); // Data Preview // 1. Column [Label]: IsOver50K (boolean) diff --git a/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs b/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs index ac4e6b2a7a..df8cf17267 100644 --- a/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs +++ b/docs/samples/Microsoft.ML.Samples/Static/SDCARegression.cs @@ -1,17 +1,18 @@ using System; +using Microsoft.ML; using Microsoft.ML.StaticPipe; using Microsoft.ML.Trainers; -namespace Microsoft.ML.Samples.Static +namespace Samples.Static { public class SdcaRegressionExample { - public static void SdcaRegression() + public static void Example() { // Downloading a regression dataset from github.com/dotnet/machinelearning // this will create a housing.txt file in the filsystem this code will run // you can open the file to see the data. - string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); // Creating the ML.Net IHostEnvironment object, needed for the pipeline var mlContext = new MLContext(); diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index b7c0a83577..3fb71a6442 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -5,6 +5,10 @@ Microsoft.ML.SampleUtils + + + + diff --git a/src/Microsoft.ML.StandardTrainers/Standard/Simple/SimpleTrainers.cs b/src/Microsoft.ML.StandardTrainers/Standard/Simple/SimpleTrainers.cs index 2184c9f166..b7b371a952 100644 --- a/src/Microsoft.ML.StandardTrainers/Standard/Simple/SimpleTrainers.cs +++ b/src/Microsoft.ML.StandardTrainers/Standard/Simple/SimpleTrainers.cs @@ -230,7 +230,7 @@ private PriorModelParameters Train(TrainContext context) data.CheckBinaryLabel(); _host.CheckParam(data.Schema.Label.HasValue, nameof(data), "Missing Label column"); var labelCol = data.Schema.Label.Value; - _host.CheckParam(labelCol.Type == NumberDataViewType.Single, nameof(data), "Invalid type for Label column"); + _host.CheckParam(labelCol.Type == BooleanDataViewType.Instance, nameof(data), "Invalid type for Label column"); double pos = 0; double neg = 0; @@ -243,9 +243,9 @@ private PriorModelParameters Train(TrainContext context) using (var cursor = data.Data.GetRowCursor(cols)) { - var getLab = cursor.GetLabelFloatGetter(data); + var getLab = cursor.GetGetter(data.Schema.Label.Value); var getWeight = colWeight >= 0 ? 
cursor.GetGetter(data.Schema.Weight.Value) : null; - float lab = default; + bool lab = default; float weight = 1; while (cursor.MoveNext()) { @@ -258,9 +258,9 @@ private PriorModelParameters Train(TrainContext context) } // Testing both directions effectively ignores NaNs. - if (lab > 0) + if (lab) pos += weight; - else if (lab <= 0) + else neg += weight; } } diff --git a/src/Microsoft.ML.TimeSeries.StaticPipe/TimeSeriesStatic.cs b/src/Microsoft.ML.TimeSeries.StaticPipe/TimeSeriesStatic.cs index 74c1f22e93..2575bf32b8 100644 --- a/src/Microsoft.ML.TimeSeries.StaticPipe/TimeSeriesStatic.cs +++ b/src/Microsoft.ML.TimeSeries.StaticPipe/TimeSeriesStatic.cs @@ -230,7 +230,7 @@ public static Vector DetectChangePointBySsa( /// /// Static API extension methods for . /// - public static class SsaSpikeDetecotStaticExtensions + public static class SsaSpikeDetectorStaticExtensions { private sealed class OutColumn : Vector { diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index aa64de5495..681d53fe40 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -25,7 +25,7 @@ public static class TimeSeriesCatalog /// /// /// /// /// @@ -47,7 +47,7 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo /// /// /// /// /// @@ -73,7 +73,7 @@ public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, s /// /// /// /// /// @@ -110,7 +110,7 @@ public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCata /// /// /// /// /// diff --git a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs index 070e0c3d10..7978b7142f 100644 --- a/src/Microsoft.ML.TimeSeries/PredictionFunction.cs +++ b/src/Microsoft.ML.TimeSeries/PredictionFunction.cs @@ -65,6 +65,14 @@ public sealed class TimeSeriesPredictionFunction : PredictionEngineB /// /// Usually . /// Path to file on disk where the updated model needs to be saved. + /// + /// + /// + /// + /// public void CheckPoint(IHostEnvironment env, string modelPath) { using (var file = File.Create(modelPath)) @@ -261,8 +269,8 @@ public static class PredictionFunctionExtensions /// /// /// /// /// diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 22696851c8..cfec3f878a 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -1,4 +1,9 @@ -using Microsoft.ML.Data; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Linq; +using Microsoft.ML.Data; using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; @@ -56,6 +61,20 @@ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalo return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// It normalizes the data based on the observed minimum and maximum values of the data. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. 
+ public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.MinMaxColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero)).ToArray()); + /// /// It normalizes the data based on the computed mean and variance of the data. /// @@ -75,6 +94,22 @@ public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// It normalizes the data based on the computed mean and variance of the data. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Whether to use CDF as the output. + public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.MeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, useCdf)).ToArray()); + /// /// It normalizes the data based on the computed mean and variance of the logarithm of the data. /// @@ -92,6 +127,27 @@ public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatal return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// It normalizes the data based on the computed mean and variance of the logarithm of the data. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to use CDF as the output. + /// + /// + /// + /// + /// + public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.LogMeanVarianceColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, useCdf)).ToArray()); + /// /// The values are assigned into bins with equal density. /// @@ -111,6 +167,22 @@ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catal return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// The values are assigned into bins with equal density. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). 
+ public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.BinningColumnOptions(column.OutputColumnName, column.InputColumnName, maximumExampleCount, fixZero, maximumBinCount)).ToArray()); + /// /// The values are assigned into bins based on correlation with the column. /// @@ -134,6 +206,27 @@ public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCat return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions); } + /// + /// The values are assigned into bins based on correlation with the column. + /// + /// The transform catalog + /// List of Output and Input column pairs. + /// Name of the label column for supervised binning. + /// Maximum number of examples used to train the normalizer. + /// Whether to map zero to zero, preserving sparsity. + /// Maximum number of bins (power of 2 recommended). + /// Minimum number of examples per bin. + public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog, InputOutputColumnPair[] columns, + string labelColumnName = DefaultColumnNames.Label, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, + bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched, + int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, + int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize) => + new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), + columns.Select(column => + new NormalizingEstimator.SupervisedBinningColumOptions( + column.OutputColumnName, column.InputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin)).ToArray()); + /// /// Normalize (rescale) columns according to specified custom parameters. /// diff --git a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt index ae9258e681..8a5c288d72 100644 --- a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt +++ b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt @@ -1,4 +1,4 @@ -maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1 +maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1 Not adding a normalizer. Not training a calibrator because it is not needed. Not adding a normalizer. 
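Note: the NormalizerCatalog.cs additions above introduce multi-column overloads that accept InputOutputColumnPair[]. Below is a minimal usage sketch of two of the new overloads; the MLContext, the input IDataView, and the column names Feature1, Feature2, and Label are assumptions for illustration only.

using Microsoft.ML;

public static class MultiColumnNormalizationSketch
{
    public static IDataView Example(MLContext mlContext, IDataView data)
    {
        // Min-max normalize two columns in one call via the new InputOutputColumnPair overload.
        var minMax = mlContext.Transforms.NormalizeMinMax(new[]
        {
            new InputOutputColumnPair("Feature1Norm", "Feature1"),
            new InputOutputColumnPair("Feature2Norm", "Feature2")
        });

        // Supervised binning derives bin boundaries from the label column.
        var supervisedBinning = mlContext.Transforms.NormalizeSupervisedBinning(new[]
        {
            new InputOutputColumnPair("Feature1Bin", "Feature1"),
            new InputOutputColumnPair("Feature2Bin", "Feature2")
        }, labelColumnName: "Label", maximumBinCount: 16);

        // Fit both normalizers and apply them to the data.
        var transformer = minMax.Append(supervisedBinning).Fit(data);
        return transformer.Transform(data);
    }
}

The fitted per-column model parameters can then be inspected through NormalizingTransformer.Columns, which is what the new NormalizerParametersMultiColumnApi test further down does.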
diff --git a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt index 9e21b55dab..ca8471a9a8 100644 --- a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt +++ b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt @@ -1,4 +1,4 @@ PriorPredictor AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings -0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1 +0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1 diff --git a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt index 8e3bf3b17b..529400420f 100644 --- a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt +++ b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt @@ -1,4 +1,4 @@ -maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1 +maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1 Not adding a normalizer. Not training a calibrator because it is not needed. 
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt index 60265c1608..efb8a13e71 100644 --- a/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt +++ b/test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt @@ -1,4 +1,4 @@ PriorPredictor AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings -0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1 +0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1 diff --git a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt index ae9258e681..8a5c288d72 100644 --- a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt +++ b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt @@ -1,4 +1,4 @@ -maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1 +maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1 Not adding a normalizer. Not training a calibrator because it is not needed. Not adding a normalizer. 
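Note: the updated baseline command lines pass loader=Text{col=Label:BL:0 col=Features:~}, i.e. the breast-cancer label in column 0 is now read as a boolean, matching the PriorPredictor switch to boolean labels. The sketch below shows an assumed API-side equivalent of that loader definition; the data path and the feature range (columns 1-9) are illustrative.

using Microsoft.ML;
using Microsoft.ML.Data;

public static class BooleanLabelLoaderSketch
{
    public static IDataView Load(MLContext mlContext, string dataPath)
    {
        // Column 0 holds the 0/1 label and is read as a boolean ("BL" in the maml loader syntax);
        // the remaining columns are loaded as a single numeric Features vector.
        var loader = mlContext.Data.CreateTextLoader(new TextLoader.Options
        {
            Columns = new[]
            {
                new TextLoader.Column("Label", DataKind.Boolean, 0),
                new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(1, 9) })
            }
        });
        return loader.Load(new MultiFileSource(dataPath));
    }
}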
diff --git a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt index 9e21b55dab..ca8471a9a8 100644 --- a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt +++ b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-rp.txt @@ -1,4 +1,4 @@ PriorPredictor AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings -0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1 +0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1 diff --git a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt index 8e3bf3b17b..529400420f 100644 --- a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt +++ b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt @@ -1,4 +1,4 @@ -maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1 +maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1 Not adding a normalizer. Not training a calibrator because it is not needed. 
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0)) diff --git a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt index 60265c1608..efb8a13e71 100644 --- a/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt +++ b/test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-rp.txt @@ -1,4 +1,4 @@ PriorPredictor AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings -0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1 +0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1 diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index 0c46c7a7c1..3e0b31062c 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -120,8 +120,7 @@ public void BinaryPriorTest() { var predictors = new[] { TestLearners.binaryPrior}; - var datasets = GetDatasetsForBinaryClassifierBaseTest(); - RunAllTests(predictors, datasets); + RunAllTests(predictors, new[] { TestDatasets.breastCancerBoolLabel }); Done(); } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs index 259f4bcd48..d5c2ef7c6a 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs @@ -18,7 +18,7 @@ private IDataView GetBreastCancerDataviewWithTextColumns() HasHeader = true, Columns = new[] { - new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("Label", DataKind.Boolean, 0), new TextLoader.Column("F1", DataKind.String, 1), new TextLoader.Column("F2", DataKind.Int32, 2), new TextLoader.Column("Rest", DataKind.Single, new [] { new TextLoader.Range(3, 9) }) diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 17d41c418e..ed26cc0b06 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Immutable; using System.IO; +using Microsoft.ML; using Microsoft.ML.Data; using Microsoft.ML.Data.IO; using Microsoft.ML.Experimental; @@ -207,6 +208,181 @@ public void NormalizerParameters() Done(); } + [Fact] + public void NormalizerParametersMultiColumnApi() + { + string dataPath = GetDataPath("iris.txt"); + var context = new MLContext(seed: 0); + + var loader = new TextLoader(context, new TextLoader.Options + { + Columns = new[] { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("float1", DataKind.Single, 1), + new TextLoader.Column("float4", DataKind.Single, new[]{new TextLoader.Range(1, 4) }), + new TextLoader.Column("double1", DataKind.Double, 1), + new TextLoader.Column("double4", DataKind.Double, new[]{new 
TextLoader.Range(1, 4) }), + new TextLoader.Column("int1", DataKind.Int32, 0), + new TextLoader.Column("float0", DataKind.Single, new[]{ new TextLoader.Range { Min = 1, VariableEnd = true } }) + }, + HasHeader = true + }, new MultiFileSource(dataPath)); + + var est = context.Transforms.NormalizeMinMax( + new[] { new InputOutputColumnPair("float1"), new InputOutputColumnPair("float4"), + new InputOutputColumnPair("double1"), new InputOutputColumnPair("double4"), }) + .Append(context.Transforms.NormalizeBinning( + new[] {new InputOutputColumnPair("float1bin", "float1"), new InputOutputColumnPair("float4bin", "float4"), + new InputOutputColumnPair("double1bin", "double1"), new InputOutputColumnPair("double4bin", "double4")})) + .Append(context.Transforms.NormalizeMeanVariance( + new[] {new InputOutputColumnPair("float1mv", "float1"), new InputOutputColumnPair("float4mv", "float4"), + new InputOutputColumnPair("double1mv", "double1"), new InputOutputColumnPair("double4mv", "double4")})) + .Append(context.Transforms.NormalizeLogMeanVariance( + new[] {new InputOutputColumnPair("float1lmv", "float1"), new InputOutputColumnPair("float4lmv", "float4"), + new InputOutputColumnPair("double1lmv", "double1"), new InputOutputColumnPair("double4lmv", "double4")})) + .Append(context.Transforms.NormalizeSupervisedBinning( + new[] {new InputOutputColumnPair("float1nsb", "float1"), new InputOutputColumnPair("float4nsb", "float4"), + new InputOutputColumnPair("double1nsb", "double1"), new InputOutputColumnPair("double4nsb", "double4")})); + + var data = loader.Load(dataPath); + + var transformer = est.Fit(data); + var transformers = transformer.ToImmutableArray(); + var floatAffineModel = ((NormalizingTransformer)transformers[0]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(0.12658228f, floatAffineModel.Scale); + Assert.Equal(0, floatAffineModel.Offset); + + var floatAffineModelVec = ((NormalizingTransformer)transformers[0]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, floatAffineModelVec.Scale.Length); + Assert.Empty(floatAffineModelVec.Offset); + + var doubleAffineModel = ((NormalizingTransformer)transformers[0]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(0.12658227848101264, doubleAffineModel.Scale); + Assert.Equal(0, doubleAffineModel.Offset); + + var doubleAffineModelVector = ((NormalizingTransformer)transformers[0]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, doubleAffineModelVector.Scale.Length); + Assert.Equal(0.12658227848101264, doubleAffineModelVector.Scale[0]); + Assert.Equal(0.4, doubleAffineModelVector.Scale[3]); + Assert.Empty(doubleAffineModelVector.Offset); + + var floatBinModel = ((NormalizingTransformer)transformers[1]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.True(35 == floatBinModel.UpperBounds.Length); + Assert.True(0.550632954f == floatBinModel.UpperBounds[0]); + Assert.True(float.PositiveInfinity == floatBinModel.UpperBounds[34]); + Assert.True(34 == floatBinModel.Density); + Assert.True(0 == floatBinModel.Offset); + + var floatBinModelVector = ((NormalizingTransformer)transformers[1]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.True(4 == floatBinModelVector.UpperBounds.Length); + Assert.True(35 == floatBinModelVector.UpperBounds[0].Length); + 
Assert.True(0.550632954f == floatBinModelVector.UpperBounds[0][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[0][floatBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.0600000024f == floatBinModelVector.UpperBounds[3][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[3][floatBinModelVector.UpperBounds[3].Length - 1]); + Assert.True(4 == floatBinModelVector.Density.Length); + Assert.True(0 == floatBinModelVector.Offset.Length); + + var doubleBinModel = ((NormalizingTransformer)transformers[1]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.Equal(35, doubleBinModel.UpperBounds.Length); + Assert.True(0.550632911392405 == doubleBinModel.UpperBounds[0]); + Assert.True(double.PositiveInfinity == doubleBinModel.UpperBounds[34]); + Assert.Equal(34, doubleBinModel.Density); + Assert.Equal(0, doubleBinModel.Offset); + + var doubleBinModelVector = ((NormalizingTransformer)transformers[1]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.Equal(35, doubleBinModelVector.UpperBounds[0].Length); + Assert.True(0.550632911392405 == doubleBinModelVector.UpperBounds[0][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[0][doubleBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.060000000000000012 == doubleBinModelVector.UpperBounds[3][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[3][doubleBinModelVector.UpperBounds[3].Length - 1]); + Assert.Equal(4, doubleBinModelVector.Density.Length); + Assert.Empty(doubleBinModelVector.Offset); + + var floatCdfMeanModel = ((NormalizingTransformer)transformers[2]).Columns[0].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(1.33754611f, floatCdfMeanModel.Scale); + Assert.Equal(0, floatCdfMeanModel.Offset); + + var floatCdfMeanModelVector = ((NormalizingTransformer)transformers[2]).Columns[1].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(1.33754611f, floatCdfMeanModelVector.Scale[0]); + Assert.Equal(1.75526536f, floatCdfMeanModelVector.Scale[3]); + Assert.Equal(4, floatCdfMeanModelVector.Scale.Length); + Assert.Empty(floatCdfMeanModelVector.Offset); + + var doubleCdfMeanModel = ((NormalizingTransformer)transformers[2]).Columns[2].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; + Assert.Equal(1.3375461389666252, doubleCdfMeanModel.Scale); + Assert.Equal(0, doubleCdfMeanModel.Offset); + + var doubleCdfMeanModelVector = ((NormalizingTransformer)transformers[2]).Columns[3].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.Equal(4, doubleCdfMeanModelVector.Scale.Length); + Assert.True(1.3375461389666252 == doubleCdfMeanModelVector.Scale[0]); + Assert.True(1.7552654477786787 == doubleCdfMeanModelVector.Scale[3]); + Assert.Empty(doubleCdfMeanModelVector.Offset); + + var floatCdfLogMeanModel = ((NormalizingTransformer)transformers[3]).Columns[0].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + Assert.Equal(-0.310623198747635f, floatCdfLogMeanModel.Mean); + Assert.True(true == floatCdfLogMeanModel.UseLog); + Assert.Equal(0.140807763f, floatCdfLogMeanModel.StandardDeviation); + + var floatCdfLogMeanModelVector = ((NormalizingTransformer)transformers[3]).Columns[1].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + Assert.Equal(4, floatCdfLogMeanModelVector.Mean.Length); 
+ Assert.True(-0.3106232f == floatCdfLogMeanModelVector.Mean[0]); + Assert.True(-1.08362031f == floatCdfLogMeanModelVector.Mean[3]); + Assert.True(true == floatCdfLogMeanModelVector.UseLog); + Assert.Equal(4, floatCdfLogMeanModelVector.StandardDeviation.Length); + Assert.True(0.140807763f == floatCdfLogMeanModelVector.StandardDeviation[0]); + Assert.True(0.9843767f == floatCdfLogMeanModelVector.StandardDeviation[3]); + + var doubleCdfLogMeanModel = ((NormalizingTransformer)transformers[3]).Columns[2].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; + Assert.Equal(-0.31062321927759518, doubleCdfLogMeanModel.Mean); + Assert.True(doubleCdfLogMeanModel.UseLog); + Assert.Equal(0.14080776721611871, doubleCdfLogMeanModel.StandardDeviation); + + var doubleCdfLogMeanModelVector = ((NormalizingTransformer)transformers[3]).Columns[3].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; + Assert.Equal(4, doubleCdfLogMeanModelVector.Mean.Length); + Assert.True(-0.31062321927759518 == doubleCdfLogMeanModelVector.Mean[0]); + Assert.True(-1.0836203140680853 == doubleCdfLogMeanModelVector.Mean[3]); + Assert.True(doubleCdfLogMeanModelVector.UseLog); + Assert.Equal(4, doubleCdfLogMeanModelVector.StandardDeviation.Length); + Assert.True(0.14080776721611871 == doubleCdfLogMeanModelVector.StandardDeviation[0]); + Assert.True(0.98437679839698122 == doubleCdfLogMeanModelVector.StandardDeviation[3]); + + floatBinModel = ((NormalizingTransformer)transformers[4]).Columns[0].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.True(4 == floatBinModel.UpperBounds.Length); + Assert.True(0.6139241f == floatBinModel.UpperBounds[0]); + Assert.True(float.PositiveInfinity == floatBinModel.UpperBounds[3]); + Assert.True(3 == floatBinModel.Density); + Assert.True(0 == floatBinModel.Offset); + + floatBinModelVector = ((NormalizingTransformer)transformers[4]).Columns[1].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.True(4 == floatBinModelVector.UpperBounds.Length); + Assert.True(4 == floatBinModelVector.UpperBounds[0].Length); + Assert.True(0.6139241f == floatBinModelVector.UpperBounds[0][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[0][floatBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.32f == floatBinModelVector.UpperBounds[3][0]); + Assert.True(float.PositiveInfinity == floatBinModelVector.UpperBounds[3][floatBinModelVector.UpperBounds[3].Length - 1]); + Assert.True(4 == floatBinModelVector.Density.Length); + Assert.True(0 == floatBinModelVector.Offset.Length); + + doubleBinModel = ((NormalizingTransformer)transformers[4]).Columns[2].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters; + Assert.Equal(4, doubleBinModel.UpperBounds.Length); + Assert.True(0.61392405063291133 == doubleBinModel.UpperBounds[0]); + Assert.True(float.PositiveInfinity == doubleBinModel.UpperBounds[3]); + Assert.Equal(3, doubleBinModel.Density); + Assert.Equal(0, doubleBinModel.Offset); + + doubleBinModelVector = ((NormalizingTransformer)transformers[4]).Columns[3].ModelParameters as NormalizingTransformer.BinNormalizerModelParameters>; + Assert.Equal(4, doubleBinModelVector.UpperBounds[0].Length); + Assert.True(0.6139240506329113335 == doubleBinModelVector.UpperBounds[0][0]); + Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[0][doubleBinModelVector.UpperBounds[0].Length - 1]); + Assert.True(0.32 == doubleBinModelVector.UpperBounds[3][0]); + 
Assert.True(double.PositiveInfinity == doubleBinModelVector.UpperBounds[3][doubleBinModelVector.UpperBounds[3].Length - 1]); + Assert.Equal(4, doubleBinModelVector.Density.Length); + Assert.Empty(doubleBinModelVector.Offset); + + Done(); + } + [Fact] public void SimpleConstructorsAndExtensions() {