diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 55f3c89845..26a0514892 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -1,8 +1,7 @@ using System; using System.Collections.Generic; -using System.Linq; using Microsoft.ML.Data; -using Microsoft.ML.Transforms; +using Microsoft.ML.Experimental; namespace Microsoft.ML.Samples.Dynamic { @@ -28,7 +27,7 @@ public static void Example() // 35 1 6-11yrs 1 3 32 5 ... // A pipeline for normalizing the Induced column. - var pipeline = ml.Transforms.Normalize("Induced"); + var pipeline = ml.Transforms.NormalizeMinMax("Induced"); // The transformed (normalized according to Normalizer.NormalizerMode.MinMax) data. var transformer = pipeline.Fit(trainData); @@ -58,8 +57,8 @@ public static void Example() // Composing a different pipeline if we wanted to normalize more than one column at a time. // Using log scale as the normalization mode. - var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance) - .Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance)); + var multiColPipeline = ml.Transforms.NormalizeLogMeanVariance("LogInduced", "Induced") + .Append(ml.Transforms.NormalizeLogMeanVariance("LogSpontaneous", "Spontaneous")); // The transformed data. 
var multiColtransformer = multiColPipeline.Fit(trainData); var multiColtransformedData = multiColtransformer.Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs index 4afa964850..bb571dd20f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PFIRegressionExample.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Microsoft.ML.Experimental; namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance { @@ -19,7 +20,7 @@ public static void Example() // Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0. // Then append a linear regression trainer. var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Regression.Trainers.Ols( labelColumnName: labelName, featureColumnName: "Features")); var model = pipeline.Fit(data); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs index 09fb640f30..04e0bd9178 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance/PfiBinaryClassificationExample.cs @@ -1,5 +1,6 @@ using System; using System.Linq; +using Microsoft.ML.Experimental; using Microsoft.ML.Trainers; namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance @@ -21,7 +22,7 @@ public static void Example() // Normalize the data set so that for each 
feature, its maximum value is 1 while its minimum value is 0. // Then append a logistic regression trainer. var pipeline = mlContext.Transforms.Concatenate("Features", featureNames) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression( labelColumnName: labelName, featureColumnName: "Features")); var model = pipeline.Fit(data); diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 8d83cc95bd..7250067681 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -6,6 +6,7 @@ + diff --git a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj index b7c0a83577..7553151b5c 100644 --- a/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj +++ b/src/Microsoft.ML.SamplesUtils/Microsoft.ML.SamplesUtils.csproj @@ -8,6 +8,7 @@ + diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 3d2aa09791..08c3f5084a 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -7,6 +7,7 @@ using System.IO; using System.Net; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; namespace Microsoft.ML.SamplesUtils { @@ -171,7 +172,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) "occupation", "relationship", "ethnicity", "native-country", "age", "education-num", "capital-gain", "capital-loss", "hours-per-week")) // Min-max normalize all the features - .Append(mlContext.Transforms.Normalize("Features")); + .Append(mlContext.Transforms.NormalizeMinMax("Features")); var data = loader.Load(dataFile); var featurizedData = 
pipeline.Fit(data).Transform(data); diff --git a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj index 4aa4a4eb79..8260cd5ab7 100644 --- a/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj +++ b/src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj @@ -48,6 +48,7 @@ + diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index dd12c452c4..b62cef1c49 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -9,48 +9,6 @@ namespace Microsoft.ML /// public static class NormalizationCatalog { - /// - /// Normalize (rescale) the column according to the specified . - /// - /// The transform catalog - /// Name of the column resulting from the transformation of . - /// Name of the column to transform. If set to , the value of the will be used as source. - /// The used to map the old values in the new scale. - /// - /// - /// - /// - /// - public static NormalizingEstimator Normalize(this TransformsCatalog catalog, - string outputColumnName, string inputColumnName = null, - NormalizingEstimator.NormalizationMode mode = NormalizingEstimator.NormalizationMode.MinMax) - => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, mode); - - /// - /// Normalize (rescale) several columns according to the specified . - /// - /// The transform catalog - /// The used to map the old values to the new ones. - /// The pairs of input and output columns. 
- /// - /// - /// - /// - /// - [BestFriend] - internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, - NormalizingEstimator.NormalizationMode mode, - params InputOutputColumnPair[] columns) - { - var env = CatalogUtils.GetEnvironment(catalog); - env.CheckValue(columns, nameof(columns)); - return new NormalizingEstimator(env, mode, InputOutputColumnPair.ConvertToValueTuples(columns)); - } - /// /// Normalize (rescale) columns according to specified custom parameters. /// diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs index 663c1383d3..fe34d261ca 100644 --- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs +++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Benchmarks.Harness; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; @@ -35,7 +36,7 @@ public CalibratedModelParametersBase + @@ -24,6 +25,6 @@ - + diff --git a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs index 3790d84cdd..9a34e4402e 100644 --- a/test/Microsoft.ML.Functional.Tests/DataTransformation.cs +++ b/test/Microsoft.ML.Functional.Tests/DataTransformation.cs @@ -3,11 +3,11 @@ // See the LICENSE file in the project root for more information. using System; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Text; using Xunit; using Xunit.Abstractions; @@ -174,7 +174,7 @@ void ExtensibilityNormalizeColumns() // Compose the transformation. 
var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) - .Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax)); + .Append(mlContext.Transforms.NormalizeMinMax("Features")); // Transform the data. var transformedData = pipeline.Fit(data).Transform(data); diff --git a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs index 89caf8c4f2..62716a8a98 100644 --- a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs +++ b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs @@ -7,6 +7,7 @@ using System.Collections.Immutable; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -254,7 +255,7 @@ void IntrospectNormalization() // Compose the transformation. var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features) - .Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax)); + .Append(mlContext.Transforms.NormalizeMinMax("Features")); // Fit the pipeline. var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index e1fbe98749..e248ecb6bb 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -8,6 +8,7 @@ using System.Linq; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers.FastTree; @@ -275,7 +276,7 @@ public void LoadSchemaAndCreateNewData() var data = loader.Load(file); // Pipeline. - var pipeline = ML.Transforms.Normalize("Features"); + var pipeline = ML.Transforms.NormalizeMinMax("Features"); // Train. 
var model = pipeline.Fit(data); @@ -330,7 +331,7 @@ public void SaveCompositeLoaderAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var composite = loader.Append(ML.Transforms.Normalize("Features")); + var composite = loader.Append(ML.Transforms.NormalizeMinMax("Features")); var loaderWithEmbeddedModel = composite.Fit(file); string modelPath = GetOutputPath(FullTestName + "-model.zip"); @@ -368,7 +369,7 @@ public void SaveLoaderAndTransformerAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = ML.Transforms.Normalize("Features"); + var estimator = ML.Transforms.NormalizeMinMax("Features"); var data = loader.Load(file); var model = estimator.Fit(data); @@ -401,7 +402,7 @@ public void SaveTransformerAndSchemaAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = ML.Transforms.Normalize("Features"); + var estimator = ML.Transforms.NormalizeMinMax("Features"); var model = estimator.Fit(loader.Load(file)); string modelPath = GetOutputPath(FullTestName + "-model.zip"); diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index 3ece5658b8..49ac2e16fc 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.IO; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -33,7 +34,7 @@ public void SaveOnnxModelLoadAndScoreFastTree() // Create a pipeline to train on the housing data. 
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.FastTree( new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 })); @@ -85,7 +86,7 @@ public void SaveOnnxModelLoadAndScoreKMeans() // Create a pipeline to train on the housing data. var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Clustering.Trainers.KMeans( new KMeansTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 })); @@ -137,7 +138,7 @@ public void SaveOnnxModelLoadAndScoreSDCA() // Create a pipeline to train on the housing data. var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 })); diff --git a/test/Microsoft.ML.Functional.Tests/Training.cs b/test/Microsoft.ML.Functional.Tests/Training.cs index 2499ecc634..c44d1be7e3 100644 --- a/test/Microsoft.ML.Functional.Tests/Training.cs +++ b/test/Microsoft.ML.Functional.Tests/Training.cs @@ -5,6 +5,7 @@ using System; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -316,7 +317,7 @@ public void ContinueTrainingOnlineGradientDescent() // Create a transformation pipeline. 
var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext); var trainer = mlContext.Regression.Trainers.OnlineGradientDescent( @@ -360,7 +361,7 @@ public void ContinueTrainingPoissonRegression() // Create a transformation pipeline. var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .AppendCacheCheckpoint(mlContext); var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression( diff --git a/test/Microsoft.ML.Tests/CachingTests.cs b/test/Microsoft.ML.Tests/CachingTests.cs index 1b58848391..a78e54ab5a 100644 --- a/test/Microsoft.ML.Tests/CachingTests.cs +++ b/test/Microsoft.ML.Tests/CachingTests.cs @@ -5,6 +5,7 @@ using System.Linq; using System.Threading; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.StaticPipe; using Xunit; @@ -43,8 +44,8 @@ public void CacheCheckpointTest() var trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray(); var pipe = ML.Transforms.CopyColumns("F1", "Features") - .Append(ML.Transforms.Normalize("Norm1", "F1")) - .Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance)); + .Append(ML.Transforms.NormalizeMinMax("Norm1", "F1")) + .Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1")); pipe.Fit(ML.Data.LoadFromEnumerable(trainData)); @@ -53,8 +54,8 @@ public void CacheCheckpointTest() trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray(); pipe = ML.Transforms.CopyColumns("F1", "Features") .AppendCacheCheckpoint(ML) - .Append(ML.Transforms.Normalize("Norm1", "F1")) - .Append(ML.Transforms.Normalize("Norm2", "F1", 
Transforms.NormalizingEstimator.NormalizationMode.MeanVariance)); + .Append(ML.Transforms.NormalizeMinMax("Norm1", "F1")) + .Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1")); pipe.Fit(ML.Data.LoadFromEnumerable(trainData)); diff --git a/test/Microsoft.ML.Tests/FeatureContributionTests.cs b/test/Microsoft.ML.Tests/FeatureContributionTests.cs index d1f691d8c5..17f4ba83fb 100644 --- a/test/Microsoft.ML.Tests/FeatureContributionTests.cs +++ b/test/Microsoft.ML.Tests/FeatureContributionTests.cs @@ -6,13 +6,12 @@ using System.IO; using Microsoft.ML.Calibrators; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Data.IO; using Microsoft.ML.Internal.Utilities; -using Microsoft.ML.Model; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework.Attributes; using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; using Xunit; using Xunit.Abstractions; @@ -306,7 +305,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression, int numb var srcDV = bldr.GetDataView(); var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important") - .Append(ML.Transforms.Normalize("Features")); + .Append(ML.Transforms.NormalizeMinMax("Features")); if (task == TaskType.BinaryClassification) return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 723ab59055..5315bc854e 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -10,6 +10,7 @@ using System.Text.RegularExpressions; using Google.Protobuf; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Model.OnnxConverter; using Microsoft.ML.RunTests; using Microsoft.ML.Runtime; @@ -57,7 +58,7 @@ public void SimpleEndToEndOnnxConversionTest() hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline 
= - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca(new SdcaRegressionTrainer.Options() { LabelColumnName = "Target", @@ -137,7 +138,7 @@ public void KmeansOnnxConversionTest() separatorChar: '\t', hasHeader: true); - var pipeline = mlContext.Transforms.Normalize("Features"). + var pipeline = mlContext.Transforms.NormalizeMinMax("Features"). Append(mlContext.Clustering.Trainers.KMeans(new Trainers.KMeansTrainer.Options { FeatureColumnName = DefaultColumnNames.Features, @@ -315,7 +316,7 @@ public void LogisticRegressionOnnxConversionTest() hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.Sdca(new SdcaRegressionTrainer.Options() { LabelColumnName = "Target", @@ -352,7 +353,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest() hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Regression.Trainers.LightGbm(labelColumnName: "Target", featureColumnName: "FeatureVector", numberOfIterations: 3, numberOfLeaves: 16, minimumExampleCountPerLeaf: 100)); var model = dynamicPipeline.Fit(data); @@ -383,7 +384,7 @@ public void MulticlassLogisticRegressionOnnxConversionTest() separatorChar: '\t', hasHeader: true); - var pipeline = mlContext.Transforms.Normalize("Features"). + var pipeline = mlContext.Transforms.NormalizeMinMax("Features"). Append(mlContext.Transforms.Conversion.MapValueToKey("Label")). 
Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(new LbfgsMaximumEntropyMulticlassTrainer.Options() { NumberOfThreads = 1 })); @@ -416,7 +417,7 @@ public void RemoveVariablesInPipelineTest() var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.OneHotEncodingEstimator.OutputKind.Bag) .Append(mlContext.Transforms.ReplaceMissingValues(new MissingValueReplacingEstimator.ColumnOptions("F2"))) .Append(mlContext.Transforms.Concatenate("Features", "F1", "F2")) - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfLeaves: 2, numberOfTrees: 1, minimumExampleCountPerLeaf: 2)); var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs index db49bbf161..589ae5ddba 100644 --- a/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs +++ b/test/Microsoft.ML.Tests/PermutationFeatureImportanceTests.cs @@ -6,6 +6,7 @@ using System.Collections.Immutable; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers; @@ -421,7 +422,7 @@ private IDataView GetDenseDataset(TaskType task = TaskType.Regression) var srcDV = bldr.GetDataView(); var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2Important", "X3", "X4Rand") - .Append(ML.Transforms.Normalize("Features")); + .Append(ML.Transforms.NormalizeMinMax("Features")); if (task == TaskType.BinaryClassification) return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) .Fit(srcDV).Transform(srcDV); @@ -501,7 +502,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression) var srcDV = bldr.GetDataView(); var 
pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important") - .Append(ML.Transforms.Normalize("Features")); + .Append(ML.Transforms.NormalizeMinMax("Features")); if (task == TaskType.BinaryClassification) { return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean)) diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 867925788f..2b4201860b 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -7,6 +7,7 @@ using System.IO; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; @@ -93,7 +94,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m var pipeline = // First 'normalize' the data (rescale to be // between -1 and 1 for all examples), and then train the model. - mlContext.Transforms.Normalize("FeatureVector") + mlContext.Transforms.NormalizeMinMax("FeatureVector") // We add a step for caching data in memory so that the downstream iterative training // algorithm can efficiently scan through the data multiple times. Otherwise, the following // trainer will read data from disk multiple times. The caching mechanism uses an on-demand strategy. diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index b92b95ad32..86d8e288b0 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. 
using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; @@ -29,7 +30,7 @@ public void TrainAndPredictIrisModelTest() ); var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index 088f164b28..2383601b63 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. 
using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.Trainers; using Xunit; @@ -34,7 +35,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() // Create Estimator var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label", "IrisPlantType"), TransformerScope.TrainTest) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs index f2098c68e5..c44f0d5db2 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. 
using Microsoft.ML.Data; +using Microsoft.ML.Experimental; using Microsoft.ML.RunTests; using Microsoft.ML.Trainers; using Xunit; @@ -27,7 +28,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest() ); var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") - .Append(mlContext.Transforms.Normalize("Features")) + .Append(mlContext.Transforms.NormalizeMinMax("Features")) .Append(mlContext.Transforms.Conversion.MapValueToKey("Label")) .AppendCacheCheckpoint(mlContext) .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy( diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index 775289ca07..17d41c418e 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -225,8 +225,8 @@ public void SimpleConstructorsAndExtensions() var est1 = new NormalizingEstimator(Env, "float4"); var est2 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4")); var est3 = new NormalizingEstimator(Env, new NormalizingEstimator.MinMaxColumnOptions("float4")); - var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax); - var est5 = ML.Transforms.Normalize("float4"); + var est4 = ML.Transforms.NormalizeMinMax("float4", "float4"); + var est5 = ML.Transforms.NormalizeMinMax("float4"); var data1 = est1.Fit(data).Transform(data); var data2 = est2.Fit(data).Transform(data); @@ -246,7 +246,7 @@ public void SimpleConstructorsAndExtensions() // Tests for MeanVariance var est6 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4")); var est7 = new NormalizingEstimator(Env, new NormalizingEstimator.MeanVarianceColumnOptions("float4")); - var est8 = ML.Transforms.Normalize("float4", "float4", 
NormalizingEstimator.NormalizationMode.MeanVariance); + var est8 = ML.Transforms.NormalizeMeanVariance("float4", "float4"); var data6 = est6.Fit(data).Transform(data); var data7 = est7.Fit(data).Transform(data); @@ -259,7 +259,7 @@ public void SimpleConstructorsAndExtensions() // Tests for LogMeanVariance var est9 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4")); var est10 = new NormalizingEstimator(Env, new NormalizingEstimator.LogMeanVarianceColumnOptions("float4")); - var est11 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance); + var est11 = ML.Transforms.NormalizeLogMeanVariance("float4", "float4"); var data9 = est9.Fit(data).Transform(data); var data10 = est10.Fit(data).Transform(data); @@ -272,7 +272,7 @@ public void SimpleConstructorsAndExtensions() // Tests for Binning var est12 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4")); var est13 = new NormalizingEstimator(Env, new NormalizingEstimator.BinningColumnOptions("float4")); - var est14 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning); + var est14 = ML.Transforms.NormalizeBinning("float4", "float4"); var data12 = est12.Fit(data).Transform(data); var data13 = est13.Fit(data).Transform(data); @@ -285,7 +285,7 @@ public void SimpleConstructorsAndExtensions() // Tests for SupervisedBinning var est15 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", "float4")); var est16 = new NormalizingEstimator(Env, new NormalizingEstimator.SupervisedBinningColumOptions("float4")); - var est17 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning); + var est17 = ML.Transforms.NormalizeSupervisedBinning("float4", "float4"); var data15 = est15.Fit(data).Transform(data); var data16 = est16.Fit(data).Transform(data); @@ 
-314,11 +314,11 @@ public void NormalizerExperimentalExtensions() var data = loader.Load(dataPath); // Normalizer Extensions - var est1 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MinMax); - var est2 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.MeanVariance); - var est3 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.LogMeanVariance); - var est4 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.Binning); - var est5 = ML.Transforms.Normalize("float4", "float4", NormalizingEstimator.NormalizationMode.SupervisedBinning); + var est1 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MinMax, ("float4", "float4")); + var est2 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.MeanVariance, ("float4", "float4")); + var est3 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.LogMeanVariance, ("float4", "float4")); + var est4 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.Binning, ("float4", "float4")); + var est5 = new NormalizingEstimator(Env, NormalizingEstimator.NormalizationMode.SupervisedBinning, ("float4", "float4")); // Normalizer Extensions (Experimental) var est6 = ML.Transforms.NormalizeMinMax("float4", "float4"); @@ -370,7 +370,7 @@ public void NormalizerExperimentalExtensionGetColumnPairs() }); var data = loader.Load(dataPath); - var est = ML.Transforms.Normalize("output", "input", NormalizingEstimator.NormalizationMode.MinMax); + var est = ML.Transforms.NormalizeMinMax("output", "input"); var t = est.Fit(data); Assert.Single(t.GetColumnPairs());