Skip to content

Remove generic normalizer estimator catalog methods. #3116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;
using Microsoft.ML.Experimental;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it just experimental? We are removing the generic estimator, so these new methods must be supported officially and therefore deserve a namespace like ML.Transform. Is my understanding correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


namespace Microsoft.ML.Samples.Dynamic
{
Expand All @@ -28,7 +27,7 @@ public static void Example()
// 35 1 6-11yrs 1 3 32 5 ...

// A pipeline for normalizing the Induced column.
var pipeline = ml.Transforms.Normalize("Induced");
var pipeline = ml.Transforms.NormalizeMinMax("Induced");
// The transformed (min-max normalized) data.
var transformer = pipeline.Fit(trainData);

Expand Down Expand Up @@ -58,8 +57,8 @@ public static void Example()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using min-max scaling as the normalization mode for each column.
var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance)
.Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance));
var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced")
.Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous"));
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Linq;
using Microsoft.ML.Experimental;

namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance
{
Expand All @@ -19,7 +20,7 @@ public static void Example()
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
// Then append a linear regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Regression.Trainers.Ols(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Linq;
using Microsoft.ML.Experimental;
using Microsoft.ML.Trainers;

namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance
Expand All @@ -21,7 +22,7 @@ public static void Example()
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
// Then append a logistic regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm.StaticPipe\Microsoft.ML.LightGbm.StaticPipe.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\..\..\src\Microsoft.ML.Mkl.Components\Microsoft.ML.Mkl.Components.csproj" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
</ItemGroup>

Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.IO;
using System.Net;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;

namespace Microsoft.ML.SamplesUtils
{
Expand Down Expand Up @@ -171,7 +172,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.Normalize("Features"));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = loader.Load(dataFile);
var featurizedData = pipeline.Fit(data).Transform(data);
Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.Transforms/Microsoft.ML.Transforms.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
<ItemGroup>
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
</ItemGroup>

<ItemGroup>
Expand Down
42 changes: 0 additions & 42 deletions src/Microsoft.ML.Transforms/NormalizerCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,48 +9,6 @@ namespace Microsoft.ML
/// </summary>
public static class NormalizationCatalog
{
/// <summary>
/// Normalize (rescale) the column according to the specified <paramref name="mode"/>.
/// </summary>
/// <param name="catalog">The transform catalog</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="mode">The <see cref="NormalizingEstimator.NormalizationMode"/> used to map the old values in the new scale. </param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Normalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
/// ]]>
/// </format>
/// </example>
public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
string outputColumnName, string inputColumnName = null,
NormalizingEstimator.NormalizationMode mode = NormalizingEstimator.NormalizationMode.MinMax)
=> new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, mode);

/// <summary>
/// Normalize (rescale) several columns according to the specified <paramref name="mode"/>.
/// </summary>
/// <param name="catalog">The transform catalog</param>
/// <param name="mode">The <see cref="NormalizingEstimator.NormalizationMode"/> used to map the old values to the new ones. </param>
/// <param name="columns">The pairs of input and output columns.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[Normalize](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs)]
/// ]]>
/// </format>
/// </example>
[BestFriend]
internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
NormalizingEstimator.NormalizationMode mode,
params InputOutputColumnPair[] columns)
{
var env = CatalogUtils.GetEnvironment(catalog);
env.CheckValue(columns, nameof(columns));
return new NormalizingEstimator(env, mode, InputOutputColumnPair.ConvertToValueTuples(columns));
}

/// <summary>
/// Normalize (rescale) columns according to specified custom parameters.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Microsoft.ML.Benchmarks.Harness;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.TestFramework;
using Microsoft.ML.Trainers;

Expand Down Expand Up @@ -35,7 +36,7 @@ public CalibratedModelParametersBase<LinearBinaryModelParameters, PlattCalibrato
}, hasHeader: true);

var estimatorPipeline = ml.Transforms.Categorical.OneHotEncoding("CatFeatures")
.Append(ml.Transforms.Normalize("NumFeatures"))
.Append(ml.Transforms.NormalizeMinMax("NumFeatures"))
.Append(ml.Transforms.Concatenate("Features", "NumFeatures", "CatFeatures"))
.Append(ml.Clustering.Trainers.KMeans("Features"))
.Append(ml.Transforms.Concatenate("Features", "Features", "Score"))
Expand Down
3 changes: 2 additions & 1 deletion test/Microsoft.ML.Benchmarks/Microsoft.ML.Benchmarks.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.Experimental\Microsoft.ML.Experimental.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.LightGbm\Microsoft.ML.LightGbm.csproj" />
<ProjectReference Include="..\..\src\Microsoft.ML.StandardTrainers\Microsoft.ML.StandardTrainers.csproj" />
Expand All @@ -24,6 +25,6 @@
<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="MklImports" />
<NativeAssemblyReference Condition="'$(OS)' == 'Windows_NT'" Include="libiomp5md"/>
<NativeAssemblyReference Condition="'$(OS)' == 'Windows_NT'" Include="libiomp5md" />
</ItemGroup>
</Project>
4 changes: 2 additions & 2 deletions test/Microsoft.ML.Functional.Tests/DataTransformation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
// See the LICENSE file in the project root for more information.

using System;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.Text;
using Xunit;
using Xunit.Abstractions;
Expand Down Expand Up @@ -174,7 +174,7 @@ void ExtensibilityNormalizeColumns()

// Compose the transformation.
var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
.Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

// Transform the data.
var transformedData = pipeline.Fit(data).Transform(data);
Expand Down
3 changes: 2 additions & 1 deletion test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Collections.Immutable;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
Expand Down Expand Up @@ -254,7 +255,7 @@ void IntrospectNormalization()

// Compose the transformation.
var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
.Append(mlContext.Transforms.Normalize("Features", mode: NormalizingEstimator.NormalizationMode.MinMax));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

// Fit the pipeline.
var model = pipeline.Fit(data);
Expand Down
9 changes: 5 additions & 4 deletions test/Microsoft.ML.Functional.Tests/ModelFiles.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Linq;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.Trainers.FastTree;
Expand Down Expand Up @@ -275,7 +276,7 @@ public void LoadSchemaAndCreateNewData()
var data = loader.Load(file);

// Pipeline.
var pipeline = ML.Transforms.Normalize("Features");
var pipeline = ML.Transforms.NormalizeMinMax("Features");

// Train.
var model = pipeline.Fit(data);
Expand Down Expand Up @@ -330,7 +331,7 @@ public void SaveCompositeLoaderAndLoad()
{
var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
var composite = loader.Append(ML.Transforms.Normalize("Features"));
var composite = loader.Append(ML.Transforms.NormalizeMinMax("Features"));
var loaderWithEmbeddedModel = composite.Fit(file);

string modelPath = GetOutputPath(FullTestName + "-model.zip");
Expand Down Expand Up @@ -368,7 +369,7 @@ public void SaveLoaderAndTransformerAndLoad()
{
var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
var estimator = ML.Transforms.Normalize("Features");
var estimator = ML.Transforms.NormalizeMinMax("Features");
var data = loader.Load(file);
var model = estimator.Fit(data);

Expand Down Expand Up @@ -401,7 +402,7 @@ public void SaveTransformerAndSchemaAndLoad()
{
var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
var estimator = ML.Transforms.Normalize("Features");
var estimator = ML.Transforms.NormalizeMinMax("Features");
var model = estimator.Fit(loader.Load(file));

string modelPath = GetOutputPath(FullTestName + "-model.zip");
Expand Down
7 changes: 4 additions & 3 deletions test/Microsoft.ML.Functional.Tests/ONNX.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// See the LICENSE file in the project root for more information.

using System.IO;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
Expand Down Expand Up @@ -33,7 +34,7 @@ public void SaveOnnxModelLoadAndScoreFastTree()

// Create a pipeline to train on the housing data.
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.FastTree(
new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
Expand Down Expand Up @@ -85,7 +86,7 @@ public void SaveOnnxModelLoadAndScoreKMeans()

// Create a pipeline to train on the housing data.
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Clustering.Trainers.KMeans(
new KMeansTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
Expand Down Expand Up @@ -137,7 +138,7 @@ public void SaveOnnxModelLoadAndScoreSDCA()

// Create a pipeline to train on the housing data.
var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.Regression.Trainers.Sdca(
new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 10 }));
Expand Down
5 changes: 3 additions & 2 deletions test/Microsoft.ML.Functional.Tests/Training.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Functional.Tests.Datasets;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework;
Expand Down Expand Up @@ -316,7 +317,7 @@ public void ContinueTrainingOnlineGradientDescent()

// Create a transformation pipeline.
var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext);

var trainer = mlContext.Regression.Trainers.OnlineGradientDescent(
Expand Down Expand Up @@ -360,7 +361,7 @@ public void ContinueTrainingPoissonRegression()

// Create a transformation pipeline.
var featurizationPipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.AppendCacheCheckpoint(mlContext);

var trainer = mlContext.Regression.Trainers.LbfgsPoissonRegression(
Expand Down
9 changes: 5 additions & 4 deletions test/Microsoft.ML.Tests/CachingTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Linq;
using System.Threading;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.RunTests;
using Microsoft.ML.StaticPipe;
using Xunit;
Expand Down Expand Up @@ -43,8 +44,8 @@ public void CacheCheckpointTest()
var trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray();

var pipe = ML.Transforms.CopyColumns("F1", "Features")
.Append(ML.Transforms.Normalize("Norm1", "F1"))
.Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance));
.Append(ML.Transforms.NormalizeMinMax("Norm1", "F1"))
.Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1"));

pipe.Fit(ML.Data.LoadFromEnumerable(trainData));

Expand All @@ -53,8 +54,8 @@ public void CacheCheckpointTest()
trainData = Enumerable.Range(0, 100).Select(c => new MyData()).ToArray();
pipe = ML.Transforms.CopyColumns("F1", "Features")
.AppendCacheCheckpoint(ML)
.Append(ML.Transforms.Normalize("Norm1", "F1"))
.Append(ML.Transforms.Normalize("Norm2", "F1", Transforms.NormalizingEstimator.NormalizationMode.MeanVariance));
.Append(ML.Transforms.NormalizeMinMax("Norm1", "F1"))
.Append(ML.Transforms.NormalizeMeanVariance("Norm2", "F1"));

pipe.Fit(ML.Data.LoadFromEnumerable(trainData));

Expand Down
5 changes: 2 additions & 3 deletions test/Microsoft.ML.Tests/FeatureContributionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
using System.IO;
using Microsoft.ML.Calibrators;
using Microsoft.ML.Data;
using Microsoft.ML.Experimental;
using Microsoft.ML.Data.IO;
using Microsoft.ML.Internal.Utilities;
using Microsoft.ML.Model;
using Microsoft.ML.RunTests;
using Microsoft.ML.TestFramework.Attributes;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms;
using Xunit;
using Xunit.Abstractions;

Expand Down Expand Up @@ -306,7 +305,7 @@ private IDataView GetSparseDataset(TaskType task = TaskType.Regression, int numb
var srcDV = bldr.GetDataView();

var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2VBuffer", "X3Important")
.Append(ML.Transforms.Normalize("Features"));
.Append(ML.Transforms.NormalizeMinMax("Features"));

if (task == TaskType.BinaryClassification)
return pipeline.Append(ML.Transforms.Conversion.ConvertType("Label", outputKind: DataKind.Boolean))
Expand Down
Loading