Skip to content

Commit 9926f98

Browse files
authored
Move transform catalog extensions into its own file and class in experimental nuget. (#3080)
* Move transform catalog extensions into its own class in experimental nuget. * TransformsCatalogExtensions.cs
1 parent 0b638bf commit 9926f98

File tree

2 files changed

+112
-103
lines changed

2 files changed

+112
-103
lines changed

src/Microsoft.ML.Experimental/MLContextExtensions.cs

-103
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using Microsoft.ML.Data;
6-
using Microsoft.ML.Transforms;
7-
85
namespace Microsoft.ML.Experimental
96
{
107
public static class MLContextExtensions
@@ -14,105 +11,5 @@ public static class MLContextExtensions
1411
/// </summary>
1512
/// <param name="ctx"><see cref="MLContext"/> reference.</param>
1613
public static void CancelExecution(this MLContext ctx) => ctx.CancelExecution(); // NOTE(review): relies on an accessible MLContext instance method named CancelExecution (instance members win over extension methods); if none is visible from this assembly the call would bind back to this extension and recurse — confirm.
17-
18-
/// <summary>
19-
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.MinMax"/> mode.
20-
/// It normalizes the data based on the observed minimum and maximum values of the data.
21-
/// </summary>
22-
/// <param name="catalog">The transform catalog</param>
23-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
24-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
25-
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
26-
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
27-
public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog,
28-
string outputColumnName, string inputColumnName = null,
29-
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
30-
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched)
31-
{
32-
var columnOptions = new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero);
33-
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
34-
}
35-
36-
/// <summary>
37-
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.MeanVariance"/> mode.
38-
/// It normalizes the data based on the computed mean and variance of the data.
39-
/// </summary>
40-
/// <param name="catalog">The transform catalog</param>
41-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
42-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
43-
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
44-
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
45-
/// <param name="useCdf">Whether to use CDF as the output.</param>
46-
public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog,
47-
string outputColumnName, string inputColumnName = null,
48-
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
49-
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
50-
bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf)
51-
{
52-
var columnOptions = new NormalizingEstimator.MeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, useCdf);
53-
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
54-
}
55-
56-
/// <summary>
57-
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.LogMeanVariance"/> mode.
58-
/// It normalizes the data based on the computed mean and variance of the logarithm of the data.
59-
/// </summary>
60-
/// <param name="catalog">The transform catalog</param>
61-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
62-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
63-
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
64-
/// <param name="useCdf">Whether to use CDF as the output.</param>
65-
public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog,
66-
string outputColumnName, string inputColumnName = null,
67-
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
68-
bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf)
69-
{
70-
var columnOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, useCdf);
71-
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
72-
}
73-
74-
/// <summary>
75-
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.Binning"/> mode.
76-
/// The values are assigned into bins with equal density.
77-
/// </summary>
78-
/// <param name="catalog">The transform catalog</param>
79-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
80-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
81-
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
82-
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
83-
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
84-
public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
85-
string outputColumnName, string inputColumnName = null,
86-
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
87-
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
88-
int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount)
89-
{
90-
var columnOptions = new NormalizingEstimator.BinningColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount);
91-
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
92-
}
93-
94-
/// <summary>
95-
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.SupervisedBinning"/> mode.
96-
/// The values are assigned into bins based on correlation with the <paramref name="labelColumnName"/> column.
97-
/// </summary>
98-
/// <param name="catalog">The transform catalog</param>
99-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
100-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
101-
/// <param name="labelColumnName">Name of the label column for supervised binning.</param>
102-
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
103-
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
104-
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
105-
/// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
106-
public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog,
107-
string outputColumnName, string inputColumnName = null,
108-
string labelColumnName = DefaultColumnNames.Label,
109-
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
110-
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
111-
int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount,
112-
int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
113-
{
114-
var columnOptions = new NormalizingEstimator.SupervisedBinningColumOptions(outputColumnName, inputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin);
115-
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
116-
}
11714
}
11815
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.Transforms;
7+
8+
namespace Microsoft.ML.Experimental
9+
{
10+
/// <summary>
/// Extension methods on <see cref="TransformsCatalog"/> that each create a <see cref="NormalizingEstimator"/>
/// for a single column, one method per normalization mode.
/// </summary>
public static class TransformsCatalogExtensions
11+
{
12+
/// <summary>
13+
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.MinMax"/> mode.
14+
/// It normalizes the data based on the observed minimum and maximum values of the data.
15+
/// </summary>
16+
/// <param name="catalog">The transform catalog; its environment is passed to the created estimator.</param>
17+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
18+
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
19+
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
20+
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.MinMaxColumnOptions"/> holding the supplied arguments.</returns>
21+
public static NormalizingEstimator NormalizeMinMax(this TransformsCatalog catalog,
22+
string outputColumnName, string inputColumnName = null,
23+
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
24+
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched)
25+
{
26+
var columnOptions = new NormalizingEstimator.MinMaxColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero);
27+
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
28+
}
29+
30+
/// <summary>
31+
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.MeanVariance"/> mode.
32+
/// It normalizes the data based on the computed mean and variance of the data.
33+
/// </summary>
34+
/// <param name="catalog">The transform catalog; its environment is passed to the created estimator.</param>
35+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
36+
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
37+
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
38+
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
39+
/// <param name="useCdf">Whether to use CDF as the output.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.MeanVarianceColumnOptions"/> holding the supplied arguments.</returns>
40+
public static NormalizingEstimator NormalizeMeanVariance(this TransformsCatalog catalog,
41+
string outputColumnName, string inputColumnName = null,
42+
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
43+
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
44+
bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf)
45+
{
46+
var columnOptions = new NormalizingEstimator.MeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, useCdf);
47+
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
48+
}
49+
50+
/// <summary>
51+
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.LogMeanVariance"/> mode.
52+
/// It normalizes the data based on the computed mean and variance of the logarithm of the data.
53+
/// </summary>
54+
/// <param name="catalog">The transform catalog; its environment is passed to the created estimator.</param>
55+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
56+
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
57+
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
58+
/// <param name="useCdf">Whether to use CDF as the output.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.LogMeanVarianceColumnOptions"/> holding the supplied arguments.</returns>
59+
public static NormalizingEstimator NormalizeLogMeanVariance(this TransformsCatalog catalog,
60+
string outputColumnName, string inputColumnName = null,
61+
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
62+
bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf)
63+
{
64+
var columnOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, useCdf);
65+
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
66+
}
67+
68+
/// <summary>
69+
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.Binning"/> mode.
70+
/// The values are assigned into bins with equal density.
71+
/// </summary>
72+
/// <param name="catalog">The transform catalog; its environment is passed to the created estimator.</param>
73+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
74+
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
75+
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
76+
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
77+
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.BinningColumnOptions"/> holding the supplied arguments.</returns>
78+
public static NormalizingEstimator NormalizeBinning(this TransformsCatalog catalog,
79+
string outputColumnName, string inputColumnName = null,
80+
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
81+
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
82+
int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount)
83+
{
84+
var columnOptions = new NormalizingEstimator.BinningColumnOptions(outputColumnName, inputColumnName, maximumExampleCount, fixZero, maximumBinCount);
85+
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
86+
}
87+
88+
/// <summary>
89+
/// Normalize (rescale) the column according to the <see cref="NormalizingEstimator.NormalizationMode.SupervisedBinning"/> mode.
90+
/// The values are assigned into bins based on correlation with the <paramref name="labelColumnName"/> column.
91+
/// </summary>
92+
/// <param name="catalog">The transform catalog; its environment is passed to the created estimator.</param>
93+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
94+
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
95+
/// <param name="labelColumnName">Name of the label column for supervised binning.</param>
96+
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
97+
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
98+
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
99+
/// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.SupervisedBinningColumOptions"/> holding the supplied arguments.</returns>
100+
public static NormalizingEstimator NormalizeSupervisedBinning(this TransformsCatalog catalog,
101+
string outputColumnName, string inputColumnName = null,
102+
string labelColumnName = DefaultColumnNames.Label,
103+
long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
104+
bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
105+
int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount,
106+
int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
107+
{
// NOTE: "mininimumExamplesPerBin", "MininimumBinSize" and "SupervisedBinningColumOptions" are spelled exactly as
// they are referenced here; renaming the parameter would break callers that pass it as a named argument.
108+
var columnOptions = new NormalizingEstimator.SupervisedBinningColumOptions(outputColumnName, inputColumnName, labelColumnName, maximumExampleCount, fixZero, maximumBinCount, mininimumExamplesPerBin);
109+
return new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columnOptions);
110+
}
111+
}
112+
}

0 commit comments

Comments
 (0)