Skip to content

Commit d22809e

Browse files
committed
Clean static part for previous projection catalog
Scrubbing normalization again
1 parent f09b25f commit d22809e

File tree

19 files changed

+285
-286
lines changed

19 files changed

+285
-286
lines changed

docs/code/MlNetCookBook.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ As a general rule, *if you use a parametric learner, you need to make sure your
595595

596596
ML.NET offers several built-in scaling algorithms, or 'normalizers':
597597
- MinMax normalizer: for each feature, we learn its minimum and maximum values, and then linearly rescale it so that the values fit between -1 and 1.
598-
- MeanVar normalizer: for each feature, compute the mean and variance, and then linearly rescale it to zero-mean, unit-variance.
598+
- MeanVariance normalizer: for each feature, compute the mean and variance, and then linearly rescale it to zero-mean, unit-variance.
599599
- CDF normalizer: for each feature, compute the mean and variance, and then replace each value `x` with `Cdf(x)`, where `Cdf` is the cumulative distribution function of the normal distribution with that mean and variance.
600600
- Binning normalizer: discretize the feature value into `N` 'buckets', and then replace each value with the index of the bucket, divided by `N-1`.
601601

@@ -630,8 +630,8 @@ var trainData = mlContext.Data.LoadFromTextFile<IrisInputAllFeatures>(dataPath,
630630
var pipeline =
631631
mlContext.Transforms.Normalize(
632632
new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
633-
new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true),
634-
new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256));
633+
new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true),
634+
new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256));
635635

636636
// Let's train our pipeline of normalizers, and then apply it to the same data.
637637
var normalizedData = pipeline.Fit(trainData).Transform(trainData);

docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public static void Example()
5555
//0.165 0.117 -0.547 0.014
5656

5757
// A pipeline to project Features column into L-p normalized vector.
58-
var lpNormalizePipeline = ml.Transforms.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.LpNormalizingEstimatorBase.NormFunction.L1);
58+
var lpNormalizePipeline = ml.Transforms.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), norm: Transforms.LpNormalizingEstimatorBase.NormFunction.L1);
5959
// The transformed (projected) data.
6060
transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData);
6161
// Getting the data of the newly created column, so we can preview it.

src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
using Microsoft.ML;
1212
using Microsoft.ML.CommandLine;
1313
using Microsoft.ML.Data;
14-
using Microsoft.ML.EntryPoints;
1514
using Microsoft.ML.Internal.Internallearn;
1615
using Microsoft.ML.Model.OnnxConverter;
1716
using Microsoft.ML.Model.Pfa;
@@ -51,7 +50,7 @@ internal sealed partial class NormalizeTransform
5150
public abstract class ColumnBase : OneToOneColumn
5251
{
5352
[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of examples used to train the normalizer", ShortName = "maxtrain")]
54-
public long? MaxTrainingExamples;
53+
public long? MaximumExampleCount;
5554

5655
private protected ColumnBase()
5756
{
@@ -60,7 +59,7 @@ private protected ColumnBase()
6059
private protected override bool TryUnparseCore(StringBuilder sb)
6160
{
6261
Contracts.AssertValue(sb);
63-
if (MaxTrainingExamples != null)
62+
if (MaximumExampleCount != null)
6463
return false;
6564
return base.TryUnparseCore(sb);
6665
}
@@ -291,7 +290,7 @@ internal static IDataTransform Create(IHostEnvironment env, MinMaxArguments args
291290
.Select(col => new NormalizingEstimator.MinMaxColumnOptions(
292291
col.Name,
293292
col.Source ?? col.Name,
294-
col.MaxTrainingExamples ?? args.MaxTrainingExamples,
293+
col.MaximumExampleCount ?? args.MaxTrainingExamples,
295294
col.FixZero ?? args.FixZero))
296295
.ToArray();
297296
var normalizer = new NormalizingEstimator(env, columns);
@@ -306,10 +305,10 @@ internal static IDataTransform Create(IHostEnvironment env, MeanVarArguments arg
306305
env.CheckValue(args.Columns, nameof(args.Columns));
307306

308307
var columns = args.Columns
309-
.Select(col => new NormalizingEstimator.MeanVarColumnOptions(
308+
.Select(col => new NormalizingEstimator.MeanVarianceColumnOptions(
310309
col.Name,
311310
col.Source ?? col.Name,
312-
col.MaxTrainingExamples ?? args.MaxTrainingExamples,
311+
col.MaximumExampleCount ?? args.MaxTrainingExamples,
313312
col.FixZero ?? args.FixZero))
314313
.ToArray();
315314
var normalizer = new NormalizingEstimator(env, columns);
@@ -326,10 +325,10 @@ internal static IDataTransform Create(IHostEnvironment env, LogMeanVarArguments
326325
env.CheckValue(args.Columns, nameof(args.Columns));
327326

328327
var columns = args.Columns
329-
.Select(col => new NormalizingEstimator.LogMeanVarColumnOptions(
328+
.Select(col => new NormalizingEstimator.LogMeanVarianceColumnOptions(
330329
col.Name,
331330
col.Source ?? col.Name,
332-
col.MaxTrainingExamples ?? args.MaxTrainingExamples,
331+
col.MaximumExampleCount ?? args.MaxTrainingExamples,
333332
args.UseCdf))
334333
.ToArray();
335334
var normalizer = new NormalizingEstimator(env, columns);
@@ -349,7 +348,7 @@ internal static IDataTransform Create(IHostEnvironment env, BinArguments args, I
349348
.Select(col => new NormalizingEstimator.BinningColumnOptions(
350349
col.Name,
351350
col.Source ?? col.Name,
352-
col.MaxTrainingExamples ?? args.MaxTrainingExamples,
351+
col.MaximumExampleCount ?? args.MaxTrainingExamples,
353352
col.FixZero ?? args.FixZero,
354353
col.NumBins ?? args.NumBins))
355354
.ToArray();
@@ -926,7 +925,7 @@ public static IColumnFunctionBuilder CreateBuilder(MinMaxArguments args, IHost h
926925
return CreateBuilder(new NormalizingEstimator.MinMaxColumnOptions(
927926
args.Columns[icol].Name,
928927
args.Columns[icol].Source ?? args.Columns[icol].Name,
929-
args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples,
928+
args.Columns[icol].MaximumExampleCount ?? args.MaxTrainingExamples,
930929
args.Columns[icol].FixZero ?? args.FixZero), host, srcIndex, srcType, cursor);
931930
}
932931

@@ -959,15 +958,15 @@ public static IColumnFunctionBuilder CreateBuilder(MeanVarArguments args, IHost
959958
Contracts.AssertValue(host);
960959
host.AssertValue(args);
961960

962-
return CreateBuilder(new NormalizingEstimator.MeanVarColumnOptions(
961+
return CreateBuilder(new NormalizingEstimator.MeanVarianceColumnOptions(
963962
args.Columns[icol].Name,
964963
args.Columns[icol].Source ?? args.Columns[icol].Name,
965-
args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples,
964+
args.Columns[icol].MaximumExampleCount ?? args.MaxTrainingExamples,
966965
args.Columns[icol].FixZero ?? args.FixZero,
967966
args.UseCdf), host, srcIndex, srcType, cursor);
968967
}
969968

970-
public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarColumnOptions column, IHost host,
969+
public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarianceColumnOptions column, IHost host,
971970
int srcIndex, DataViewType srcType, DataViewRowCursor cursor)
972971
{
973972
Contracts.AssertValue(host);
@@ -999,14 +998,14 @@ public static IColumnFunctionBuilder CreateBuilder(LogMeanVarArguments args, IHo
999998
Contracts.AssertValue(host);
1000999
host.AssertValue(args);
10011000

1002-
return CreateBuilder(new NormalizingEstimator.LogMeanVarColumnOptions(
1001+
return CreateBuilder(new NormalizingEstimator.LogMeanVarianceColumnOptions(
10031002
args.Columns[icol].Name,
10041003
args.Columns[icol].Source ?? args.Columns[icol].Name,
1005-
args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples,
1004+
args.Columns[icol].MaximumExampleCount ?? args.MaxTrainingExamples,
10061005
args.UseCdf), host, srcIndex, srcType, cursor);
10071006
}
10081007

1009-
public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host,
1008+
public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanVarianceColumnOptions column, IHost host,
10101009
int srcIndex, DataViewType srcType, DataViewRowCursor cursor)
10111010
{
10121011
Contracts.AssertValue(host);
@@ -1041,7 +1040,7 @@ public static IColumnFunctionBuilder CreateBuilder(BinArguments args, IHost host
10411040
return CreateBuilder(new NormalizingEstimator.BinningColumnOptions(
10421041
args.Columns[icol].Name,
10431042
args.Columns[icol].Source ?? args.Columns[icol].Name,
1044-
args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples,
1043+
args.Columns[icol].MaximumExampleCount ?? args.MaxTrainingExamples,
10451044
args.Columns[icol].FixZero ?? args.FixZero,
10461045
args.Columns[icol].NumBins ?? args.NumBins), host, srcIndex, srcType, cursor);
10471046
}
@@ -1091,7 +1090,7 @@ public static IColumnFunctionBuilder CreateBuilder(SupervisedBinArguments args,
10911090
args.Columns[icol].Name,
10921091
args.Columns[icol].Source ?? args.Columns[icol].Name,
10931092
args.LabelColumn ?? DefaultColumnNames.Label,
1094-
args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples,
1093+
args.Columns[icol].MaximumExampleCount ?? args.MaxTrainingExamples,
10951094
args.Columns[icol].FixZero ?? args.FixZero,
10961095
args.Columns[icol].NumBins ?? args.NumBins,
10971096
args.MinBinSize),

0 commit comments

Comments
 (0)