diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md index 9a7c1edad3..3393c3ba47 100644 --- a/docs/code/MlNetCookBook.md +++ b/docs/code/MlNetCookBook.md @@ -595,7 +595,7 @@ As a general rule, *if you use a parametric learner, you need to make sure your ML.NET offers several built-in scaling algorithms, or 'normalizers': - MinMax normalizer: for each feature, we learn the minimum and maximum value of it, and then linearly rescale it so that the values fit between -1 and 1. -- MeanVar normalizer: for each feature, compute the mean and variance, and then linearly rescale it to zero-mean, unit-variance. +- MeanVariance normalizer: for each feature, compute the mean and variance, and then linearly rescale it to zero-mean, unit-variance. - CDF normalizer: for each feature, compute the mean and variance, and then replace each value `x` with `Cdf(x)`, where `Cdf` is the cumulative density function of normal distribution with these mean and variance. - Binning normalizer: discretize the feature value into `N` 'buckets', and then replace each value with the index of the bucket, divided by `N-1`. @@ -630,8 +630,8 @@ var trainData = mlContext.Data.LoadFromTextFile(dataPath, var pipeline = mlContext.Transforms.Normalize( new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true), - new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true), - new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256)); + new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true), + new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256)); // Let's train our pipeline of normalizers, and then apply it to the same data. 
var normalizedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs index 9e232f46e7..c5421ac305 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs @@ -32,15 +32,7 @@ public static void Example() // The transformed (normalized according to Normalizer.NormalizerMode.MinMax) data. var transformer = pipeline.Fit(trainData); - var modelParams = transformer.Columns - .First(x => x.Name == "Induced") - .ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters; - - Console.WriteLine($"The normalization parameters are: Scale = {modelParams.Scale} and Offset = {modelParams.Offset}"); - //Preview - // - //The normalization parameters are: Scale = 0.5 and Offset = 0" - + // Normalize the data. var transformedData = transformer.Transform(trainData); // Getting the data of the newly created column, so we can preview it. @@ -94,13 +86,6 @@ public static void Example() // 0 // 0 // 0.1586974 - - // Inspect the weights of normalizing the columns - var multiColModelParams = multiColtransformer.Columns - .First(x=> x.Name == "LogInduced") - .ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; - - Console.WriteLine($"The normalization parameters are: Mean = {multiColModelParams.Mean} and Stddev = {multiColModelParams.Stddev}"); } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs index 2b4b9db567..67d4680796 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs @@ -37,7 +37,7 @@ public static void Example() }; // A pipeline to project Features column into Random fourier space. 
- var rffPipeline = ml.Transforms.RandomFourierKernelMap(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4); + var rffPipeline = ml.Transforms.ApproximatedKernelMap(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4); // The transformed (projected) data. var transformedData = rffPipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. @@ -55,7 +55,7 @@ public static void Example() //0.165 0.117 -0.547 0.014 // A pipeline to project Features column into L-p normalized vector. - var lpNormalizePipeline = ml.Transforms.LpNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), normKind: Transforms.LpNormalizingEstimatorBase.NormFunction.L1); + var lpNormalizePipeline = ml.Transforms.NormalizeLpNorm(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), norm: Transforms.LpNormNormalizingEstimatorBase.NormFunction.L1); // The transformed (projected) data. transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. @@ -73,7 +73,7 @@ public static void Example() // 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 0.111 // A pipeline to project Features column into L-p normalized vector. - var gcNormalizePipeline = ml.Transforms.GlobalContrastNormalize(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean:false); + var gcNormalizePipeline = ml.Transforms.NormalizeGlobalContrast(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean:false); // The transformed (projected) data. transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData); // Getting the data of the newly created column, so we can preview it. 
diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs index 6cd4fc42b9..2b671a63ed 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumn.cs @@ -50,8 +50,9 @@ internal sealed partial class NormalizeTransform { public abstract class ColumnBase : OneToOneColumn { - [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of examples used to train the normalizer", ShortName = "maxtrain")] - public long? MaxTrainingExamples; + [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of examples used to train the normalizer", + Name = "MaxTrainingExamples", ShortName = "maxtrain")] + public long? MaximumExampleCount; private protected ColumnBase() { @@ -60,29 +61,29 @@ private protected ColumnBase() private protected override bool TryUnparseCore(StringBuilder sb) { Contracts.AssertValue(sb); - if (MaxTrainingExamples != null) + if (MaximumExampleCount != null) return false; return base.TryUnparseCore(sb); } } // REVIEW: Support different aggregators on different columns, eg, MinMax vs Variance/ZScore. - public abstract class FixZeroColumnBase : ColumnBase + public abstract class ControlZeroColumnBase : ColumnBase { // REVIEW: This only allows mapping either zero or min to zero. It might make sense to allow also max, midpoint and mean to be mapped to zero. - [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to map zero to zero, preserving sparsity", ShortName = "zero")] - public bool? FixZero; + [Argument(ArgumentType.AtMostOnce, Name="FixZero", HelpText = "Whether to map zero to zero, preserving sparsity", ShortName = "zero")] + public bool? 
EnsureZeroUntouched; private protected override bool TryUnparseCore(StringBuilder sb) { Contracts.AssertValue(sb); - if (FixZero != null) + if (EnsureZeroUntouched != null) return false; return base.TryUnparseCore(sb); } } - public sealed class AffineColumn : FixZeroColumnBase + public sealed class AffineColumn : ControlZeroColumnBase { internal static AffineColumn Parse(string str) { @@ -101,7 +102,7 @@ internal bool TryUnparse(StringBuilder sb) } } - public sealed class BinColumn : FixZeroColumnBase + public sealed class BinColumn : ControlZeroColumnBase { [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of bins, power of 2 recommended", ShortName = "bins")] [TGUI(Label = "Max number of bins")] @@ -147,22 +148,22 @@ internal bool TryUnparse(StringBuilder sb) private static class Defaults { - public const bool FixZero = true; + public const bool EnsureZeroUntouched = true; public const bool MeanVarCdf = false; public const bool LogMeanVarCdf = true; public const int NumBins = 1024; public const int MinBinSize = 10; } - public abstract class FixZeroArgumentsBase : ArgumentsBase + public abstract class ControlZeroArgumentsBase : ArgumentsBase { // REVIEW: This only allows mapping either zero or min to zero. It might make sense to allow also max, midpoint and mean to be mapped to zero. // REVIEW: Convert this to bool? or even an enum{Auto, No, Yes}, and automatically map zero to zero when it is null/Auto. 
- [Argument(ArgumentType.AtMostOnce, HelpText = "Whether to map zero to zero, preserving sparsity", ShortName = "zero")] - public bool FixZero = Defaults.FixZero; + [Argument(ArgumentType.AtMostOnce, Name = "FixZero", HelpText = "Whether to map zero to zero, preserving sparsity", ShortName = "zero")] + public bool EnsureZeroUntouched = Defaults.EnsureZeroUntouched; } - public abstract class AffineArgumentsBase : FixZeroArgumentsBase + public abstract class AffineArgumentsBase : ControlZeroArgumentsBase { [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)] public AffineColumn[] Columns; @@ -182,8 +183,9 @@ public sealed class MeanVarArguments : AffineArgumentsBase public abstract class ArgumentsBase : TransformInputBase { - [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of examples used to train the normalizer", ShortName = "maxtrain")] - public long MaxTrainingExamples = 1000000000; + [Argument(ArgumentType.AtMostOnce, HelpText = "Max number of examples used to train the normalizer", + Name = "MaxTrainingExamples", ShortName = "maxtrain")] + public long MaximumExampleCount = 1000000000; public abstract OneToOneColumn[] GetColumns(); @@ -217,7 +219,7 @@ public sealed class LogMeanVarArguments : ArgumentsBase public override OneToOneColumn[] GetColumns() => Columns; } - public abstract class BinArgumentsBase : FixZeroArgumentsBase + public abstract class BinArgumentsBase : ControlZeroArgumentsBase { [Argument(ArgumentType.Multiple, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)] public BinColumn[] Columns; @@ -291,8 +293,8 @@ internal static IDataTransform Create(IHostEnvironment env, MinMaxArguments args .Select(col => new NormalizingEstimator.MinMaxColumnOptions( col.Name, col.Source ?? col.Name, - col.MaxTrainingExamples ?? args.MaxTrainingExamples, - col.FixZero ?? 
args.FixZero)) + col.MaximumExampleCount ?? args.MaximumExampleCount, + col.EnsureZeroUntouched ?? args.EnsureZeroUntouched)) .ToArray(); var normalizer = new NormalizingEstimator(env, columns); return normalizer.Fit(input).MakeDataTransform(input); @@ -306,11 +308,11 @@ internal static IDataTransform Create(IHostEnvironment env, MeanVarArguments arg env.CheckValue(args.Columns, nameof(args.Columns)); var columns = args.Columns - .Select(col => new NormalizingEstimator.MeanVarColumnOptions( + .Select(col => new NormalizingEstimator.MeanVarianceColumnOptions( col.Name, col.Source ?? col.Name, - col.MaxTrainingExamples ?? args.MaxTrainingExamples, - col.FixZero ?? args.FixZero)) + col.MaximumExampleCount ?? args.MaximumExampleCount, + col.EnsureZeroUntouched ?? args.EnsureZeroUntouched)) .ToArray(); var normalizer = new NormalizingEstimator(env, columns); return normalizer.Fit(input).MakeDataTransform(input); @@ -326,10 +328,10 @@ internal static IDataTransform Create(IHostEnvironment env, LogMeanVarArguments env.CheckValue(args.Columns, nameof(args.Columns)); var columns = args.Columns - .Select(col => new NormalizingEstimator.LogMeanVarColumnOptions( + .Select(col => new NormalizingEstimator.LogMeanVarianceColumnOptions( col.Name, col.Source ?? col.Name, - col.MaxTrainingExamples ?? args.MaxTrainingExamples, + col.MaximumExampleCount ?? args.MaximumExampleCount, args.UseCdf)) .ToArray(); var normalizer = new NormalizingEstimator(env, columns); @@ -349,8 +351,8 @@ internal static IDataTransform Create(IHostEnvironment env, BinArguments args, I .Select(col => new NormalizingEstimator.BinningColumnOptions( col.Name, col.Source ?? col.Name, - col.MaxTrainingExamples ?? args.MaxTrainingExamples, - col.FixZero ?? args.FixZero, + col.MaximumExampleCount ?? args.MaximumExampleCount, + col.EnsureZeroUntouched ?? args.EnsureZeroUntouched, col.NumBins ?? 
args.NumBins)) .ToArray(); var normalizer = new NormalizingEstimator(env, columns); @@ -927,8 +929,8 @@ public static IColumnFunctionBuilder CreateBuilder(MinMaxArguments args, IHost h return CreateBuilder(new NormalizingEstimator.MinMaxColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, - args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, - args.Columns[icol].FixZero ?? args.FixZero), host, srcIndex, srcType, cursor); + args.Columns[icol].MaximumExampleCount ?? args.MaximumExampleCount, + args.Columns[icol].EnsureZeroUntouched ?? args.EnsureZeroUntouched), host, srcIndex, srcType, cursor); } public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MinMaxColumnOptions column, IHost host, @@ -961,15 +963,15 @@ public static IColumnFunctionBuilder CreateBuilder(MeanVarArguments args, IHost Contracts.AssertValue(host); host.AssertValue(args); - return CreateBuilder(new NormalizingEstimator.MeanVarColumnOptions( + return CreateBuilder(new NormalizingEstimator.MeanVarianceColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, - args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, - args.Columns[icol].FixZero ?? args.FixZero, + args.Columns[icol].MaximumExampleCount ?? args.MaximumExampleCount, + args.Columns[icol].EnsureZeroUntouched ?? 
args.EnsureZeroUntouched, args.UseCdf), host, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarColumnOptions column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.MeanVarianceColumnOptions column, IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { Contracts.AssertValue(host); @@ -1001,14 +1003,14 @@ public static IColumnFunctionBuilder CreateBuilder(LogMeanVarArguments args, IHo Contracts.AssertValue(host); host.AssertValue(args); - return CreateBuilder(new NormalizingEstimator.LogMeanVarColumnOptions( + return CreateBuilder(new NormalizingEstimator.LogMeanVarianceColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, - args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, + args.Columns[icol].MaximumExampleCount ?? args.MaximumExampleCount, args.UseCdf), host, srcIndex, srcType, cursor); } - public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, + public static IColumnFunctionBuilder CreateBuilder(NormalizingEstimator.LogMeanVarianceColumnOptions column, IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) { Contracts.AssertValue(host); @@ -1044,8 +1046,8 @@ public static IColumnFunctionBuilder CreateBuilder(BinArguments args, IHost host return CreateBuilder(new NormalizingEstimator.BinningColumnOptions( args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, - args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, - args.Columns[icol].FixZero ?? args.FixZero, + args.Columns[icol].MaximumExampleCount ?? args.MaximumExampleCount, + args.Columns[icol].EnsureZeroUntouched ?? args.EnsureZeroUntouched, args.Columns[icol].NumBins ?? 
args.NumBins), host, srcIndex, srcType, cursor); } @@ -1095,8 +1097,8 @@ public static IColumnFunctionBuilder CreateBuilder(SupervisedBinArguments args, args.Columns[icol].Name, args.Columns[icol].Source ?? args.Columns[icol].Name, args.LabelColumn ?? DefaultColumnNames.Label, - args.Columns[icol].MaxTrainingExamples ?? args.MaxTrainingExamples, - args.Columns[icol].FixZero ?? args.FixZero, + args.Columns[icol].MaximumExampleCount ?? args.MaximumExampleCount, + args.Columns[icol].EnsureZeroUntouched ?? args.EnsureZeroUntouched, args.Columns[icol].NumBins ?? args.NumBins, args.MinBinSize), host, labelColumnId, srcIndex, srcType, cursor); diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs index 33cbcb9f37..87c6b1294a 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs @@ -1430,8 +1430,8 @@ private MinMaxOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGett public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - return new MinMaxOneColumnFunctionBuilder(host, column.MaxTrainingExamples, column.FixZero, getter); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + return new MinMaxOneColumnFunctionBuilder(host, column.MaximumExampleCount, column.EnsureZeroUntouched, getter); } public override IColumnFunction CreateColumnFunction() @@ -1480,9 +1480,9 @@ private MinMaxVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, 
nameof(column.MaxTrainingExamples), "Must be greater than 1"); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); var cv = srcType.Size; - return new MinMaxVecColumnFunctionBuilder(host, cv, column.MaxTrainingExamples, column.FixZero, getter); + return new MinMaxVecColumnFunctionBuilder(host, cv, column.MaximumExampleCount, column.EnsureZeroUntouched, getter); } public override IColumnFunction CreateColumnFunction() @@ -1539,18 +1539,18 @@ private MeanVarOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGet _buffer = new VBuffer(1, new TFloat[1]); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarianceColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - return new MeanVarOneColumnFunctionBuilder(host, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + return new MeanVarOneColumnFunctionBuilder(host, column.MaximumExampleCount, column.EnsureZeroUntouched, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarianceColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); return new 
MeanVarOneColumnFunctionBuilder(host, lim, false, getter, true, column.UseCdf); } @@ -1613,19 +1613,19 @@ private MeanVarVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, _useCdf = useCdf; } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarianceColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); var cv = srcType.Size; - return new MeanVarVecColumnFunctionBuilder(host, cv, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); + return new MeanVarVecColumnFunctionBuilder(host, cv, column.MaximumExampleCount, column.EnsureZeroUntouched, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarianceColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); var cv = srcType.Size; return new MeanVarVecColumnFunctionBuilder(host, cv, lim, false, getter, true, column.UseCdf); } @@ -1732,11 +1732,11 @@ private BinOneColumnFunctionBuilder(IHost host, long lim, bool fix, int numBins, public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - var lim = column.MaxTrainingExamples; - 
host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); return new BinOneColumnFunctionBuilder(host, lim, fix, numBins, getter); } @@ -1781,11 +1781,11 @@ private BinVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, int public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); var cv = srcType.Size; return new BinVecColumnFunctionBuilder(host, cv, lim, fix, numBins, getter); } @@ -1864,13 +1864,13 @@ public override IColumnFunction CreateColumnFunction() public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; 
- host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); - host.CheckUserArg(column.MinBinSize > 0, nameof(column.MinBinSize), "Must be positive"); - return new SupervisedBinOneColumnFunctionBuilder(host, lim, fix, numBins, column.MinBinSize, valueColumnId, labelColumnId, dataRow); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); + host.CheckUserArg(column.MininimumBinSize > 0, nameof(column.MininimumBinSize), "Must be positive"); + return new SupervisedBinOneColumnFunctionBuilder(host, lim, fix, numBins, column.MininimumBinSize, valueColumnId, labelColumnId, dataRow); } } @@ -1904,13 +1904,13 @@ public override IColumnFunction CreateColumnFunction() public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); - host.CheckUserArg(column.MinBinSize > 0, nameof(column.MinBinSize), "Must be positive"); - return new SupervisedBinVecColumnFunctionBuilder(host, lim, fix, numBins, column.MinBinSize, valueColumnId, labelColumnId, dataRow); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); + host.CheckUserArg(column.MininimumBinSize > 0, 
nameof(column.MininimumBinSize), "Must be positive"); + return new SupervisedBinVecColumnFunctionBuilder(host, lim, fix, numBins, column.MininimumBinSize, valueColumnId, labelColumnId, dataRow); } } } diff --git a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs index 51a179ceff..60fb8a763a 100644 --- a/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs +++ b/src/Microsoft.ML.Data/Transforms/NormalizeColumnSng.cs @@ -1437,8 +1437,8 @@ private MinMaxOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGett public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - return new MinMaxOneColumnFunctionBuilder(host, column.MaxTrainingExamples, column.FixZero, getter); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + return new MinMaxOneColumnFunctionBuilder(host, column.MaximumExampleCount, column.EnsureZeroUntouched, getter); } public override IColumnFunction CreateColumnFunction() @@ -1487,9 +1487,9 @@ private MinMaxVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, public static IColumnFunctionBuilder Create(NormalizingEstimator.MinMaxColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); var cv = srcType.Size; - return new MinMaxVecColumnFunctionBuilder(host, cv, column.MaxTrainingExamples, column.FixZero, getter); + return new MinMaxVecColumnFunctionBuilder(host, cv, column.MaximumExampleCount, column.EnsureZeroUntouched, getter); } public override 
IColumnFunction CreateColumnFunction() @@ -1546,18 +1546,18 @@ private MeanVarOneColumnFunctionBuilder(IHost host, long lim, bool fix, ValueGet _buffer = new VBuffer(1, new TFloat[1]); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarianceColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - return new MeanVarOneColumnFunctionBuilder(host, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + return new MeanVarOneColumnFunctionBuilder(host, column.MaximumExampleCount, column.EnsureZeroUntouched, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, DataViewType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarianceColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); return new MeanVarOneColumnFunctionBuilder(host, lim, false, getter, true, column.UseCdf); } @@ -1620,19 +1620,19 @@ private MeanVarVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, _useCdf = useCdf; } - public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarColumnOptions column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.MeanVarianceColumnOptions column, IHost host, VectorType 
srcType, ValueGetter> getter) { - host.CheckUserArg(column.MaxTrainingExamples > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + host.CheckUserArg(column.MaximumExampleCount > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); var cv = srcType.Size; - return new MeanVarVecColumnFunctionBuilder(host, cv, column.MaxTrainingExamples, column.FixZero, getter, false, column.UseCdf); + return new MeanVarVecColumnFunctionBuilder(host, cv, column.MaximumExampleCount, column.EnsureZeroUntouched, getter, false, column.UseCdf); } - public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarColumnOptions column, IHost host, VectorType srcType, + public static IColumnFunctionBuilder Create(NormalizingEstimator.LogMeanVarianceColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); var cv = srcType.Size; return new MeanVarVecColumnFunctionBuilder(host, cv, lim, false, getter, true, column.UseCdf); } @@ -1739,11 +1739,11 @@ private BinOneColumnFunctionBuilder(IHost host, long lim, bool fix, int numBins, public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, DataViewType srcType, ValueGetter getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + 
host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); return new BinOneColumnFunctionBuilder(host, lim, fix, numBins, getter); } @@ -1788,11 +1788,11 @@ private BinVecColumnFunctionBuilder(IHost host, int cv, long lim, bool fix, int public static IColumnFunctionBuilder Create(NormalizingEstimator.BinningColumnOptions column, IHost host, VectorType srcType, ValueGetter> getter) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); var cv = srcType.Size; return new BinVecColumnFunctionBuilder(host, cv, lim, fix, numBins, getter); } @@ -1872,13 +1872,13 @@ public override IColumnFunction CreateColumnFunction() public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); - host.CheckUserArg(column.MinBinSize > 0, nameof(column.MinBinSize), "Must be positive"); - return new SupervisedBinOneColumnFunctionBuilder(host, lim, fix, numBins, column.MinBinSize, valueColumnId, labelColumnId, dataRow); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool 
fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); + host.CheckUserArg(column.MininimumBinSize > 0, nameof(column.MininimumBinSize), "Must be positive"); + return new SupervisedBinOneColumnFunctionBuilder(host, lim, fix, numBins, column.MininimumBinSize, valueColumnId, labelColumnId, dataRow); } } @@ -1912,13 +1912,13 @@ public override IColumnFunction CreateColumnFunction() public static IColumnFunctionBuilder Create(NormalizingEstimator.SupervisedBinningColumOptions column, IHost host, int valueColumnId, int labelColumnId, DataViewRow dataRow) { - var lim = column.MaxTrainingExamples; - host.CheckUserArg(lim > 1, nameof(column.MaxTrainingExamples), "Must be greater than 1"); - bool fix = column.FixZero; - var numBins = column.NumBins; - host.CheckUserArg(numBins > 1, nameof(column.NumBins), "Must be greater than 1"); - host.CheckUserArg(column.MinBinSize > 0, nameof(column.MinBinSize), "Must be positive"); - return new SupervisedBinVecColumnFunctionBuilder(host, lim, fix, numBins, column.MinBinSize, valueColumnId, labelColumnId, dataRow); + var lim = column.MaximumExampleCount; + host.CheckUserArg(lim > 1, nameof(column.MaximumExampleCount), "Must be greater than 1"); + bool fix = column.EnsureZeroUntouched; + var numBins = column.MaximumBinCount; + host.CheckUserArg(numBins > 1, nameof(column.MaximumBinCount), "Must be greater than 1"); + host.CheckUserArg(column.MininimumBinSize > 0, nameof(column.MininimumBinSize), "Must be positive"); + return new SupervisedBinVecColumnFunctionBuilder(host, lim, fix, numBins, column.MininimumBinSize, valueColumnId, labelColumnId, dataRow); } } } diff --git a/src/Microsoft.ML.Data/Transforms/Normalizer.cs b/src/Microsoft.ML.Data/Transforms/Normalizer.cs index ed5935bf54..3e90ccb12d 100644 --- a/src/Microsoft.ML.Data/Transforms/Normalizer.cs +++ b/src/Microsoft.ML.Data/Transforms/Normalizer.cs @@ -32,12 +32,12 @@ 
public sealed class NormalizingEstimator : IEstimator [BestFriend] internal static class Defaults { - public const bool FixZero = true; + public const bool EnsureZeroUntouched = true; public const bool MeanVarCdf = false; public const bool LogMeanVarCdf = true; - public const int NumBins = 1024; - public const int MinBinSize = 10; - public const long MaxTrainingExamples = 1000000000; + public const int MaximumBinCount = 1024; + public const int MininimumBinSize = 10; + public const long MaximumExampleCount = 1000000000; } public enum NormalizationMode @@ -68,17 +68,17 @@ public abstract class ColumnOptionsBase { public readonly string Name; public readonly string InputColumnName; - public readonly long MaxTrainingExamples; + public readonly long MaximumExampleCount; - private protected ColumnOptionsBase(string name, string inputColumnName, long maxTrainingExamples) + private protected ColumnOptionsBase(string name, string inputColumnName, long maximumExampleCount) { Contracts.CheckNonEmpty(name, nameof(name)); Contracts.CheckNonEmpty(inputColumnName, nameof(inputColumnName)); - Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1"); + Contracts.CheckParam(maximumExampleCount > 1, nameof(maximumExampleCount), "Must be greater than 1"); Name = name; InputColumnName = inputColumnName; - MaxTrainingExamples = maxTrainingExamples; + MaximumExampleCount = maximumExampleCount; } internal abstract IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor); @@ -90,9 +90,9 @@ internal static ColumnOptionsBase Create(string outputColumnName, string inputCo case NormalizationMode.MinMax: return new MinMaxColumnOptions(outputColumnName, inputColumnName); case NormalizationMode.MeanVariance: - return new MeanVarColumnOptions(outputColumnName, inputColumnName); + return new MeanVarianceColumnOptions(outputColumnName, inputColumnName); case NormalizationMode.LogMeanVariance: - return 
new LogMeanVarColumnOptions(outputColumnName, inputColumnName); + return new LogMeanVarianceColumnOptions(outputColumnName, inputColumnName); case NormalizationMode.Binning: return new BinningColumnOptions(outputColumnName, inputColumnName); case NormalizationMode.SupervisedBinning: @@ -103,21 +103,21 @@ internal static ColumnOptionsBase Create(string outputColumnName, string inputCo } } - public abstract class FixZeroColumnOptionsBase : ColumnOptionsBase + public abstract class ControlZeroColumnOptionsBase : ColumnOptionsBase { - public readonly bool FixZero; + public readonly bool EnsureZeroUntouched; - private protected FixZeroColumnOptionsBase(string outputColumnName, string inputColumnName, long maxTrainingExamples, bool fixZero) - : base(outputColumnName, inputColumnName, maxTrainingExamples) + private protected ControlZeroColumnOptionsBase(string outputColumnName, string inputColumnName, long maximumExampleCount, bool ensureZeroUntouched) + : base(outputColumnName, inputColumnName, maximumExampleCount) { - FixZero = fixZero; + EnsureZeroUntouched = ensureZeroUntouched; } } - public sealed class MinMaxColumnOptions : FixZeroColumnOptionsBase + public sealed class MinMaxColumnOptions : ControlZeroColumnOptionsBase { - public MinMaxColumnOptions(string outputColumnName, string inputColumnName = null, long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = Defaults.FixZero) - : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples, fixZero) + public MinMaxColumnOptions(string outputColumnName, string inputColumnName = null, long maximumExampleCount = Defaults.MaximumExampleCount, bool ensureZeroUntouched = Defaults.EnsureZeroUntouched) + : base(outputColumnName, inputColumnName ?? 
outputColumnName, maximumExampleCount, ensureZeroUntouched) { } @@ -125,13 +125,13 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MinMaxUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class MeanVarColumnOptions : FixZeroColumnOptionsBase + public sealed class MeanVarianceColumnOptions : ControlZeroColumnOptionsBase { public readonly bool UseCdf; - public MeanVarColumnOptions(string outputColumnName, string inputColumnName = null, - long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = Defaults.FixZero, bool useCdf = Defaults.MeanVarCdf) - : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples, fixZero) + public MeanVarianceColumnOptions(string outputColumnName, string inputColumnName = null, + long maximumExampleCount = Defaults.MaximumExampleCount, bool fixZero = Defaults.EnsureZeroUntouched, bool useCdf = Defaults.MeanVarCdf) + : base(outputColumnName, inputColumnName ?? outputColumnName, maximumExampleCount, fixZero) { UseCdf = useCdf; } @@ -140,13 +140,13 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.MeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class LogMeanVarColumnOptions : ColumnOptionsBase + public sealed class LogMeanVarianceColumnOptions : ColumnOptionsBase { public readonly bool UseCdf; - public LogMeanVarColumnOptions(string outputColumnName, string inputColumnName = null, - long maxTrainingExamples = Defaults.MaxTrainingExamples, bool useCdf = Defaults.LogMeanVarCdf) - : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples) + public LogMeanVarianceColumnOptions(string outputColumnName, string inputColumnName = null, + long maximumExampleCount = Defaults.MaximumExampleCount, bool useCdf = Defaults.LogMeanVarCdf) + : base(outputColumnName, inputColumnName ?? 
outputColumnName, maximumExampleCount) { UseCdf = useCdf; } @@ -155,42 +155,42 @@ internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, D => NormalizeTransform.LogMeanVarUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class BinningColumnOptions : FixZeroColumnOptionsBase + public sealed class BinningColumnOptions : ControlZeroColumnOptionsBase { - public readonly int NumBins; + public readonly int MaximumBinCount; public BinningColumnOptions(string outputColumnName, string inputColumnName = null, - long maxTrainingExamples = Defaults.MaxTrainingExamples, bool fixZero = true, int numBins = Defaults.NumBins) - : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples, fixZero) + long maximumExampleCount = Defaults.MaximumExampleCount, bool fixZero = true, int maximumBinCount = Defaults.MaximumBinCount) + : base(outputColumnName, inputColumnName ?? outputColumnName, maximumExampleCount, fixZero) { - NumBins = numBins; + MaximumBinCount = maximumBinCount; } internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) => NormalizeTransform.BinUtils.CreateBuilder(this, host, srcIndex, srcType, cursor); } - public sealed class SupervisedBinningColumOptions : FixZeroColumnOptionsBase + public sealed class SupervisedBinningColumOptions : ControlZeroColumnOptionsBase { - public readonly int NumBins; - public readonly string LabelColumn; - public readonly int MinBinSize; + public readonly int MaximumBinCount; + public readonly string LabelColumnName; + public readonly int MininimumBinSize; public SupervisedBinningColumOptions(string outputColumnName, string inputColumnName = null, - string labelColumn = DefaultColumnNames.Label, - long maxTrainingExamples = Defaults.MaxTrainingExamples, + string labelColumnName = DefaultColumnNames.Label, + long maximumExampleCount = Defaults.MaximumExampleCount, bool fixZero = true, - int numBins = 
Defaults.NumBins, - int minBinSize = Defaults.MinBinSize) - : base(outputColumnName, inputColumnName ?? outputColumnName, maxTrainingExamples, fixZero) + int maximumBinCount = Defaults.MaximumBinCount, + int mininimumBinSize = Defaults.MininimumBinSize) + : base(outputColumnName, inputColumnName ?? outputColumnName, maximumExampleCount, fixZero) { - NumBins = numBins; - LabelColumn = labelColumn; - MinBinSize = minBinSize; + MaximumBinCount = maximumBinCount; + LabelColumnName = labelColumnName; + MininimumBinSize = mininimumBinSize; } internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, DataViewType srcType, DataViewRowCursor cursor) - => NormalizeTransform.SupervisedBinUtils.CreateBuilder(this, host, LabelColumn, srcIndex, srcType, cursor); + => NormalizeTransform.SupervisedBinUtils.CreateBuilder(this, host, LabelColumnName, srcIndex, srcType, cursor); } private readonly IHost _host; @@ -384,7 +384,25 @@ public ColumnFunctionAccessor(ImmutableArray infos) [BestFriend] internal readonly IReadOnlyList ColumnFunctions; - public readonly ImmutableArray Columns; + /// + /// The configuration of the normalizer. The i-th element describes the i-th input-output column pair. + /// + [BestFriend] + internal readonly ImmutableArray Columns; + + /// + /// The normalization configurations of input columns. It returns the normalization parameters applied to the -th input column. + /// + /// column index. + /// the normalization parameters applied to the -th input column. + public NormalizerModelParametersBase GetNormalizerModelParameters(int index) + { + string errMsg = "Not valid. 
Valid range is from 0 (inclusive) to " + Columns.Length + " (exclusive) but got " + index + "."; + Contracts.CheckUserArg(index >= 0 && index < Columns.Length, nameof(index), errMsg); + + return Columns[index].ModelParameters; + } + private NormalizingTransformer(IHostEnvironment env, ColumnOptions[] columns) : base(env.Register(nameof(NormalizingTransformer)), columns.Select(x => (x.Name, x.InputColumnName)).ToArray()) { @@ -413,7 +431,7 @@ internal static NormalizingTransformer Train(IHostEnvironment env, IDataView dat var supervisedBinColumn = info as NormalizingEstimator.SupervisedBinningColumOptions; if(supervisedBinColumn != null) - activeCols.Add(data.Schema[supervisedBinColumn.LabelColumn]); + activeCols.Add(data.Schema[supervisedBinColumn.LabelColumnName]); } var functionBuilders = new IColumnFunctionBuilder[columns.Length]; @@ -752,7 +770,7 @@ internal AffineNormalizerModelParameters(TData scale, TData offset) /// /// The model parameters generated by cumulative distribution normalization transformations. /// The cumulative density function is parameterized by and - /// the as observed during fitting. + /// the as observed during fitting. /// /// /// @@ -773,7 +791,7 @@ public sealed class CdfNormalizerModelParameters : NormalizerModelParamet /// The standard deviation(s). In the scalar case, this is a single value. In the vector case this is of /// length equal to the number of slots. /// - public TData Stddev { get; } + public TData StandardDeviation { get; } /// /// Whether the we ought to apply a logarithm to the input first. 
@@ -786,7 +804,7 @@ public sealed class CdfNormalizerModelParameters : NormalizerModelParamet internal CdfNormalizerModelParameters(TData mean, TData stddev, bool useLog) { Mean = mean; - Stddev = stddev; + StandardDeviation = stddev; UseLog = useLog; } } @@ -810,7 +828,7 @@ public sealed class BinNormalizerModelParameters : NormalizerModelParamet public TData Density { get; } /// - /// If normalization is performed with set to true, + /// If normalization is performed with set to true, /// the offset indicates the displacement of zero, if any. /// public TData Offset { get; } diff --git a/src/Microsoft.ML.StaticPipe/LpNormalizerStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/LpNormalizerStaticExtensions.cs index 337dad694d..a3f9cddc9c 100644 --- a/src/Microsoft.ML.StaticPipe/LpNormalizerStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/LpNormalizerStaticExtensions.cs @@ -9,15 +9,15 @@ namespace Microsoft.ML.StaticPipe { /// - /// Extensions for statically typed . + /// Extensions for statically typed . 
/// - public static class LpNormalizerStaticExtensions + public static class LpNormNormalizerStaticExtensions { private sealed class OutPipelineColumn : Vector { public readonly Vector Input; - public OutPipelineColumn(Vector input, LpNormalizingEstimatorBase.NormFunction norm, bool ensureZeroMean) + public OutPipelineColumn(Vector input, LpNormNormalizingEstimatorBase.NormFunction norm, bool ensureZeroMean) : base(new Reconciler(norm, ensureZeroMean), input) { Input = input; @@ -26,12 +26,12 @@ public OutPipelineColumn(Vector input, LpNormalizingEstimatorBase.NormFun private sealed class Reconciler : EstimatorReconciler { - private readonly LpNormalizingEstimatorBase.NormFunction _norm; + private readonly LpNormNormalizingEstimatorBase.NormFunction _norm; private readonly bool _ensureZeroMean; - public Reconciler(LpNormalizingEstimatorBase.NormFunction normKind, bool ensureZeroMean) + public Reconciler(LpNormNormalizingEstimatorBase.NormFunction norm, bool ensureZeroMean) { - _norm = normKind; + _norm = norm; _ensureZeroMean = ensureZeroMean; } @@ -47,16 +47,16 @@ public override IEstimator Reconcile(IHostEnvironment env, foreach (var outCol in toOutput) pairs.Add((outputNames[outCol], inputNames[((OutPipelineColumn)outCol).Input])); - return new LpNormalizingEstimator(env, pairs.ToArray(), _norm, _ensureZeroMean); + return new LpNormNormalizingEstimator(env, pairs.ToArray(), _norm, _ensureZeroMean); } } /// - /// The column to apply to. - /// Type of norm to use to normalize each sample. - /// Subtract mean from each value before normalizing. - public static Vector LpNormalize(this Vector input, - LpNormalizingEstimatorBase.NormFunction normKind = LpNormalizingEstimatorBase.Defaults.Norm, - bool subMean = LpNormalizingEstimatorBase.Defaults.LpEnsureZeroMean) => new OutPipelineColumn(input, normKind, subMean); + /// The column containing the vectors to apply the normalization to. + /// Type of norm to use to normalize each sample. 
+ /// Subtract mean from each value before normalizing. + public static Vector NormalizeLpNorm(this Vector input, + LpNormNormalizingEstimatorBase.NormFunction norm = LpNormNormalizingEstimatorBase.Defaults.Norm, + bool ensureZeroMean = LpNormNormalizingEstimatorBase.Defaults.LpEnsureZeroMean) => new OutPipelineColumn(input, norm, ensureZeroMean); } } diff --git a/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs b/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs index 2e7480dbe4..f571dbc5ea 100644 --- a/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs +++ b/src/Microsoft.ML.StaticPipe/NormalizerStaticExtensions.cs @@ -17,61 +17,62 @@ namespace Microsoft.ML.StaticPipe /// public static class NormalizerStaticExtensions { - private const long MaxTrain = NormalizingEstimator.Defaults.MaxTrainingExamples; - private const bool FZ = NormalizingEstimator.Defaults.FixZero; + private const long MaxTrain = NormalizingEstimator.Defaults.MaximumExampleCount; /// /// Learns an affine function based on the minimum and maximum, so that all values between the minimum and /// maximum observed during fitting fall into the range of -1 to 1. /// /// The input column. - /// If set to false, then the observed minimum and maximum during fitting + /// If set to false, then the observed minimum and maximum during fitting /// will map to -1 and 1 respectively, exactly. If however set to true, then 0 will always map to 0. /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors. - /// When gathering statistics only look at most this many examples. + /// When gathering statistics only look at most this many examples. /// A delegate that can be called whenever the function is fit, with the learned slopes - /// and, if is false, the offsets as well. + /// and, if is false, the offsets as well. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. 
/// Note that if values are later transformed that are lower than the minimum, or higher than the maximum, /// observed during fitting, that the output values may be outside the range of -1 to 1. /// The normalized column. public static NormVector Normalize( - this Vector input, bool fixZero = FZ, long maxTrainingExamples = MaxTrain, + this Vector input, bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitAffine> onFit = null) { - return NormalizeByMinMaxCore(input, fixZero, maxTrainingExamples, onFit); + return NormalizeByMinMaxCore(input, ensureZeroUntouched, maximumExampleCount, onFit); } /// /// Learns an affine function based on the minimum and maximum, so that all values between the minimum and /// maximum observed during fitting fall into the range of -1 to 1. /// - /// The input column. - /// If set to false, then the observed minimum and maximum during fitting + /// The column containing the vectors to apply the normalization to. + /// If set to false, then the observed minimum and maximum during fitting /// will map to -1 and 1 respectively, exactly. If however set to true, then 0 will always map to 0. /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors. - /// When gathering statistics only look at most this many examples. + /// When gathering statistics only look at most this many examples. /// A delegate called whenever the estimator is fit, with the learned slopes - /// and, if is false, the offsets as well. + /// and, if is false, the offsets as well. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// Note that if values are later transformed that are lower than the minimum, or higher than the maximum, /// observed during fitting, that the output values may be outside the range of -1 to 1. /// The normalized column. 
public static NormVector Normalize( - this Vector input, bool fixZero = FZ, long maxTrainingExamples = MaxTrain, + this Vector input, bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitAffine> onFit = null) { - return NormalizeByMinMaxCore(input, fixZero, maxTrainingExamples, onFit); + return NormalizeByMinMaxCore(input, ensureZeroUntouched, maximumExampleCount, onFit); } - private static NormVector NormalizeByMinMaxCore(Vector input, bool fixZero, long maxTrainingExamples, + private static NormVector NormalizeByMinMaxCore(Vector input, bool ensureZeroUntouched, long maximumExampleCount, OnFitAffine> onFit) { Contracts.CheckValue(input, nameof(input)); - Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1"); - return new Impl(input, (name, src) => new NormalizingEstimator.MinMaxColumnOptions(name, src, maxTrainingExamples, fixZero), AffineMapper(onFit)); + Contracts.CheckParam(maximumExampleCount > 1, nameof(maximumExampleCount), "Must be greater than 1"); + return new Impl(input, (name, src) => new NormalizingEstimator.MinMaxColumnOptions(name, src, maximumExampleCount, ensureZeroUntouched), AffineMapper(onFit)); } // We have a slightly different breaking up of categories of normalizers versus the dynamic API. Both the mean-var and @@ -82,99 +83,103 @@ private static NormVector NormalizeByMinMaxCore(Vector input, bool fixZ /// Learns an affine function based on the observed mean and standard deviation. This is less susceptible /// to outliers as compared to . /// - /// The input column. - /// If set to true then the offset will always be considered zero. + /// The column containing the vectors to apply the normalization to. + /// If set to true then the offset will always be considered zero. /// If set to true then we transform over the logarithm of the values, rather - /// than just the raw values. 
If this is set to true then is ignored. - /// When gathering statistics only look at most this many examples. + /// than just the raw values. If this is set to true then is ignored. + /// When gathering statistics only look at most this many examples. /// A delegate called whenever the estimator is fit, with the learned slopes - /// and, if is false, the offsets as well. + /// and, if is false, the offsets as well. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// The normalized column. - public static NormVector NormalizeByMeanVar( - this Vector input, bool fixZero = FZ, bool useLog = false, long maxTrainingExamples = MaxTrain, + public static NormVector NormalizeMeanVariance( + this Vector input, bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useLog = false, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitAffine> onFit = null) { - return NormalizeByMVCdfCore(input, fixZero, useLog, false, maxTrainingExamples, AffineMapper(onFit)); + return NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, false, maximumExampleCount, AffineMapper(onFit)); } /// /// Learns an affine function based on the observed mean and standard deviation. This is less susceptible /// to outliers as compared to . /// - /// The input column. - /// If set to true then the offset will always be considered zero. + /// The column containing the vectors to apply the normalization to. + /// If set to true then the offset will always be considered zero. /// If set to true then we transform over the logarithm of the values, rather - /// than just the raw values. If this is set to true then is ignored. - /// When gathering statistics only look at most this many examples. + /// than just the raw values. If this is set to true then is ignored. + /// When gathering statistics only look at most this many examples. 
/// A delegate called whenever the estimator is fit, with the learned slopes - /// and, if is false, the offsets as well. + /// and, if is false, the offsets as well. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// The normalized column. - public static NormVector NormalizeByMeanVar( - this Vector input, bool fixZero = FZ, bool useLog = false, long maxTrainingExamples = MaxTrain, + public static NormVector NormalizeMeanVariance( + this Vector input, bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useLog = false, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitAffine> onFit = null) { - return NormalizeByMVCdfCore(input, fixZero, useLog, false, maxTrainingExamples, AffineMapper(onFit)); + return NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, false, maximumExampleCount, AffineMapper(onFit)); } /// /// Learns a function based on the cumulative density function of a normal distribution parameterized by /// a mean and variance as observed during fitting. /// - /// The input column. - /// If set to false, then the learned distributional parameters will be + /// The column containing the vectors to apply the normalization to. + /// If set to false, then the learned distributional parameters will be /// adjusted in such a way as to ensure that the input 0 maps to the output 0. /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors. /// If set to true then we transform over the logarithm of the values, rather - /// than just the raw values. If this is set to true then is ignored. - /// When gathering statistics only look at most this many examples. + /// than just the raw values. If this is set to true then is ignored. + /// When gathering statistics only look at most this many examples. 
/// A delegate called whenever the estimator is fit, with the learned mean and standard /// deviation for all slots. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// The normalized column. public static NormVector NormalizeByCumulativeDistribution( - this Vector input, bool fixZero = FZ, bool useLog = false, long maxTrainingExamples = MaxTrain, + this Vector input, bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useLog = false, long maximumNumberOfExamples = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitCumulativeDistribution> onFit = null) { - return NormalizeByMVCdfCore(input, fixZero, useLog, true, maxTrainingExamples, CdfMapper(onFit)); + return NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, true, maximumNumberOfExamples, CdfMapper(onFit)); } /// /// Learns a function based on the cumulative density function of a normal distribution parameterized by /// a mean and variance as observed during fitting. /// - /// The input column. - /// If set to false, then the learned distributional parameters will be + /// The column containing the vectors to apply the normalization to. + /// If set to false, then the learned distributional parameters will be /// adjusted in such a way as to ensure that the input 0 maps to the output 0. /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors. /// If set to true then we transform over the logarithm of the values, rather - /// than just the raw values. If this is set to true then is ignored. - /// When gathering statistics only look at most this many examples. + /// than just the raw values. If this is set to true then is ignored. + /// When gathering statistics only look at most this many examples. /// A delegate called whenever the estimator is fit, with the learned mean and standard /// deviation for all slots. 
/// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// The normalized column. public static NormVector NormalizeByCumulativeDistribution( - this Vector input, bool fixZero = FZ, bool useLog = false, long maxTrainingExamples = MaxTrain, + this Vector input, bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + bool useLog = false, long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitCumulativeDistribution> onFit = null) { - return NormalizeByMVCdfCore(input, fixZero, useLog, true, maxTrainingExamples, CdfMapper(onFit)); + return NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, true, maximumExampleCount, CdfMapper(onFit)); } - private static NormVector NormalizeByMVCdfCore(Vector input, bool fixZero, bool useLog, bool useCdf, long maxTrainingExamples, Action onFit) + private static NormVector NormalizeByMVCdfCore(Vector input, bool ensureZeroUntouched, bool useLog, bool useCdf, long maximumExampleCount, Action onFit) { Contracts.CheckValue(input, nameof(input)); - Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1"); + Contracts.CheckParam(maximumExampleCount > 1, nameof(maximumExampleCount), "Must be greater than 1"); return new Impl(input, (name, src) => { if (useLog) - return new NormalizingEstimator.LogMeanVarColumnOptions(name, src, maxTrainingExamples, useCdf); - return new NormalizingEstimator.MeanVarColumnOptions(name, src, maxTrainingExamples, fixZero, useCdf); + return new NormalizingEstimator.LogMeanVarianceColumnOptions(name, src, maximumExampleCount, useCdf); + return new NormalizingEstimator.MeanVarianceColumnOptions(name, src, maximumExampleCount, ensureZeroUntouched, useCdf); }, onFit); } @@ -184,23 +189,25 @@ private static NormVector NormalizeByMVCdfCore(Vector input, bool fixZe /// to make these bins equal in population, but under some circumstances this may be impossible (for 
example, a slot /// with a very dominant mode). The way the mapping works is, if there are N bins in a slot, and a value /// falls in the range of bin n (indexed from 0), the output value is n / (N - 1), and then possibly - /// subtracting off the binned value for what 0 would have been if is true. + /// subtracting off the binned value for what 0 would have been if is true. /// - /// The input column. - /// The maximum number of discretization points to learn per slot. - /// Normally the output is in the range of 0 to 1, but if set to true, then what + /// The column containing the vectors to apply the normalization to. + /// The maximum number of discretization points to learn per slot. + /// Normally the output is in the range of 0 to 1, but if set to true, then what /// would have been the output for a zero input is subtracted off the value. /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors. - /// When gathering statistics only look at most this many examples. + /// When gathering statistics only look at most this many examples. /// A delegate called whenever the estimator is fit, with the bin upper bounds for each slot. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// The normalized column. 
public static NormVector NormalizeByBinning( - this Vector input, int maxBins = NormalizingEstimator.Defaults.NumBins, bool fixZero = FZ, long maxTrainingExamples = MaxTrain, + this Vector input, int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, + bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitBinned> onFit = null) { - return NormalizeByBinningCore(input, maxBins, fixZero, maxTrainingExamples, onFit); + return NormalizeByBinningCore(input, maximumBinCount, ensureZeroUntouched, maximumExampleCount, onFit); } /// @@ -209,32 +216,34 @@ public static NormVector NormalizeByBinning( /// to make these bins equal in population, but under some circumstances this may be impossible (for example, a slot /// with a very dominant mode). The way the mapping works is, if there are N bins in a slot, and a value /// falls in the range of bin n (indexed from 0), the output value is n / (N - 1), and then possibly - /// subtracting off the binned value for what 0 would have been if is true. + /// subtracting off the binned value for what 0 would have been if is true. /// - /// The input column. - /// The maximum number of discretization points to learn per slot. - /// Normally the output is in the range of 0 to 1, but if set to true, then what + /// The column containing the vectors to apply the normalization to. + /// The maximum number of discretization points to learn per slot. + /// Normally the output is in the range of 0 to 1, but if set to true, then what /// would have been the output for a zero input is subtracted off the value. /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors. - /// When gathering statistics only look at most this many examples. + /// When gathering statistics only look at most this many examples. 
/// A delegate called whenever the estimator is fit, with the bin upper bounds for each slot. /// Note that the statistics gathering and normalization is done independently per slot of the /// vector values. /// The normalized column. public static NormVector NormalizeByBinning( - this Vector input, int maxBins = NormalizingEstimator.Defaults.NumBins, bool fixZero = FZ, long maxTrainingExamples = MaxTrain, + this Vector input, int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount, + bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched, + long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount, OnFitBinned> onFit = null) { - return NormalizeByBinningCore(input, maxBins, fixZero, maxTrainingExamples, onFit); + return NormalizeByBinningCore(input, maximumBinCount, ensureZeroUntouched, maximumExampleCount, onFit); } - private static NormVector NormalizeByBinningCore(Vector input, int numBins, bool fixZero, long maxTrainingExamples, + private static NormVector NormalizeByBinningCore(Vector input, int maximumBinCount, bool ensureZeroUntouched, long maximumExampleCount, OnFitBinned> onFit) { Contracts.CheckValue(input, nameof(input)); - Contracts.CheckParam(numBins > 1, nameof(maxTrainingExamples), "Must be greater than 1"); - Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1"); - return new Impl(input, (name, src) => new NormalizingEstimator.BinningColumnOptions(name, src, maxTrainingExamples, fixZero, numBins), BinMapper(onFit)); + Contracts.CheckParam(maximumBinCount > 1, nameof(maximumExampleCount), "Must be greater than 1"); + Contracts.CheckParam(maximumExampleCount > 1, nameof(maximumExampleCount), "Must be greater than 1"); + return new Impl(input, (name, src) => new NormalizingEstimator.BinningColumnOptions(name, src, maximumExampleCount, ensureZeroUntouched, maximumBinCount), BinMapper(onFit)); } /// @@ -322,7 +331,7 @@ private static Action 
CdfMapper(OnFitCumulativeDistribut return col => { var aCol = (NormalizingTransformer.CdfNormalizerModelParameters)col?.GetNormalizerModelParams(); - onFit(aCol.Mean, aCol.Stddev); + onFit(aCol.Mean, aCol.StandardDeviation); }; } diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs index 683f237790..607a3439fa 100644 --- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs +++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs @@ -60,14 +60,14 @@ public override IEstimator Reconcile(IHostEnvironment env, } /// - /// The column to apply to. + /// The column containing the vectors to apply the normalization to. /// If , subtract mean from each value before normalizing and use the raw input otherwise. /// If , resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one. /// Scale features by this value. - public static Vector GlobalContrastNormalize(this Vector input, - bool ensureZeroMean = LpNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean, - bool ensureUnitStandardDeviation = LpNormalizingEstimatorBase.Defaults.EnsureUnitStdDev, - float scale = LpNormalizingEstimatorBase.Defaults.Scale) => new OutPipelineColumn(input, ensureZeroMean, ensureUnitStandardDeviation, scale); + public static Vector NormalizeGlobalContrast(this Vector input, + bool ensureZeroMean = LpNormNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean, + bool ensureUnitStandardDeviation = LpNormNormalizingEstimatorBase.Defaults.EnsureUnitStdDev, + float scale = LpNormNormalizingEstimatorBase.Defaults.Scale) => new OutPipelineColumn(input, ensureZeroMean, ensureUnitStandardDeviation, scale); } /// @@ -1550,18 +1550,18 @@ public static Vector FeaturizeText(this Scalar input, Scalar Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames) { - var infos = new 
RandomFourierKernelMappingEstimator.ColumnOptions[toOutput.Length]; + var infos = new ApproximatedKernelMappingEstimator.ColumnOptions[toOutput.Length]; for (int i = 0; i < toOutput.Length; ++i) { var tcol = (IColInput)toOutput[i]; - infos[i] = new RandomFourierKernelMappingEstimator.ColumnOptions(outputNames[toOutput[i]], tcol.Config.Dimension, tcol.Config.UseCosAndSinBases, inputNames[tcol.Input], tcol.Config.Generator, tcol.Config.Seed); + infos[i] = new ApproximatedKernelMappingEstimator.ColumnOptions(outputNames[toOutput[i]], tcol.Config.Rank, tcol.Config.UseCosAndSinBases, inputNames[tcol.Input], tcol.Config.Generator, tcol.Config.Seed); } - return new RandomFourierKernelMappingEstimator(env, infos); + return new ApproximatedKernelMappingEstimator(env, infos); } } @@ -1607,21 +1607,21 @@ public override IEstimator Reconcile(IHostEnvironment env, Pipelin /// specified shift-invariant kernel. With this transform, we are able to use linear methods (which are scalable) to approximate more complex kernel SVM models. /// /// The column to apply Random Fourier transfomration. - /// The number of random Fourier features to create. + /// The number of random Fourier features to create. /// If , use both of cos and sin basis functions to create two features for every random Fourier frequency. /// Otherwise, only cos bases would be used. /// Which kernel to use. (if it is null, is used.) /// The seed of the random number generator for generating the new features. If not specified global random would be used. 
- public static Vector LowerVectorSizeWithRandomFourierTransformation(this Vector input, - int dimension = RandomFourierKernelMappingEstimator.Defaults.Rank, bool useCosAndSinBases = RandomFourierKernelMappingEstimator.Defaults.UseCosAndSinBases, + public static Vector ApproximatedKernelMap(this Vector input, + int rank = ApproximatedKernelMappingEstimator.Defaults.Rank, bool useCosAndSinBases = ApproximatedKernelMappingEstimator.Defaults.UseCosAndSinBases, KernelBase generator = null, int? seed = null) { Contracts.CheckValue(input, nameof(input)); - return new ImplVector(input, new Config(dimension, useCosAndSinBases, generator, seed)); + return new ImplVector(input, new Config(rank, useCosAndSinBases, generator, seed)); } } - public static class PcaEstimatorExtensions + public static class PcaStaticExtensions { private sealed class OutPipelineColumn : Vector { @@ -1673,7 +1673,7 @@ public override IEstimator Reconcile(IHostEnvironment env, /// If enabled, data is centered to be zero mean. /// The seed for random number generation /// Vector containing the principal components. - public static Vector ToPrincipalComponents(this Vector input, + public static Vector ProjectToPrincipalComponents(this Vector input, string weightColumn = PrincipalComponentAnalyzer.Defaults.WeightColumn, int rank = PrincipalComponentAnalyzer.Defaults.Rank, int overSampling = PrincipalComponentAnalyzer.Defaults.Oversampling, diff --git a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs index 453371c0ff..4ffba0a9e2 100644 --- a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs +++ b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs @@ -34,8 +34,8 @@ namespace Microsoft.ML.Transforms internal delegate void SignatureKernelBase(); /// - /// This class indicates which kernel should be approximated by the . - /// . + /// This class indicates which kernel should be approximated by the . + /// . 
/// public abstract class KernelBase { diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs index b6845419bd..628383cac4 100644 --- a/src/Microsoft.ML.Transforms/GcnTransform.cs +++ b/src/Microsoft.ML.Transforms/GcnTransform.cs @@ -16,22 +16,22 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; -[assembly: LoadableClass(LpNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormalizingTransformer), typeof(LpNormalizingTransformer.GcnOptions), typeof(SignatureDataTransform), - LpNormalizingTransformer.UserNameGn, "GcnTransform", LpNormalizingTransformer.ShortNameGn)] +[assembly: LoadableClass(LpNormNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormNormalizingTransformer), typeof(LpNormNormalizingTransformer.GcnOptions), typeof(SignatureDataTransform), + LpNormNormalizingTransformer.UserNameGn, "GcnTransform", LpNormNormalizingTransformer.ShortNameGn)] -[assembly: LoadableClass(LpNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormalizingTransformer), null, typeof(SignatureLoadDataTransform), - LpNormalizingTransformer.UserNameGn, LpNormalizingTransformer.LoaderSignature, LpNormalizingTransformer.LoaderSignatureOld)] +[assembly: LoadableClass(LpNormNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormNormalizingTransformer), null, typeof(SignatureLoadDataTransform), + LpNormNormalizingTransformer.UserNameGn, LpNormNormalizingTransformer.LoaderSignature, LpNormNormalizingTransformer.LoaderSignatureOld)] -[assembly: LoadableClass(LpNormalizingTransformer.Summary, typeof(IDataTransform), typeof(LpNormalizingTransformer), typeof(LpNormalizingTransformer.Options), typeof(SignatureDataTransform), - LpNormalizingTransformer.UserNameLP, "LpNormNormalizer", LpNormalizingTransformer.ShortNameLP)] +[assembly: LoadableClass(LpNormNormalizingTransformer.Summary, typeof(IDataTransform), typeof(LpNormNormalizingTransformer), 
typeof(LpNormNormalizingTransformer.Options), typeof(SignatureDataTransform), + LpNormNormalizingTransformer.UserNameLP, "LpNormNormalizer", LpNormNormalizingTransformer.ShortNameLP)] -[assembly: LoadableClass(LpNormalizingTransformer.Summary, typeof(LpNormalizingTransformer), null, typeof(SignatureLoadModel), - LpNormalizingTransformer.UserNameGn, LpNormalizingTransformer.LoaderSignature)] +[assembly: LoadableClass(LpNormNormalizingTransformer.Summary, typeof(LpNormNormalizingTransformer), null, typeof(SignatureLoadModel), + LpNormNormalizingTransformer.UserNameGn, LpNormNormalizingTransformer.LoaderSignature)] -[assembly: LoadableClass(typeof(IRowMapper), typeof(LpNormalizingTransformer), null, typeof(SignatureLoadRowMapper), - LpNormalizingTransformer.UserNameGn, LpNormalizingTransformer.LoaderSignature)] +[assembly: LoadableClass(typeof(IRowMapper), typeof(LpNormNormalizingTransformer), null, typeof(SignatureLoadRowMapper), + LpNormNormalizingTransformer.UserNameGn, LpNormNormalizingTransformer.LoaderSignature)] -[assembly: EntryPointModule(typeof(LpNormalization))] +[assembly: EntryPointModule(typeof(LpNormNormalization))] namespace Microsoft.ML.Transforms { @@ -48,7 +48,7 @@ namespace Microsoft.ML.Transforms /// Usage examples and Matlab code: /// https://www.cs.stanford.edu/~acoates/papers/coatesleeng_aistats_2011.pdf. 
/// - public sealed class LpNormalizingTransformer : OneToOneTransformerBase + public sealed class LpNormNormalizingTransformer : OneToOneTransformerBase { internal sealed class Options : TransformInputBase { @@ -56,10 +56,10 @@ internal sealed class Options : TransformInputBase public Column[] Columns; [Argument(ArgumentType.AtMostOnce, HelpText = "The norm to use to normalize each sample", ShortName = "norm", SortOrder = 1)] - public LpNormalizingEstimatorBase.NormFunction Norm = LpNormalizingEstimatorBase.Defaults.Norm; + public LpNormNormalizingEstimatorBase.NormFunction Norm = LpNormNormalizingEstimatorBase.Defaults.Norm; [Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value before normalizing", SortOrder = 2)] - public bool SubMean = LpNormalizingEstimatorBase.Defaults.LpEnsureZeroMean; + public bool SubMean = LpNormNormalizingEstimatorBase.Defaults.LpEnsureZeroMean; } internal sealed class GcnOptions : TransformInputBase @@ -68,13 +68,13 @@ internal sealed class GcnOptions : TransformInputBase public GcnColumn[] Columns; [Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value before normalizing", SortOrder = 1)] - public bool SubMean = LpNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean; + public bool SubMean = LpNormNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean; [Argument(ArgumentType.AtMostOnce, HelpText = "Normalize by standard deviation rather than L2 norm", ShortName = "useStd")] - public bool UseStdDev = LpNormalizingEstimatorBase.Defaults.EnsureUnitStdDev; + public bool UseStdDev = LpNormNormalizingEstimatorBase.Defaults.EnsureUnitStdDev; [Argument(ArgumentType.AtMostOnce, HelpText = "Scale features by this value")] - public float Scale = LpNormalizingEstimatorBase.Defaults.Scale; + public float Scale = LpNormNormalizingEstimatorBase.Defaults.Scale; } internal abstract class ColumnBase : OneToOneColumn @@ -98,7 +98,7 @@ private protected override bool TryUnparseCore(StringBuilder sb) internal sealed 
class Column : ColumnBase { [Argument(ArgumentType.AtMostOnce, HelpText = "The norm to use to normalize each sample", ShortName = "norm", SortOrder = 1)] - public LpNormalizingEstimatorBase.NormFunction? Norm; + public LpNormNormalizingEstimatorBase.NormFunction? Norm; internal static Column Parse(string str) { @@ -146,7 +146,7 @@ internal bool TryUnparse(StringBuilder sb) } } - private sealed class ColumnOptionsLoaded : LpNormalizingEstimatorBase.ColumnOptionsBase + private sealed class ColumnOptionsLoaded : LpNormNormalizingEstimatorBase.ColumnOptionsBase { internal ColumnOptionsLoaded(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized) : base(ctx, name, inputColumnName, normKindSerialized) @@ -182,7 +182,7 @@ private static VersionInfo GetVersionInfo() verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, loaderSignatureAlt: LoaderSignatureOld, - loaderAssemblyName: typeof(LpNormalizingTransformer).Assembly.FullName); + loaderAssemblyName: typeof(LpNormNormalizingTransformer).Assembly.FullName); } private const string RegistrationName = "LpNormNormalizer"; @@ -193,10 +193,10 @@ private static VersionInfo GetVersionInfo() /// /// The objects describing how the transformation is applied on the input data. 
/// - public IReadOnlyCollection Columns => _columns.AsReadOnly(); - private readonly LpNormalizingEstimatorBase.ColumnOptionsBase[] _columns; + public IReadOnlyCollection Columns => _columns.AsReadOnly(); + private readonly LpNormNormalizingEstimatorBase.ColumnOptionsBase[] _columns; - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LpNormalizingEstimatorBase.ColumnOptionsBase[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LpNormNormalizingEstimatorBase.ColumnOptionsBase[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -205,14 +205,14 @@ private static (string outputColumnName, string inputColumnName)[] GetColumnPair private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) { var inType = inputSchema[srcCol].Type; - if (!LpNormalizingEstimatorBase.IsColumnTypeValid(inType)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", inputSchema[srcCol].Name, LpNormalizingEstimatorBase.ExpectedColumnType, inType.ToString()); + if (!LpNormNormalizingEstimatorBase.IsColumnTypeValid(inType)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", inputSchema[srcCol].Name, LpNormNormalizingEstimatorBase.ExpectedColumnType, inType.ToString()); } /// - /// Create a that takes multiple pairs of columns. + /// Create a that takes multiple pairs of columns. 
/// - internal LpNormalizingTransformer(IHostEnvironment env, params LpNormalizingEstimatorBase.ColumnOptionsBase[] columns) : - base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormalizingTransformer)), GetColumnPairs(columns)) + internal LpNormNormalizingTransformer(IHostEnvironment env, params LpNormNormalizingEstimatorBase.ColumnOptionsBase[] columns) : + base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormNormalizingTransformer)), GetColumnPairs(columns)) { _columns = columns.ToArray(); } @@ -225,13 +225,13 @@ internal static IDataTransform Create(IHostEnvironment env, GcnOptions options, env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new GlobalContrastNormalizingEstimator.GcnColumnOptions[options.Columns.Length]; + var cols = new GlobalContrastNormalizingEstimator.ColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new GlobalContrastNormalizingEstimator.GcnColumnOptions( + cols[i] = new GlobalContrastNormalizingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name, item.SubMean ?? options.SubMean, @@ -241,7 +241,7 @@ internal static IDataTransform Create(IHostEnvironment env, GcnOptions options, if (!options.SubMean && options.UseStdDev) ch.Warning("subMean parameter is false while useStd is true. It is advisable to set subMean to true in case useStd is set to true."); } - return new LpNormalizingTransformer(env, cols).MakeDataTransform(input); + return new LpNormNormalizingTransformer(env, cols).MakeDataTransform(input); } // Factory method for SignatureDataTransform for Arguments class. 
@@ -252,27 +252,27 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new LpNormalizingEstimator.LpNormColumnOptions[options.Columns.Length]; + var cols = new LpNormNormalizingEstimator.ColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new LpNormalizingEstimator.LpNormColumnOptions( + cols[i] = new LpNormNormalizingEstimator.ColumnOptions( item.Name, item.Source ?? item.Name, item.Norm ?? options.Norm, item.SubMean ?? options.SubMean); } } - return new LpNormalizingTransformer(env, cols).MakeDataTransform(input); + return new LpNormNormalizingTransformer(env, cols).MakeDataTransform(input); } // Factory method for SignatureLoadModel. - private static LpNormalizingTransformer Create(IHostEnvironment env, ModelLoadContext ctx) + private static LpNormNormalizingTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); - var host = env.Register(nameof(LpNormalizingTransformer)); + var host = env.Register(nameof(LpNormNormalizingTransformer)); host.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); @@ -281,7 +281,7 @@ private static LpNormalizingTransformer Create(IHostEnvironment env, ModelLoadCo int cbFloat = ctx.Reader.ReadInt32(); env.CheckDecode(cbFloat == sizeof(float)); } - return new LpNormalizingTransformer(host, ctx); + return new LpNormNormalizingTransformer(host, ctx); } // Factory method for SignatureLoadDataTransform. 
@@ -292,7 +292,7 @@ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) => Create(env, ctx).MakeRowMapper(inputSchema); - private LpNormalizingTransformer(IHost host, ModelLoadContext ctx) + private LpNormNormalizingTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { // *** Binary format *** @@ -329,9 +329,9 @@ private sealed class Mapper : OneToOneMapperBase private readonly DataViewType[] _srcTypes; private readonly int[] _srcCols; private readonly DataViewType[] _types; - private readonly LpNormalizingTransformer _parent; + private readonly LpNormNormalizingTransformer _parent; - public Mapper(LpNormalizingTransformer parent, DataViewSchema inputSchema) + public Mapper(LpNormNormalizingTransformer parent, DataViewSchema inputSchema) : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) { _parent = parent; @@ -380,7 +380,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -391,7 +391,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -402,7 +402,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -413,7 +413,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -426,13 +426,13 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -441,7 +441,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -450,7 +450,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -459,7 +459,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func dst) => { @@ -470,7 +470,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func src, int length) } } - internal static class LpNormalization + internal static class 
LpNormNormalization { [TlcModule.EntryPoint(Name = "Transforms.LpNormalizer", - Desc = LpNormalizingTransformer.Summary, - UserName = LpNormalizingTransformer.UserNameLP, - ShortName = LpNormalizingTransformer.ShortNameLP)] - public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNormalizingTransformer.Options input) + Desc = LpNormNormalizingTransformer.Summary, + UserName = LpNormNormalizingTransformer.UserNameLP, + ShortName = LpNormNormalizingTransformer.ShortNameLP)] + public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNormNormalizingTransformer.Options input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "LpNormalize", input); - var xf = LpNormalizingTransformer.Create(h, input, input.Data); + var xf = LpNormNormalizingTransformer.Create(h, input, input.Data); return new CommonOutputs.TransformOutput() { Model = new TransformModelImpl(h, xf, input.Data), @@ -626,13 +626,13 @@ public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNo } [TlcModule.EntryPoint(Name = "Transforms.GlobalContrastNormalizer", - Desc = LpNormalizingTransformer.GcnSummary, - UserName = LpNormalizingTransformer.UserNameGn, - ShortName = LpNormalizingTransformer.ShortNameGn)] - public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, LpNormalizingTransformer.GcnOptions input) + Desc = LpNormNormalizingTransformer.GcnSummary, + UserName = LpNormNormalizingTransformer.UserNameGn, + ShortName = LpNormNormalizingTransformer.ShortNameGn)] + public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, LpNormNormalizingTransformer.GcnOptions input) { var h = EntryPointUtils.CheckArgsAndCreateHost(env, "GcNormalize", input); - var xf = LpNormalizingTransformer.Create(h, input, input.Data); + var xf = LpNormNormalizingTransformer.Create(h, input, input.Data); return new CommonOutputs.TransformOutput() { Model = new TransformModelImpl(h, xf, input.Data), @@ -644,7 +644,7 @@ public 
static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, Lp /// /// Base estimator class for LpNorm and Gcn normalizers. /// - public abstract class LpNormalizingEstimatorBase : TrivialEstimator + public abstract class LpNormNormalizingEstimatorBase : TrivialEstimator { /// /// The kind of unit norm vectors are rescaled to. This enumeration is serialized. @@ -758,10 +758,10 @@ internal static class Defaults } /// - /// Create a that takes multiple pairs of columns. + /// Create a that takes multiple pairs of columns. /// - internal LpNormalizingEstimatorBase(IHostEnvironment env, params ColumnOptionsBase[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormalizingEstimator)), new LpNormalizingTransformer(env, columns)) + internal LpNormNormalizingEstimatorBase(IHostEnvironment env, params ColumnOptionsBase[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormNormalizingEstimator)), new LpNormNormalizingTransformer(env, columns)) { } @@ -808,24 +808,24 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) /// /// Lp Normalizing estimator takes columns and normalizes them individually by rescaling them to unit norm. /// - public sealed class LpNormalizingEstimator : LpNormalizingEstimatorBase + public sealed class LpNormNormalizingEstimator : LpNormNormalizingEstimatorBase { /// /// Describes how the transformer handles one column pair. /// - public sealed class LpNormColumnOptions : ColumnOptionsBase + public sealed class ColumnOptions : ColumnOptionsBase { /// /// Describes how the transformer handles one column pair. /// /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. - /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. + /// Type of norm to use to normalize each sample. 
The indicated norm of the resulted vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. - public LpNormColumnOptions(string name, string inputColumnName = null, - NormFunction normKind = Defaults.Norm, + public ColumnOptions(string name, string inputColumnName = null, + NormFunction norm = Defaults.Norm, bool ensureZeroMean = Defaults.LpEnsureZeroMean) - : base(name, inputColumnName ?? name, normKind, ensureZeroMean, 1) + : base(name, inputColumnName ?? name, norm, ensureZeroMean, 1) { } } @@ -833,29 +833,29 @@ public LpNormColumnOptions(string name, string inputColumnName = null, /// The environment. /// Name of the column resulting from the transformation of . /// Name of the column to transform. If set to , the value of the will be used as source. - /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. + /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. - internal LpNormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, - NormFunction normKind = Defaults.Norm, bool ensureZeroMean = Defaults.LpEnsureZeroMean) - : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, normKind, ensureZeroMean) + internal LpNormNormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, + NormFunction norm = Defaults.Norm, bool ensureZeroMean = Defaults.LpEnsureZeroMean) + : this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, norm, ensureZeroMean) { } /// /// The environment. /// Pairs of columns to run the normalization on. - /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. 
+ /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. - internal LpNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, - NormFunction normKind = Defaults.Norm, bool ensureZeroMean = Defaults.LpEnsureZeroMean) - : this(env, columns.Select(x => new LpNormColumnOptions(x.outputColumnName, x.inputColumnName, normKind, ensureZeroMean)).ToArray()) + internal LpNormNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, + NormFunction norm = Defaults.Norm, bool ensureZeroMean = Defaults.LpEnsureZeroMean) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, norm, ensureZeroMean)).ToArray()) { } /// - /// Create a that takes multiple pairs of columns. + /// Create a that takes multiple pairs of columns. /// - internal LpNormalizingEstimator(IHostEnvironment env, params LpNormColumnOptions[] columns) + internal LpNormNormalizingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(env, columns) { } @@ -864,12 +864,12 @@ internal LpNormalizingEstimator(IHostEnvironment env, params LpNormColumnOptions /// /// Global contrast normalizing estimator takes columns and performs global constrast normalization. /// - public sealed class GlobalContrastNormalizingEstimator : LpNormalizingEstimatorBase + public sealed class GlobalContrastNormalizingEstimator : LpNormNormalizingEstimatorBase { /// /// Describes how the transformer handles one Gcn column pair. /// - public sealed class GcnColumnOptions : ColumnOptionsBase + public sealed class ColumnOptions : ColumnOptionsBase { /// /// Describes how the transformer handles one Gcn column pair. 
@@ -879,7 +879,7 @@ public sealed class GcnColumnOptions : ColumnOptionsBase /// If , subtract mean from each value before normalizing and use the raw input otherwise. /// If , resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one. /// Scale features by this value. - public GcnColumnOptions(string name, string inputColumnName = null, + public ColumnOptions(string name, string inputColumnName = null, bool ensureZeroMean = Defaults.GcnEnsureZeroMean, bool ensureUnitStandardDeviation = Defaults.EnsureUnitStdDev, float scale = Defaults.Scale) @@ -909,14 +909,14 @@ internal GlobalContrastNormalizingEstimator(IHostEnvironment env, string outputC /// Scale features by this value. internal GlobalContrastNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, bool ensureZeroMean = Defaults.GcnEnsureZeroMean, bool ensureUnitStandardDeviation = Defaults.EnsureUnitStdDev, float scale = Defaults.Scale) - : this(env, columns.Select(x => new GcnColumnOptions(x.outputColumnName, x.inputColumnName, ensureZeroMean, ensureUnitStandardDeviation, scale)).ToArray()) + : this(env, columns.Select(x => new ColumnOptions(x.outputColumnName, x.inputColumnName, ensureZeroMean, ensureUnitStandardDeviation, scale)).ToArray()) { } /// /// Create a that takes multiple pairs of columns. 
/// - internal GlobalContrastNormalizingEstimator(IHostEnvironment env, params GcnColumnOptions[] columns) : + internal GlobalContrastNormalizingEstimator(IHostEnvironment env, params ColumnOptions[] columns) : base(env, columns) { } diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index 8ef3acd4fe..52c2d2d072 100644 --- a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -28,19 +28,19 @@ public static class KernelExpansionCatalog /// ]]> /// /// - public static RandomFourierKernelMappingEstimator RandomFourierKernelMap(this TransformsCatalog catalog, + public static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, - int rank = RandomFourierKernelMappingEstimator.Defaults.Rank, - bool useCosAndSinBases = RandomFourierKernelMappingEstimator.Defaults.UseCosAndSinBases) - => new RandomFourierKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, rank, useCosAndSinBases); + int rank = ApproximatedKernelMappingEstimator.Defaults.Rank, + bool useCosAndSinBases = ApproximatedKernelMappingEstimator.Defaults.UseCosAndSinBases) + => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, rank, useCosAndSinBases); /// /// Takes columns filled with a vector of floats and maps its to a random low-dimensional feature space. /// /// The transform's catalog. /// The input columns to use for the transformation. 
- public static RandomFourierKernelMappingEstimator RandomFourierKernelMap(this TransformsCatalog catalog, params RandomFourierKernelMappingEstimator.ColumnOptions[] columns) - => new RandomFourierKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); + public static ApproximatedKernelMappingEstimator ApproximatedKernelMap(this TransformsCatalog catalog, params ApproximatedKernelMappingEstimator.ColumnOptions[] columns) + => new ApproximatedKernelMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 9f9df97609..dbd0e81178 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -55,13 +55,13 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, => new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// - /// Takes column filled with a vector of floats and normazlize its to one. By setting to , + /// Takes column filled with a vector of floats and normalize its to one. By setting to , /// a pre-processing step would be applied to make the specified column's mean be a zero vector. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. - /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. + /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. 
/// /// @@ -70,17 +70,17 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// ]]> /// /// - public static LpNormalizingEstimator LpNormalize(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, - LpNormalizingEstimatorBase.NormFunction normKind = LpNormalizingEstimatorBase.Defaults.Norm, bool ensureZeroMean = LpNormalizingEstimatorBase.Defaults.LpEnsureZeroMean) - => new LpNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, normKind, ensureZeroMean); + public static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, + LpNormNormalizingEstimatorBase.NormFunction norm = LpNormNormalizingEstimatorBase.Defaults.Norm, bool ensureZeroMean = LpNormNormalizingEstimatorBase.Defaults.LpEnsureZeroMean) + => new LpNormNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, norm, ensureZeroMean); /// - /// Takes column filled with a vector of floats and normazlize its norm to one. Note that the allowed norm functions are defined in . + /// Takes column filled with a vector of floats and normalizes its norm to one. Note that the allowed norm functions are defined in . /// /// The transform's catalog. /// Describes the parameters of the lp-normalization process for each column pair. - public static LpNormalizingEstimator LpNormalize(this TransformsCatalog catalog, params LpNormalizingEstimator.LpNormColumnOptions[] columns) - => new LpNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); + public static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalog catalog, params LpNormNormalizingEstimator.ColumnOptions[] columns) + => new LpNormNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); /// /// Takes column filled with a vector of floats and computes global contrast normalization of it. 
By setting to , @@ -99,10 +99,10 @@ public static LpNormalizingEstimator LpNormalize(this TransformsCatalog catalog, /// ]]> /// /// - public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, - bool ensureZeroMean = LpNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean, - bool ensureUnitStandardDeviation = LpNormalizingEstimatorBase.Defaults.EnsureUnitStdDev, - float scale = LpNormalizingEstimatorBase.Defaults.Scale) + public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, + bool ensureZeroMean = LpNormNormalizingEstimatorBase.Defaults.GcnEnsureZeroMean, + bool ensureUnitStandardDeviation = LpNormNormalizingEstimatorBase.Defaults.EnsureUnitStdDev, + float scale = LpNormNormalizingEstimatorBase.Defaults.Scale) => new GlobalContrastNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, ensureZeroMean, ensureUnitStandardDeviation, scale); /// @@ -110,7 +110,7 @@ public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this Tr /// /// The transform's catalog. /// Describes the parameters of the gcn-normaliztion process for each column pair. 
- public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this TransformsCatalog catalog, params GlobalContrastNormalizingEstimator.GcnColumnOptions[] columns) + public static GlobalContrastNormalizingEstimator NormalizeGlobalContrast(this TransformsCatalog catalog, params GlobalContrastNormalizingEstimator.ColumnOptions[] columns) => new GlobalContrastNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns); } } diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs index 14faad1960..1be12e2453 100644 --- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs +++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs @@ -15,17 +15,17 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Transforms; -[assembly: LoadableClass(RandomFourierExpansionTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierExpansionTransformer), typeof(RandomFourierExpansionTransformer.Options), typeof(SignatureDataTransform), +[assembly: LoadableClass(ApproximatedKernelTransformer.Summary, typeof(IDataTransform), typeof(ApproximatedKernelTransformer), typeof(ApproximatedKernelTransformer.Options), typeof(SignatureDataTransform), "Random Fourier Features Transform", "RffTransform", "Rff")] -[assembly: LoadableClass(RandomFourierExpansionTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierExpansionTransformer), null, typeof(SignatureLoadDataTransform), - "Random Fourier Features Transform", RandomFourierExpansionTransformer.LoaderSignature)] +[assembly: LoadableClass(ApproximatedKernelTransformer.Summary, typeof(IDataTransform), typeof(ApproximatedKernelTransformer), null, typeof(SignatureLoadDataTransform), + "Random Fourier Features Transform", ApproximatedKernelTransformer.LoaderSignature)] -[assembly: LoadableClass(RandomFourierExpansionTransformer.Summary, typeof(RandomFourierExpansionTransformer), null, typeof(SignatureLoadModel), - "Random Fourier Features 
Transform", RandomFourierExpansionTransformer.LoaderSignature)] +[assembly: LoadableClass(ApproximatedKernelTransformer.Summary, typeof(ApproximatedKernelTransformer), null, typeof(SignatureLoadModel), + "Random Fourier Features Transform", ApproximatedKernelTransformer.LoaderSignature)] -[assembly: LoadableClass(typeof(IRowMapper), typeof(RandomFourierExpansionTransformer), null, typeof(SignatureLoadRowMapper), - "Random Fourier Features Transform", RandomFourierExpansionTransformer.LoaderSignature)] +[assembly: LoadableClass(typeof(IRowMapper), typeof(ApproximatedKernelTransformer), null, typeof(SignatureLoadRowMapper), + "Random Fourier Features Transform", ApproximatedKernelTransformer.LoaderSignature)] namespace Microsoft.ML.Transforms { @@ -36,7 +36,7 @@ namespace Microsoft.ML.Transforms /// This transformation is based on this paper by /// Rahimi and Recht. /// - public sealed class RandomFourierExpansionTransformer : OneToOneTransformerBase + public sealed class ApproximatedKernelTransformer : OneToOneTransformerBase { internal sealed class Options { @@ -44,13 +44,13 @@ internal sealed class Options public Column[] Columns; [Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")] - public int NewDim = RandomFourierKernelMappingEstimator.Defaults.Rank; + public int NewDim = ApproximatedKernelMappingEstimator.Defaults.Rank; [Argument(ArgumentType.Multiple, HelpText = "Which kernel to use?", ShortName = "kernel", SignatureType = typeof(SignatureKernelBase))] public IComponentFactory MatrixGenerator = new GaussianKernel.Options(); [Argument(ArgumentType.AtMostOnce, HelpText = "Create two features for every random Fourier frequency? 
(one for cos and one for sin)")] - public bool UseSin = RandomFourierKernelMappingEstimator.Defaults.UseCosAndSinBases; + public bool UseSin = ApproximatedKernelMappingEstimator.Defaults.UseCosAndSinBases; [Argument(ArgumentType.LastOccurenceWins, HelpText = "The seed of the random number generator for generating the new features (if unspecified, " + @@ -109,7 +109,7 @@ private sealed class TransformInfo private readonly TauswortheHybrid _rand; private readonly TauswortheHybrid.State _state; - public TransformInfo(IHost host, RandomFourierKernelMappingEstimator.ColumnOptions column, int d, float avgDist) + public TransformInfo(IHost host, ApproximatedKernelMappingEstimator.ColumnOptions column, int d, float avgDist) { Contracts.AssertValue(host); @@ -224,7 +224,7 @@ private static VersionInfo GetVersionInfo() verReadableCur: 0x00010002, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(RandomFourierExpansionTransformer).Assembly.FullName); + loaderAssemblyName: typeof(ApproximatedKernelTransformer).Assembly.FullName); } private readonly TransformInfo[] _transformInfos; @@ -238,7 +238,7 @@ private static string TestColumnType(DataViewType type) return "Expected vector of floats with known size"; } - private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(RandomFourierKernelMappingEstimator.ColumnOptions[] columns) + private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ApproximatedKernelMappingEstimator.ColumnOptions[] columns) { Contracts.CheckValue(columns, nameof(columns)); return columns.Select(x => (x.Name, x.InputColumnName)).ToArray(); @@ -255,8 +255,8 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int new VectorType(NumberDataViewType.Single, _transformInfos[col].SrcDim).ToString(), type.ToString()); } - internal RandomFourierExpansionTransformer(IHostEnvironment env, IDataView input, 
RandomFourierKernelMappingEstimator.ColumnOptions[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RandomFourierExpansionTransformer)), GetColumnPairs(columns)) + internal ApproximatedKernelTransformer(IHostEnvironment env, IDataView input, ApproximatedKernelMappingEstimator.ColumnOptions[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ApproximatedKernelTransformer)), GetColumnPairs(columns)) { var avgDistances = GetAvgDistances(columns, input); _transformInfos = new TransformInfo[columns.Length]; @@ -281,7 +281,7 @@ private static int RoundUp(int cflt, int cfltAlign) return cblob * cfltAlign; } - private float[] GetAvgDistances(RandomFourierKernelMappingEstimator.ColumnOptions[] columns, IDataView input) + private float[] GetAvgDistances(ApproximatedKernelMappingEstimator.ColumnOptions[] columns, IDataView input) { var avgDistances = new float[columns.Length]; const int reservoirSize = 5000; @@ -395,7 +395,7 @@ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) => Create(env, ctx).MakeRowMapper(inputSchema); - private RandomFourierExpansionTransformer(IHost host, ModelLoadContext ctx) + private ApproximatedKernelTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { // *** Binary format *** @@ -420,14 +420,14 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - var cols = new RandomFourierKernelMappingEstimator.ColumnOptions[options.Columns.Length]; + var cols = new ApproximatedKernelMappingEstimator.ColumnOptions[options.Columns.Length]; using (var ch = env.Start("ValidateArgs")) { for (int i = 0; i < cols.Length; i++) { var item = options.Columns[i]; - cols[i] = new RandomFourierKernelMappingEstimator.ColumnOptions( + cols[i] = new 
ApproximatedKernelMappingEstimator.ColumnOptions( item.Name, item.NewDim ?? options.NewDim, item.UseSin ?? options.UseSin, @@ -436,14 +436,14 @@ private static IDataTransform Create(IHostEnvironment env, Options options, IDat item.Seed ?? options.Seed); }; } - return new RandomFourierExpansionTransformer(env, input, cols).MakeDataTransform(input); + return new ApproximatedKernelTransformer(env, input, cols).MakeDataTransform(input); } // Factory method for SignatureLoadModel. - private static RandomFourierExpansionTransformer Create(IHostEnvironment env, ModelLoadContext ctx) + private static ApproximatedKernelTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); - var host = env.Register(nameof(RandomFourierExpansionTransformer)); + var host = env.Register(nameof(ApproximatedKernelTransformer)); host.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); @@ -452,7 +452,7 @@ private static RandomFourierExpansionTransformer Create(IHostEnvironment env, Mo int cbFloat = ctx.Reader.ReadInt32(); env.CheckDecode(cbFloat == sizeof(float)); } - return new RandomFourierExpansionTransformer(host, ctx); + return new ApproximatedKernelTransformer(host, ctx); } private protected override void SaveModel(ModelSaveContext ctx) @@ -476,9 +476,9 @@ private sealed class Mapper : OneToOneMapperBase private readonly DataViewType[] _srcTypes; private readonly int[] _srcCols; private readonly DataViewType[] _types; - private readonly RandomFourierExpansionTransformer _parent; + private readonly ApproximatedKernelTransformer _parent; - public Mapper(RandomFourierExpansionTransformer parent, DataViewSchema inputSchema) + public Mapper(ApproximatedKernelTransformer parent, DataViewSchema inputSchema) : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) { _parent = parent; @@ -606,7 +606,7 @@ private void TransformFeatures(in VBuffer src, ref VBuffer dst, Tr /// /// Maps vector columns to a low -dimensional feature 
space. /// - public sealed class RandomFourierKernelMappingEstimator : IEstimator + public sealed class ApproximatedKernelMappingEstimator : IEstimator { [BestFriend] internal static class Defaults @@ -679,22 +679,22 @@ public ColumnOptions(string name, int rank, bool useCosAndSinBases, string input /// Name of the column to transform. If set to , the value of the will be used as source. /// The number of random Fourier features to create. /// Create two features for every random Fourier frequency? (one for cos and one for sin). - internal RandomFourierKernelMappingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int rank = Defaults.Rank, bool useCosAndSinBases = Defaults.UseCosAndSinBases) + internal ApproximatedKernelMappingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int rank = Defaults.Rank, bool useCosAndSinBases = Defaults.UseCosAndSinBases) : this(env, new ColumnOptions(outputColumnName, rank, useCosAndSinBases, inputColumnName ?? outputColumnName)) { } - internal RandomFourierKernelMappingEstimator(IHostEnvironment env, params ColumnOptions[] columns) + internal ApproximatedKernelMappingEstimator(IHostEnvironment env, params ColumnOptions[] columns) { Contracts.CheckValue(env, nameof(env)); - _host = env.Register(nameof(RandomFourierKernelMappingEstimator)); + _host = env.Register(nameof(ApproximatedKernelMappingEstimator)); _columns = columns; } /// - /// Trains and returns a . + /// Trains and returns a . /// - public RandomFourierExpansionTransformer Fit(IDataView input) => new RandomFourierExpansionTransformer(_host, input, _columns); + public ApproximatedKernelTransformer Fit(IDataView input) => new ApproximatedKernelTransformer(_host, input, _columns); /// /// Returns the of the schema which will be produced by the transformer. 
diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs index c8d87fa3a0..16d0c46100 100644 --- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs +++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs @@ -126,6 +126,12 @@ public sealed class Options : TransformInputBase /// private WordBagEstimator.Options _wordFeatureExtractor; + /// + /// Norm of the output vector. It will be normalized to one. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Normalize vectors (rows) individually by rescaling them to unit norm.", Name = "VectorNormalizer", ShortName = "norm", SortOrder = 13)] + public NormFunction Norm = NormFunction.L2; + /// /// Ngram feature extractor to use for words (WordBag/WordHashBag). /// @@ -181,9 +187,6 @@ public WordBagEstimator.Options CharFeatureExtractor } } - [Argument(ArgumentType.AtMostOnce, HelpText = "Normalize vectors (rows) individually by rescaling them to unit norm.", Name = "VectorNormalizer", ShortName = "norm", SortOrder = 13)] - public NormFunction Norm = NormFunction.L2; - public Options() { WordFeatureExtractor = new WordBagEstimator.Options(); @@ -213,7 +216,7 @@ private sealed class TransformApplierParams public readonly INgramExtractorFactory WordExtractorFactory; public readonly INgramExtractorFactory CharExtractorFactory; - public readonly NormFunction VectorNormalizer; + public readonly NormFunction Norm; public readonly Language Language; public readonly bool UsePredefinedStopWordRemover; public readonly CaseMode TextCase; @@ -226,21 +229,21 @@ private sealed class TransformApplierParams public StopWordsRemovingEstimator.Language StopwordsLanguage => (StopWordsRemovingEstimator.Language)Enum.Parse(typeof(StopWordsRemovingEstimator.Language), Language.ToString()); - public LpNormalizingEstimatorBase.NormFunction LpNormalizerKind + internal LpNormNormalizingEstimatorBase.NormFunction LpNorm { get { - switch 
(VectorNormalizer) + switch (Norm) { case NormFunction.L1: - return LpNormalizingEstimatorBase.NormFunction.L1; + return LpNormNormalizingEstimatorBase.NormFunction.L1; case NormFunction.L2: - return LpNormalizingEstimatorBase.NormFunction.L2; + return LpNormNormalizingEstimatorBase.NormFunction.L2; case NormFunction.Infinity: - return LpNormalizingEstimatorBase.NormFunction.Infinity; + return LpNormNormalizingEstimatorBase.NormFunction.Infinity; default: Contracts.Assert(false, "Unexpected normalizer type"); - return LpNormalizingEstimatorBase.NormFunction.L2; + return LpNormNormalizingEstimatorBase.NormFunction.L2; } } } @@ -291,7 +294,7 @@ public TransformApplierParams(TextFeaturizingEstimator parent) host.Check(Enum.IsDefined(typeof(CaseMode), parent.OptionalSettings.CaseMode)); WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(host, parent._dictionary); CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(host, parent._dictionary); - VectorNormalizer = parent.OptionalSettings.Norm; + Norm = parent.OptionalSettings.Norm; Language = parent.OptionalSettings.Language; UsePredefinedStopWordRemover = parent.OptionalSettings.UsePredefinedStopWordRemover; TextCase = parent.OptionalSettings.CaseMode; @@ -463,15 +466,15 @@ public ITransformer Fit(IDataView input) } } - if (tparams.VectorNormalizer != NormFunction.None) + if (tparams.Norm != NormFunction.None) { - var xfCols = new List(2); + var xfCols = new List(2); if (charFeatureCol != null) { var dstCol = GenerateColumnName(view.Schema, charFeatureCol, "LpCharNorm"); tempCols.Add(dstCol); - xfCols.Add(new LpNormalizingEstimator.LpNormColumnOptions(dstCol, charFeatureCol, normKind: tparams.LpNormalizerKind)); + xfCols.Add(new LpNormNormalizingEstimator.ColumnOptions(dstCol, charFeatureCol, norm: tparams.LpNorm)); charFeatureCol = dstCol; } @@ -479,12 +482,12 @@ public ITransformer Fit(IDataView input) { var dstCol = GenerateColumnName(view.Schema, wordFeatureCol, "LpWordNorm"); 
tempCols.Add(dstCol); - xfCols.Add(new LpNormalizingEstimator.LpNormColumnOptions(dstCol, wordFeatureCol, normKind: tparams.LpNormalizerKind)); + xfCols.Add(new LpNormNormalizingEstimator.ColumnOptions(dstCol, wordFeatureCol, norm: tparams.LpNorm)); wordFeatureCol = dstCol; } if (xfCols.Count > 0) - view = new LpNormalizingTransformer(h, xfCols.ToArray()).Transform(view); + view = new LpNormNormalizingTransformer(h, xfCols.ToArray()).Transform(view); } { @@ -564,6 +567,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options args, IDataV { var estimator = new TextFeaturizingEstimator(env, args.Columns.Name, args.Columns.Source ?? new[] { args.Columns.Name }, args); estimator._dictionary = args.Dictionary; + // Review: I don't think the following two lines are needed. estimator._wordFeatureExtractor = args.WordFeatureExtractorFactory; estimator._charFeatureExtractor = args.CharFeatureExtractorFactory; return estimator.Fit(data).Transform(data) as IDataTransform; diff --git a/src/Microsoft.ML.Transforms/doc.xml b/src/Microsoft.ML.Transforms/doc.xml index 24948a3fce..12e498812d 100644 --- a/src/Microsoft.ML.Transforms/doc.xml +++ b/src/Microsoft.ML.Transforms/doc.xml @@ -203,9 +203,9 @@ - + - The LpNormalizer transforms, normalizes vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). + The LpNormNormalizer transforms, normalizes vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D where M is mean and D is either L2 norm, L1 norm or LInf norm. 
@@ -215,14 +215,6 @@ For more information see: - - - pipeline.Add(new LpNormalizer("FeatureCol") - { - NormKind = LpNormNormalizerTransformNormalizerKind.L1Norm - }); - - @@ -236,7 +228,7 @@ For more information see: An Analysis of Single-Layer Networks in Unsupervised Feature Learning - + pipeline.Add(new GlobalContrastNormalizer("FeatureCol") diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 302f08c27d..728d301961 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -89,7 +89,7 @@ Transforms.FeatureCombiner Combines all the features into one feature column. Mi Transforms.FeatureContributionCalculationTransformer For each data point, calculates the contribution of individual features to the model prediction. Microsoft.ML.Transforms.FeatureContributionEntryPoint FeatureContributionCalculation Microsoft.ML.Transforms.FeatureContributionCalculatingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.FeatureSelectorByCount Selects the slots for which the count of non-default values is greater than or equal to a threshold. Microsoft.ML.Transforms.SelectFeatures CountSelect Microsoft.ML.Transforms.CountFeatureSelectingEstimator+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.FeatureSelectorByMutualInformation Selects the top k slots across all specified columns ordered by their mutual information with the label column. Microsoft.ML.Transforms.SelectFeatures MutualInformationSelect Microsoft.ML.Transforms.MutualInformationFeatureSelectingEstimator+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput -Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. 
Microsoft.ML.Transforms.LpNormalization GcNormalize Microsoft.ML.Transforms.LpNormalizingTransformer+GcnOptions Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Transforms.LpNormNormalization GcNormalize Microsoft.ML.Transforms.LpNormNormalizingTransformer+GcnOptions Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. Microsoft.ML.Transforms.HashJoin Apply Microsoft.ML.Transforms.HashJoiningTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.ImageGrayscale Convert image into grayscale. Microsoft.ML.Transforms.Image.ImageAnalyticsEntryPoints ImageGrayscale Microsoft.ML.Transforms.Image.ImageGrayscalingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.ImageLoader Load images from files. Microsoft.ML.Transforms.Image.ImageAnalyticsEntryPoints ImageLoader Microsoft.ML.Data.ImageLoadingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput @@ -101,7 +101,7 @@ Transforms.LabelIndicator Label remapper used by OVA Microsoft.ML.Transforms.Lab Transforms.LabelToFloatConverter Transforms the label to float to make it suitable for regression. Microsoft.ML.EntryPoints.FeatureCombiner PrepareRegressionLabel Microsoft.ML.EntryPoints.FeatureCombiner+RegressionLabelInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.LightLda The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation. 
Microsoft.ML.Transforms.Text.TextAnalytics LightLda Microsoft.ML.Transforms.Text.LatentDirichletAllocationTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.LogMeanVarianceNormalizer Normalizes the data based on the computed mean and variance of the logarithm of the data. Microsoft.ML.Data.Normalize LogMeanVar Microsoft.ML.Transforms.NormalizeTransform+LogMeanVarArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput -Transforms.LpNormalizer Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. Microsoft.ML.Transforms.LpNormalization Normalize Microsoft.ML.Transforms.LpNormalizingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +Transforms.LpNormalizer Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. Microsoft.ML.Transforms.LpNormNormalization Normalize Microsoft.ML.Transforms.LpNormNormalizingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.ManyHeterogeneousModelCombiner Combines a sequence of TransformModels and a PredictorModel into a single PredictorModel. Microsoft.ML.EntryPoints.ModelOperations CombineModels Microsoft.ML.EntryPoints.ModelOperations+PredictorModelInput Microsoft.ML.EntryPoints.ModelOperations+PredictorModelOutput Transforms.MeanVarianceNormalizer Normalizes the data based on the computed mean and variance of the data. Microsoft.ML.Data.Normalize MeanVar Microsoft.ML.Transforms.NormalizeTransform+MeanVarArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.MinMaxNormalizer Normalizes the data based on the observed minimum and maximum values of the data. 
Microsoft.ML.Data.Normalize MinMax Microsoft.ML.Transforms.NormalizeTransform+MinMaxArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput diff --git a/test/Microsoft.ML.Benchmarks/RffTransform.cs b/test/Microsoft.ML.Benchmarks/RffTransform.cs index 8d4a33f958..d026b13d8c 100644 --- a/test/Microsoft.ML.Benchmarks/RffTransform.cs +++ b/test/Microsoft.ML.Benchmarks/RffTransform.cs @@ -43,7 +43,7 @@ public void CV_Multiclass_Digits_RffTransform_OVAAveragedPerceptron() var data = loader.Load(_dataPath_Digits); - var pipeline = mlContext.Transforms.RandomFourierKernelMap("FeaturesRFF", "Features") + var pipeline = mlContext.Transforms.ApproximatedKernelMap("FeaturesRFF", "Features") .AppendCacheCheckpoint(mlContext) .Append(mlContext.Transforms.Concatenate("Features", "FeaturesRFF")) .Append(new ValueToKeyMappingEstimator(mlContext, "Label")) diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 7dc92cb996..e3f1646cda 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -717,9 +717,9 @@ public void EntryPointPipelineEnsemble() for (int i = 0; i < nModels; i++) { var data = splitOutput.TrainData[i]; - data = new RandomFourierKernelMappingEstimator(Env, new[] { - new RandomFourierKernelMappingEstimator.ColumnOptions("Features1", 10, false, "Features"), - new RandomFourierKernelMappingEstimator.ColumnOptions("Features2", 10, false, "Features"), + data = new ApproximatedKernelMappingEstimator(Env, new[] { + new ApproximatedKernelMappingEstimator.ColumnOptions("Features1", 10, false, "Features"), + new ApproximatedKernelMappingEstimator.ColumnOptions("Features2", 10, false, "Features"), }).Fit(data).Transform(data); data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data); @@ -1169,9 +1169,9 @@ public void EntryPointMulticlassPipelineEnsemble() 
for (int i = 0; i < nModels; i++) { var data = splitOutput.TrainData[i]; - data = new RandomFourierKernelMappingEstimator(Env, new[] { - new RandomFourierKernelMappingEstimator.ColumnOptions("Features1", 10, false, "Features"), - new RandomFourierKernelMappingEstimator.ColumnOptions("Features2", 10, false, "Features"), + data = new ApproximatedKernelMappingEstimator(Env, new[] { + new ApproximatedKernelMappingEstimator.ColumnOptions("Features1", 10, false, "Features"), + new ApproximatedKernelMappingEstimator.ColumnOptions("Features2", 10, false, "Features"), }).Fit(data).Transform(data); data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data); @@ -2443,10 +2443,10 @@ public void TestInputBuilderBasicArgs() Columns = new[] { NormalizeTransform.AffineColumn.Parse("A"), - new NormalizeTransform.AffineColumn() { Name = "B", Source = "B", FixZero = false }, + new NormalizeTransform.AffineColumn() { Name = "B", Source = "B", EnsureZeroUntouched = false }, }, - FixZero = true, // Same as default, should not appear in the generated JSON. - MaxTrainingExamples = 1000 + EnsureZeroUntouched = true, // Same as default, should not appear in the generated JSON. + MaximumExampleCount = 1000 }; var inputBindingMap = new Dictionary>(); diff --git a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs index 6bf8ace67a..5a26d98619 100644 --- a/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs +++ b/test/Microsoft.ML.Functional.Tests/IntrospectiveTraining.cs @@ -264,21 +264,10 @@ void IntrospectNormalization() // Extract the normalizer parameters. // TODO #2854: Normalizer parameters are easy to find via intellisense. 
- int i = 0; - bool found = false; - foreach (var column in normalizer.Columns) - { - if (column.Name == "Features") - { - found = true; - var featuresNormalizer = normalizer.Columns[i].ModelParameters as NormalizingTransformer.AffineNormalizerModelParameters>; - Assert.NotNull(featuresNormalizer); - Common.AssertFiniteNumbers(featuresNormalizer.Offset); - Common.AssertFiniteNumbers(featuresNormalizer.Scale); - } - i++; - } - Assert.True(found); + var config = normalizer.GetNormalizerModelParameters(0) as NormalizingTransformer.AffineNormalizerModelParameters>; + Assert.NotNull(config); + Common.AssertFiniteNumbers(config.Offset); + Common.AssertFiniteNumbers(config.Scale); } /// /// Introspective Training: I can inspect a pipeline to determine which transformers were included. diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs index 660c83209b..2879dc230d 100644 --- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs +++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs @@ -405,7 +405,7 @@ public void NormalizerWithOnFit() var est = reader.MakeNewEstimator() .Append(r => (r, ncdf: r.NormalizeByCumulativeDistribution(onFit: (m, s) => mm = m), - n: r.NormalizeByMeanVar(onFit: (s, o) => { ss = s; Assert.Empty(o); }), + n: r.NormalizeMeanVariance(onFit: (s, o) => { ss = s; Assert.Empty(o); }), b: r.NormalizeByBinning(onFit: b => bb = b))); var tdata = est.Fit(data).Transform(data); @@ -634,8 +634,8 @@ public void LpGcNormAndWhitening() var est = reader.MakeNewEstimator() .Append(r => (r.label, - lpnorm: r.features.LpNormalize(), - gcnorm: r.features.GlobalContrastNormalize(), + lpnorm: r.features.NormalizeLpNorm(), + gcnorm: r.features.NormalizeGlobalContrast(), zcawhitened: r.features.ZcaWhitening(), pcswhitened: r.features.PcaWhitening())); var tdata = est.Fit(data).Transform(data); @@ -757,7 +757,7 @@ public void PrincipalComponentAnalysis() var est = 
reader.MakeNewEstimator() .Append(r => (r.label, - pca: r.features.ToPrincipalComponents(rank: 5))); + pca: r.features.ProjectToPrincipalComponents(rank: 5))); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; @@ -849,7 +849,7 @@ public void TestPcaStatic() separator: ';', hasHeader: true); var data = reader.Load(dataSource); var est = reader.MakeNewEstimator() - .Append(r => (r.label, pca: r.features.ToPrincipalComponents(rank: 5))); + .Append(r => (r.label, pca: r.features.ProjectToPrincipalComponents(rank: 5))); var tdata = est.Fit(data).Transform(data); var schema = tdata.AsDynamic.Schema; diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs index cbcbbcf231..b3eb49a45c 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs @@ -338,10 +338,10 @@ private void NormalizationWorkout(string dataPath) // Apply all kinds of standard ML.NET normalization to the raw features. var pipeline = loader.MakeNewEstimator() .Append(r => ( - MinMaxNormalized: r.Features.Normalize(fixZero: true), - MeanVarNormalized: r.Features.NormalizeByMeanVar(fixZero: false), + MinMaxNormalized: r.Features.Normalize(ensureZeroUntouched: true), + MeanVarNormalized: r.Features.NormalizeMeanVariance(ensureZeroUntouched: false), CdfNormalized: r.Features.NormalizeByCumulativeDistribution(), - BinNormalized: r.Features.NormalizeByBinning(maxBins: 256) + BinNormalized: r.Features.NormalizeByBinning(maximumBinCount: 256) )); // Let's train our pipeline of normalizers, and then apply it to the same data. 
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 6fb2202692..cebf20503e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -241,9 +241,9 @@ private void NormalizationWorkout(string dataPath) // Apply all kinds of standard ML.NET normalization to the raw features. var pipeline = mlContext.Transforms.Normalize( - new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true), - new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true), - new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256)); + new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", ensureZeroUntouched: true), + new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true), + new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256)); // Let's train our pipeline of normalizers, and then apply it to the same data. 
var normalizedData = pipeline.Fit(trainData).Transform(trainData); diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs index af9cd61078..f87652a9bb 100644 --- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs @@ -51,18 +51,18 @@ public void NormalizerWorkout() new NormalizingEstimator.BinningColumnOptions("float4bin", "float4"), new NormalizingEstimator.BinningColumnOptions("double1bin", "double1"), new NormalizingEstimator.BinningColumnOptions("double4bin", "double4"), - new NormalizingEstimator.SupervisedBinningColumOptions("float1supervisedbin", "float1", labelColumn: "int1"), - new NormalizingEstimator.SupervisedBinningColumOptions("float4supervisedbin", "float4", labelColumn: "int1"), - new NormalizingEstimator.SupervisedBinningColumOptions("double1supervisedbin", "double1", labelColumn: "int1"), - new NormalizingEstimator.SupervisedBinningColumOptions("double4supervisedbin", "double4", labelColumn: "int1"), - new NormalizingEstimator.MeanVarColumnOptions("float1mv", "float1"), - new NormalizingEstimator.MeanVarColumnOptions("float4mv", "float4"), - new NormalizingEstimator.MeanVarColumnOptions("double1mv", "double1"), - new NormalizingEstimator.MeanVarColumnOptions("double4mv", "double4"), - new NormalizingEstimator.LogMeanVarColumnOptions("float1lmv", "float1"), - new NormalizingEstimator.LogMeanVarColumnOptions("float4lmv", "float4"), - new NormalizingEstimator.LogMeanVarColumnOptions("double1lmv", "double1"), - new NormalizingEstimator.LogMeanVarColumnOptions("double4lmv", "double4")); + new NormalizingEstimator.SupervisedBinningColumOptions("float1supervisedbin", "float1", labelColumnName: "int1"), + new NormalizingEstimator.SupervisedBinningColumOptions("float4supervisedbin", "float4", labelColumnName: "int1"), + new NormalizingEstimator.SupervisedBinningColumOptions("double1supervisedbin", "double1", 
labelColumnName: "int1"), + new NormalizingEstimator.SupervisedBinningColumOptions("double4supervisedbin", "double4", labelColumnName: "int1"), + new NormalizingEstimator.MeanVarianceColumnOptions("float1mv", "float1"), + new NormalizingEstimator.MeanVarianceColumnOptions("float4mv", "float4"), + new NormalizingEstimator.MeanVarianceColumnOptions("double1mv", "double1"), + new NormalizingEstimator.MeanVarianceColumnOptions("double4mv", "double4"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("float1lmv", "float1"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("float4lmv", "float4"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("double1lmv", "double1"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("double4lmv", "double4")); var data = loader.Load(dataPath); @@ -116,14 +116,14 @@ public void NormalizerParameters() new NormalizingEstimator.BinningColumnOptions("float4bin", "float4"), new NormalizingEstimator.BinningColumnOptions("double1bin", "double1"), new NormalizingEstimator.BinningColumnOptions("double4bin", "double4"), - new NormalizingEstimator.MeanVarColumnOptions("float1mv", "float1"), - new NormalizingEstimator.MeanVarColumnOptions("float4mv", "float4"), - new NormalizingEstimator.MeanVarColumnOptions("double1mv", "double1"), - new NormalizingEstimator.MeanVarColumnOptions("double4mv", "double4"), - new NormalizingEstimator.LogMeanVarColumnOptions("float1lmv", "float1"), - new NormalizingEstimator.LogMeanVarColumnOptions("float4lmv", "float4"), - new NormalizingEstimator.LogMeanVarColumnOptions("double1lmv", "double1"), - new NormalizingEstimator.LogMeanVarColumnOptions("double4lmv", "double4")); + new NormalizingEstimator.MeanVarianceColumnOptions("float1mv", "float1"), + new NormalizingEstimator.MeanVarianceColumnOptions("float4mv", "float4"), + new NormalizingEstimator.MeanVarianceColumnOptions("double1mv", "double1"), + new NormalizingEstimator.MeanVarianceColumnOptions("double4mv", "double4"), + new 
NormalizingEstimator.LogMeanVarianceColumnOptions("float1lmv", "float1"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("float4lmv", "float4"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("double1lmv", "double1"), + new NormalizingEstimator.LogMeanVarianceColumnOptions("double4lmv", "double4")); var data = loader.Load(dataPath); @@ -186,22 +186,22 @@ public void NormalizerParameters() var floatCdfLogMeanData = transformer.Columns[12].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; Assert.Equal(1.75623953f, floatCdfLogMeanData.Mean); Assert.True(true == floatCdfLogMeanData.UseLog); - Assert.Equal(0.140807763f, floatCdfLogMeanData.Stddev); + Assert.Equal(0.140807763f, floatCdfLogMeanData.StandardDeviation); var floatCdfLogMeanDataVec = transformer.Columns[13].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; Assert.Equal(4, floatCdfLogMeanDataVec.Mean.Length); Assert.True(true == floatCdfLogMeanDataVec.UseLog); - Assert.Equal(4, floatCdfLogMeanDataVec.Stddev.Length); + Assert.Equal(4, floatCdfLogMeanDataVec.StandardDeviation.Length); var doubleCdfLogMeanData = transformer.Columns[14].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters; Assert.Equal(1.7562395401953814, doubleCdfLogMeanData.Mean); Assert.True(doubleCdfLogMeanData.UseLog); - Assert.Equal(0.14080776721611848, doubleCdfLogMeanData.Stddev); + Assert.Equal(0.14080776721611848, doubleCdfLogMeanData.StandardDeviation); var doubleCdfLogMeanDataVec = transformer.Columns[15].ModelParameters as NormalizingTransformer.CdfNormalizerModelParameters>; Assert.Equal(4, doubleCdfLogMeanDataVec.Mean.Length); Assert.True(doubleCdfLogMeanDataVec.UseLog); - Assert.Equal(4, doubleCdfLogMeanDataVec.Stddev.Length); + Assert.Equal(4, doubleCdfLogMeanDataVec.StandardDeviation.Length); Done(); } @@ -272,8 +272,8 @@ public void LpGcNormAndWhiteningWorkout() separator: ';', hasHeader: true) .Load(dataSource); - var est = 
ML.Transforms.LpNormalize("lpnorm", "features") - .Append(ML.Transforms.GlobalContrastNormalize("gcnorm", "features")) + var est = ML.Transforms.NormalizeLpNorm("lpnorm", "features") + .Append(ML.Transforms.NormalizeGlobalContrast("gcnorm", "features")) .Append(new VectorWhiteningEstimator(ML, "whitened", "features")); TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic); @@ -368,8 +368,8 @@ public void LpNormWorkout() separator: ';', hasHeader: true) .Load(dataSource); - var est = ML.Transforms.LpNormalize("lpNorm1", "features") - .Append(ML.Transforms.LpNormalize("lpNorm2", "features", normKind: LpNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true)); + var est = ML.Transforms.NormalizeLpNorm("lpNorm1", "features") + .Append(ML.Transforms.NormalizeLpNorm("lpNorm2", "features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true)); TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic); var outputPath = GetOutputPath("NormalizerEstimator", "lpNorm.tsv"); @@ -401,7 +401,7 @@ public void TestLpNormOldSavingAndLoading() c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Load(dataSource).AsDynamic; - var pipe = ML.Transforms.LpNormalize("whitened", "features"); + var pipe = ML.Transforms.NormalizeLpNorm("whitened", "features"); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); @@ -427,8 +427,8 @@ public void GcnWorkout() separator: ';', hasHeader: true) .Load(dataSource); - var est = ML.Transforms.GlobalContrastNormalize("gcnNorm1", "features") - .Append(ML.Transforms.GlobalContrastNormalize("gcnNorm2", "features", ensureZeroMean: false, ensureUnitStandardDeviation: true, scale: 3)); + var est = ML.Transforms.NormalizeGlobalContrast("gcnNorm1", "features") + .Append(ML.Transforms.NormalizeGlobalContrast("gcnNorm2", "features", ensureZeroMean: false, ensureUnitStandardDeviation: true, scale: 3)); 
TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic); var outputPath = GetOutputPath("NormalizerEstimator", "gcnNorm.tsv"); @@ -460,7 +460,7 @@ public void TestGcnNormOldSavingAndLoading() c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)), separator: ';', hasHeader: true) .Load(dataSource).AsDynamic; - var pipe = ML.Transforms.GlobalContrastNormalize("whitened", "features"); + var pipe = ML.Transforms.NormalizeGlobalContrast("whitened", "features"); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/Transformers/RffTests.cs b/test/Microsoft.ML.Tests/Transformers/RffTests.cs index 45a5b3eaf7..a594508575 100644 --- a/test/Microsoft.ML.Tests/Transformers/RffTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/RffTests.cs @@ -51,9 +51,9 @@ public void RffWorkout() var validFitInvalidData = ML.Data.LoadFromEnumerable(new[] { new TestClassBiggerSize { A = new float[200] }, new TestClassBiggerSize { A = new float[200] } }); var dataView = ML.Data.LoadFromEnumerable(data); - var pipe = ML.Transforms.RandomFourierKernelMap(new[]{ - new RandomFourierKernelMappingEstimator.ColumnOptions("RffA", 5, false, "A"), - new RandomFourierKernelMappingEstimator.ColumnOptions("RffB", 10, true, "A", new LaplacianKernel()) + var pipe = ML.Transforms.ApproximatedKernelMap(new[]{ + new ApproximatedKernelMappingEstimator.ColumnOptions("RffA", 5, false, "A"), + new ApproximatedKernelMappingEstimator.ColumnOptions("RffB", 10, true, "A", new LaplacianKernel()) }); TestEstimatorCore(pipe, dataView, invalidInput: invalidData, validForFitNotValidForTransformInput: validFitInvalidData); @@ -73,7 +73,7 @@ public void RffStatic() var est = data.MakeNewEstimator() .Append(row => ( - RffVectorFloat: row.VectorFloat.LowerVectorSizeWithRandomFourierTransformation(3, true), row.Label)); + RffVectorFloat: row.VectorFloat.ApproximatedKernelMap(3, true), row.Label)); 
TestEstimatorCore(est.AsDynamic, data.AsDynamic); @@ -101,9 +101,9 @@ public void TestOldSavingAndLoading() }; var dataView = ML.Data.LoadFromEnumerable(data); - var est = ML.Transforms.RandomFourierKernelMap(new[]{ - new RandomFourierKernelMappingEstimator.ColumnOptions("RffA", 5, false, "A"), - new RandomFourierKernelMappingEstimator.ColumnOptions("RffB", 10, true, "A", new LaplacianKernel()) + var est = ML.Transforms.ApproximatedKernelMap(new[]{ + new ApproximatedKernelMappingEstimator.ColumnOptions("RffA", 5, false, "A"), + new ApproximatedKernelMappingEstimator.ColumnOptions("RffB", 10, true, "A", new LaplacianKernel()) }); var result = est.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result);