diff --git a/src/Microsoft.ML.Data/Transforms/BootstrapSamplingTransformer.cs b/src/Microsoft.ML.Data/Transforms/BootstrapSamplingTransformer.cs
index d44f855f1b..cb61c00db1 100644
--- a/src/Microsoft.ML.Data/Transforms/BootstrapSamplingTransformer.cs
+++ b/src/Microsoft.ML.Data/Transforms/BootstrapSamplingTransformer.cs
@@ -13,7 +13,7 @@
using Microsoft.ML.Model;
using Microsoft.ML.Transforms;
-[assembly: LoadableClass(BootstrapSamplingTransformer.Summary, typeof(BootstrapSamplingTransformer), typeof(BootstrapSamplingTransformer.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(BootstrapSamplingTransformer.Summary, typeof(BootstrapSamplingTransformer), typeof(BootstrapSamplingTransformer.Options), typeof(SignatureDataTransform),
BootstrapSamplingTransformer.UserName, "BootstrapSampleTransform", "BootstrapSample")]
[assembly: LoadableClass(BootstrapSamplingTransformer.Summary, typeof(BootstrapSamplingTransformer), null, typeof(SignatureLoadDataTransform),
@@ -36,7 +36,7 @@ internal static class Defaults
public const int PoolSize = 1000;
}
- public sealed class Arguments : TransformInputBase
+ public sealed class Options : TransformInputBase
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether this is the out-of-bag sample, that is, all those rows that are not selected by the transform.",
ShortName = "comp")]
@@ -76,16 +76,16 @@ private static VersionInfo GetVersionInfo()
private readonly bool _shuffleInput;
private readonly int _poolSize;
- public BootstrapSamplingTransformer(IHostEnvironment env, Arguments args, IDataView input)
+ public BootstrapSamplingTransformer(IHostEnvironment env, Options options, IDataView input)
: base(env, RegistrationName, input)
{
- Host.CheckValue(args, nameof(args));
- Host.CheckUserArg(args.PoolSize >= 0, nameof(args.PoolSize), "Cannot be negative");
+ Host.CheckValue(options, nameof(options));
+ Host.CheckUserArg(options.PoolSize >= 0, nameof(options.PoolSize), "Cannot be negative");
- _complement = args.Complement;
- _state = new TauswortheHybrid.State(args.Seed ?? (uint)Host.Rand.Next());
- _shuffleInput = args.ShuffleInput;
- _poolSize = args.PoolSize;
+ _complement = options.Complement;
+ _state = new TauswortheHybrid.State(options.Seed ?? (uint)Host.Rand.Next());
+ _shuffleInput = options.ShuffleInput;
+ _poolSize = options.PoolSize;
}
///
@@ -103,7 +103,7 @@ public BootstrapSamplingTransformer(IHostEnvironment env,
uint? seed = null,
bool shuffleInput = Defaults.ShuffleInput,
int poolSize = Defaults.PoolSize)
- : this(env, new Arguments() { Complement = complement, Seed = seed, ShuffleInput = shuffleInput, PoolSize = poolSize }, input)
+ : this(env, new Options() { Complement = complement, Seed = seed, ShuffleInput = shuffleInput, PoolSize = poolSize }, input)
{
}
@@ -242,7 +242,7 @@ protected override bool MoveNextCore()
internal static class BootstrapSample
{
[TlcModule.EntryPoint(Name = "Transforms.ApproximateBootstrapSampler", Desc = BootstrapSamplingTransformer.Summary, UserName = BootstrapSamplingTransformer.UserName, ShortName = BootstrapSamplingTransformer.RegistrationName)]
- public static CommonOutputs.TransformOutput GetSample(IHostEnvironment env, BootstrapSamplingTransformer.Arguments input)
+ public static CommonOutputs.TransformOutput GetSample(IHostEnvironment env, BootstrapSamplingTransformer.Options input)
{
Contracts.CheckValue(env, nameof(env));
env.CheckValue(input, nameof(input));
diff --git a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs
index 7f08930430..359067c01b 100644
--- a/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs
+++ b/src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs
@@ -15,7 +15,7 @@
using Microsoft.ML.Model;
using Microsoft.ML.Transforms;
-[assembly: LoadableClass(RowShufflingTransformer.Summary, typeof(RowShufflingTransformer), typeof(RowShufflingTransformer.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(RowShufflingTransformer.Summary, typeof(RowShufflingTransformer), typeof(RowShufflingTransformer.Options), typeof(SignatureDataTransform),
"Shuffle Transform", "ShuffleTransform", "Shuffle", "shuf")]
[assembly: LoadableClass(RowShufflingTransformer.Summary, typeof(RowShufflingTransformer), null, typeof(SignatureLoadDataTransform),
@@ -30,7 +30,8 @@ namespace Microsoft.ML.Transforms
/// rows in the input cursor, and then, successively, the output cursor will yield one
/// of these rows and replace it with another row from the input.
///
- public sealed class RowShufflingTransformer : RowToRowTransformBase
+ [BestFriend]
+ internal sealed class RowShufflingTransformer : RowToRowTransformBase
{
private static class Defaults
{
@@ -39,7 +40,7 @@ private static class Defaults
public const bool ForceShuffle = false;
}
- public sealed class Arguments
+ public sealed class Options
{
// REVIEW: A more intelligent heuristic, based on the expected size of the inputs, perhaps?
[Argument(ArgumentType.LastOccurenceWins, HelpText = "The pool will have this many rows", ShortName = "rows")]
@@ -99,14 +100,14 @@ public RowShufflingTransformer(IHostEnvironment env,
int poolRows = Defaults.PoolRows,
bool poolOnly = Defaults.PoolOnly,
bool forceShuffle = Defaults.ForceShuffle)
- : this(env, new Arguments() { PoolRows = poolRows, PoolOnly = poolOnly, ForceShuffle = forceShuffle }, input)
+ : this(env, new Options() { PoolRows = poolRows, PoolOnly = poolOnly, ForceShuffle = forceShuffle }, input)
{
}
///
/// Public constructor corresponding to SignatureDataTransform.
///
- public RowShufflingTransformer(IHostEnvironment env, Arguments args, IDataView input)
+ public RowShufflingTransformer(IHostEnvironment env, Options args, IDataView input)
: base(env, RegistrationName, input)
{
Host.CheckValue(args, nameof(args));
diff --git a/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs b/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs
index 149fb91159..c3a98e47c8 100644
--- a/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs
+++ b/src/Microsoft.ML.Ensemble/Selector/SubsetSelector/BootstrapSelector.cs
@@ -47,7 +47,7 @@ public override IEnumerable GetSubsets(Batch batch, Random rand)
for (int i = 0; i < Size; i++)
{
// REVIEW: Consider ways to reintroduce "balanced" samples.
- var viewTrain = new BootstrapSamplingTransformer(Host, new BootstrapSamplingTransformer.Arguments(), Data.Data);
+ var viewTrain = new BootstrapSamplingTransformer(Host, new BootstrapSamplingTransformer.Options(), Data.Data);
var dataTrain = new RoleMappedData(viewTrain, Data.Schema.GetColumnRoleNames());
yield return FeatureSelector.SelectFeatures(dataTrain, rand);
}
diff --git a/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs b/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs
index fad38dc3a5..b6f2b419a4 100644
--- a/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs
+++ b/src/Microsoft.ML.HalLearners.StaticPipe/VectorWhiteningStaticExtensions.cs
@@ -49,9 +49,9 @@ public override IEstimator Reconcile(IHostEnvironment env,
{
Contracts.Assert(toOutput.Length == 1);
- var infos = new VectorWhiteningTransformer.ColumnInfo[toOutput.Length];
+ var infos = new VectorWhiteningEstimator.ColumnInfo[toOutput.Length];
for (int i = 0; i < toOutput.Length; i++)
- infos[i] = new VectorWhiteningTransformer.ColumnInfo(outputNames[toOutput[i]], inputNames[((OutPipelineColumn)toOutput[i]).Input], _kind, _eps, _maxRows, _pcaNum);
+ infos[i] = new VectorWhiteningEstimator.ColumnInfo(outputNames[toOutput[i]], inputNames[((OutPipelineColumn)toOutput[i]).Input], _kind, _eps, _maxRows, _pcaNum);
return new VectorWhiteningEstimator(env, infos);
}
@@ -63,9 +63,9 @@ public override IEstimator Reconcile(IHostEnvironment env,
/// Maximum number of rows used to train the transform.
/// In case of PCA whitening, indicates the number of components to retain.
public static Vector PcaWhitening(this Vector input,
- float eps = VectorWhiteningTransformer.Defaults.Eps,
- int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
- int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
+ float eps = VectorWhiteningEstimator.Defaults.Eps,
+ int maxRows = VectorWhiteningEstimator.Defaults.MaxRows,
+ int pcaNum = VectorWhiteningEstimator.Defaults.PcaNum)
=> new OutPipelineColumn(input, WhiteningKind.Pca, eps, maxRows, pcaNum);
///
@@ -73,8 +73,8 @@ public static Vector PcaWhitening(this Vector input,
/// Whitening constant, prevents division by zero.
/// Maximum number of rows used to train the transform.
public static Vector ZcaWhitening(this Vector input,
- float eps = VectorWhiteningTransformer.Defaults.Eps,
- int maxRows = VectorWhiteningTransformer.Defaults.MaxRows)
- => new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningTransformer.Defaults.PcaNum);
+ float eps = VectorWhiteningEstimator.Defaults.Eps,
+ int maxRows = VectorWhiteningEstimator.Defaults.MaxRows)
+ => new OutPipelineColumn(input, WhiteningKind.Zca, eps, maxRows, VectorWhiteningEstimator.Defaults.PcaNum);
}
}
diff --git a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs
index 9f6f16ab51..1b635d49fc 100644
--- a/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs
+++ b/src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs
@@ -112,10 +112,10 @@ public static SymSgdClassificationTrainer SymbolicStochasticGradientDescent(
///
///
public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, string outputColumnName, string inputColumnName = null,
- WhiteningKind kind = VectorWhiteningTransformer.Defaults.Kind,
- float eps = VectorWhiteningTransformer.Defaults.Eps,
- int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
- int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
+ WhiteningKind kind = VectorWhiteningEstimator.Defaults.Kind,
+ float eps = VectorWhiteningEstimator.Defaults.Eps,
+ int maxRows = VectorWhiteningEstimator.Defaults.MaxRows,
+ int pcaNum = VectorWhiteningEstimator.Defaults.PcaNum)
=> new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, kind, eps, maxRows, pcaNum);
///
@@ -124,7 +124,7 @@ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.Proje
///
/// The transform's catalog.
/// Describes the parameters of the whitening process for each column pair.
- public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningTransformer.ColumnInfo[] columns)
+ public static VectorWhiteningEstimator VectorWhiten(this TransformsCatalog.ProjectionTransforms catalog, params VectorWhiteningEstimator.ColumnInfo[] columns)
=> new VectorWhiteningEstimator(CatalogUtils.GetEnvironment(catalog), columns);
}
diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs
index af5ecd4a11..35a6b55d4f 100644
--- a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs
+++ b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs
@@ -141,7 +141,7 @@ private RoleMappedData PrepareDataFromTrainingExamples(IChannel ch, RoleMappedDa
idvToFeedTrain = idvToShuffle;
else
{
- var shuffleArgs = new RowShufflingTransformer.Arguments
+ var shuffleArgs = new RowShufflingTransformer.Options
{
PoolOnly = false,
ForceShuffle = _options.Shuffle
diff --git a/src/Microsoft.ML.HalLearners/VectorWhitening.cs b/src/Microsoft.ML.HalLearners/VectorWhitening.cs
index 31cdd0b201..f865ba4fcd 100644
--- a/src/Microsoft.ML.HalLearners/VectorWhitening.cs
+++ b/src/Microsoft.ML.HalLearners/VectorWhitening.cs
@@ -19,7 +19,7 @@
using Microsoft.ML.Model;
using Microsoft.ML.Transforms.Projections;
-[assembly: LoadableClass(VectorWhiteningTransformer.Summary, typeof(IDataTransform), typeof(VectorWhiteningTransformer), typeof(VectorWhiteningTransformer.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(VectorWhiteningTransformer.Summary, typeof(IDataTransform), typeof(VectorWhiteningTransformer), typeof(VectorWhiteningTransformer.Options), typeof(SignatureDataTransform),
VectorWhiteningTransformer.FriendlyName, VectorWhiteningTransformer.LoaderSignature, "Whitening")]
[assembly: LoadableClass(VectorWhiteningTransformer.Summary, typeof(IDataTransform), typeof(VectorWhiteningTransformer), null, typeof(SignatureLoadDataTransform),
@@ -33,11 +33,18 @@
namespace Microsoft.ML.Transforms.Projections
{
+ /// <summary>
+ /// Which vector whitening technique to use. ZCA whitening ensures that the average covariance between whitened
+ /// and original variables is maximal. In contrast, PCA whitening leads to maximally compressed whitened variables, as
+ /// measured by squared covariance.
+ /// </summary>
public enum WhiteningKind
{
+ /// <summary>PCA whitening.</summary>
[TGUI(Label = "PCA whitening")]
Pca,
+ /// <summary>ZCA whitening.</summary>
[TGUI(Label = "ZCA whitening")]
Zca
}
@@ -45,42 +52,32 @@ public enum WhiteningKind
///
public sealed class VectorWhiteningTransformer : OneToOneTransformerBase
{
- [BestFriend]
- internal static class Defaults
- {
- public const WhiteningKind Kind = WhiteningKind.Zca;
- public const float Eps = 1e-5f;
- public const int MaxRows = 100 * 1000;
- public const bool SaveInverse = false;
- public const int PcaNum = 0;
- }
-
- public sealed class Arguments
+ internal sealed class Options
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whitening kind (PCA/ZCA)")]
- public WhiteningKind Kind = Defaults.Kind;
+ public WhiteningKind Kind = VectorWhiteningEstimator.Defaults.Kind;
[Argument(ArgumentType.AtMostOnce, HelpText = "Scaling regularizer")]
- public float Eps = Defaults.Eps;
+ public float Eps = VectorWhiteningEstimator.Defaults.Eps;
[Argument(ArgumentType.AtMostOnce, HelpText = "Max number of rows", ShortName = "rows")]
- public int MaxRows = Defaults.MaxRows;
+ public int MaxRows = VectorWhiteningEstimator.Defaults.MaxRows;
[Argument(ArgumentType.AtMostOnce, HelpText = "Whether to save inverse (recovery) matrix", ShortName = "saveInv")]
- public bool SaveInverse = Defaults.SaveInverse;
+ public bool SaveInverse = VectorWhiteningEstimator.Defaults.SaveInverse;
[Argument(ArgumentType.AtMostOnce, HelpText = "PCA components to retain")]
- public int PcaNum = Defaults.PcaNum;
+ public int PcaNum = VectorWhiteningEstimator.Defaults.PcaNum;
// REVIEW: add the following options:
// 1. Currently there is no way to apply an inverse transform AFTER the the transform is trained.
// 2. How many PCA components to retain/drop. Options: retain-first, drop-first, variance-threshold.
}
- public sealed class Column : OneToOneColumn
+ internal sealed class Column : OneToOneColumn
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Whitening kind (PCA/ZCA)")]
public WhiteningKind? Kind;
@@ -116,103 +113,6 @@ internal bool TryUnparse(StringBuilder sb)
}
}
- public sealed class ColumnInfo
- {
- public readonly string Name;
- public readonly string InputColumnName;
- public readonly WhiteningKind Kind;
- public readonly float Epsilon;
- public readonly int MaxRow;
- public readonly int PcaNum;
- internal readonly bool SaveInv;
-
- ///
- /// Describes how the transformer handles one input-output column pair.
- ///
- /// Name of the column resulting from the transformation of .
- /// Name of column to transform. If set to , the value of the will be used as source.
- /// Whitening kind (PCA/ZCA).
- /// Whitening constant, prevents division by zero.
- /// Maximum number of rows used to train the transform.
- /// In case of PCA whitening, indicates the number of components to retain.
- public ColumnInfo(string name, string inputColumnName = null, WhiteningKind kind = Defaults.Kind, float eps = Defaults.Eps,
- int maxRows = Defaults.MaxRows, int pcaNum = Defaults.PcaNum)
- {
- Name = name;
- Contracts.CheckValue(Name, nameof(Name));
- InputColumnName = inputColumnName ?? name;
- Contracts.CheckValue(InputColumnName, nameof(InputColumnName));
- Kind = kind;
- Contracts.CheckUserArg(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca, nameof(Kind));
- Epsilon = eps;
- Contracts.CheckUserArg(0 <= Epsilon && Epsilon < float.PositiveInfinity, nameof(Epsilon));
- MaxRow = maxRows;
- Contracts.CheckUserArg(MaxRow > 0, nameof(MaxRow));
- SaveInv = Defaults.SaveInverse;
- PcaNum = pcaNum; // REVIEW: make it work with pcaNum == 1.
- Contracts.CheckUserArg(PcaNum >= 0, nameof(PcaNum));
- }
-
- internal ColumnInfo(Column item, Arguments args)
- {
- Name = item.Name;
- Contracts.CheckValue(Name, nameof(Name));
- InputColumnName = item.Source ?? item.Name;
- Contracts.CheckValue(InputColumnName, nameof(InputColumnName));
- Kind = item.Kind ?? args.Kind;
- Contracts.CheckUserArg(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca, nameof(item.Kind));
- Epsilon = item.Eps ?? args.Eps;
- Contracts.CheckUserArg(0 <= Epsilon && Epsilon < float.PositiveInfinity, nameof(item.Eps));
- MaxRow = item.MaxRows ?? args.MaxRows;
- Contracts.CheckUserArg(MaxRow > 0, nameof(item.MaxRows));
- SaveInv = item.SaveInverse ?? args.SaveInverse;
- PcaNum = item.PcaNum ?? args.PcaNum;
- Contracts.CheckUserArg(PcaNum >= 0, nameof(item.PcaNum));
- }
-
- internal ColumnInfo(ModelLoadContext ctx)
- {
- Contracts.AssertValue(ctx);
-
- // *** Binary format ***
- // int: kind
- // float: epsilon
- // int: maxrow
- // byte: saveInv
- // int: pcaNum
- Kind = (WhiteningKind)ctx.Reader.ReadInt32();
- Contracts.CheckDecode(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca);
- Epsilon = ctx.Reader.ReadFloat();
- Contracts.CheckDecode(0 <= Epsilon && Epsilon < float.PositiveInfinity);
- MaxRow = ctx.Reader.ReadInt32();
- Contracts.CheckDecode(MaxRow > 0);
- SaveInv = ctx.Reader.ReadBoolByte();
- PcaNum = ctx.Reader.ReadInt32();
- Contracts.CheckDecode(PcaNum >= 0);
- }
-
- internal void Save(ModelSaveContext ctx)
- {
- Contracts.AssertValue(ctx);
-
- // *** Binary format ***
- // int: kind
- // float: epsilon
- // int: maxrow
- // byte: saveInv
- // int: pcaNum
- Contracts.Assert(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca);
- ctx.Writer.Write((int)Kind);
- Contracts.Assert(0 <= Epsilon && Epsilon < float.PositiveInfinity);
- ctx.Writer.Write(Epsilon);
- Contracts.Assert(MaxRow > 0);
- ctx.Writer.Write(MaxRow);
- ctx.Writer.WriteBoolByte(SaveInv);
- Contracts.Assert(PcaNum >= 0);
- ctx.Writer.Write(PcaNum);
- }
- }
-
private const Mkl.Layout Layout = Mkl.Layout.RowMajor;
// Stores whitening matrix as float[] for each column. _models[i] is the whitening matrix of the i-th input column.
@@ -238,7 +138,7 @@ private static VersionInfo GetVersionInfo()
loaderAssemblyName: typeof(VectorWhiteningTransformer).Assembly.FullName);
}
- private readonly ColumnInfo[] _columns;
+ private readonly VectorWhiteningEstimator.ColumnInfo[] _columns;
///
/// Initializes a new object.
@@ -247,7 +147,7 @@ private static VersionInfo GetVersionInfo()
/// An array of whitening matrices where models[i] is learned from the i-th element of .
/// An array of inverse whitening matrices, the i-th element being the inverse matrix of models[i].
/// Describes the parameters of the whitening process for each column pair.
- internal VectorWhiteningTransformer(IHostEnvironment env, float[][] models, float[][] invModels, params ColumnInfo[] columns)
+ internal VectorWhiteningTransformer(IHostEnvironment env, float[][] models, float[][] invModels, params VectorWhiteningEstimator.ColumnInfo[] columns)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(VectorWhiteningTransformer)), GetColumnPairs(columns))
{
Host.AssertNonEmpty(ColumnPairs);
@@ -270,9 +170,9 @@ private VectorWhiteningTransformer(IHostEnvironment env, ModelLoadContext ctx)
// recovery matrix
Host.AssertNonEmpty(ColumnPairs);
- _columns = new ColumnInfo[ColumnPairs.Length];
+ _columns = new VectorWhiteningEstimator.ColumnInfo[ColumnPairs.Length];
for (int i = 0; i < _columns.Length; i++)
- _columns[i] = new ColumnInfo(ctx);
+ _columns[i] = new VectorWhiteningEstimator.ColumnInfo(ctx);
_models = new float[ColumnPairs.Length][];
_invModels = new float[ColumnPairs.Length][];
@@ -293,9 +193,9 @@ internal static VectorWhiteningTransformer Create(IHostEnvironment env, ModelLoa
}
// Factory method for SignatureDataTransform.
- internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
+ internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
- var infos = args.Columns.Select(colPair => new ColumnInfo(colPair, args)).ToArray();
+ var infos = options.Columns.Select(colPair => new VectorWhiteningEstimator.ColumnInfo(colPair, options)).ToArray();
(var models, var invModels) = TrainVectorWhiteningTransform(env, input, infos);
return new VectorWhiteningTransformer(env, models, invModels, infos).MakeDataTransform(input);
}
@@ -308,7 +208,7 @@ internal static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx
internal static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Schema inputSchema)
=> Create(env, ctx).MakeRowMapper(inputSchema);
- private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ColumnInfo[] columns)
+ private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(VectorWhiteningEstimator.ColumnInfo[] columns)
=> columns.Select(c => (c.Name, c.InputColumnName ?? c.Name)).ToArray();
protected override void CheckInputColumn(Schema inputSchema, int col, int srcCol)
@@ -345,7 +245,7 @@ private static void ValidateModel(IExceptionContext ectx, float[] model, ColumnT
// Sometime GetRowCount doesn't really return the number of rows in the associated IDataView.
// A more reliable solution is to turely iterate through all rows via a RowCursor.
- private static long GetRowCount(IDataView inputData, params ColumnInfo[] columns)
+ private static long GetRowCount(IDataView inputData, params VectorWhiteningEstimator.ColumnInfo[] columns)
{
long? rows = inputData.GetRowCount();
if (rows != null)
@@ -362,7 +262,7 @@ private static long GetRowCount(IDataView inputData, params ColumnInfo[] columns
}
// Computes the transformation matrices needed for whitening process from training data.
- internal static (float[][] models, float[][] invModels) TrainVectorWhiteningTransform(IHostEnvironment env, IDataView inputData, params ColumnInfo[] columns)
+ internal static (float[][] models, float[][] invModels) TrainVectorWhiteningTransform(IHostEnvironment env, IDataView inputData, params VectorWhiteningEstimator.ColumnInfo[] columns)
{
var models = new float[columns.Length][];
var invModels = new float[columns.Length][];
@@ -378,7 +278,7 @@ internal static (float[][] models, float[][] invModels) TrainVectorWhiteningTran
}
// Extracts the indices and types of the input columns to the whitening transform.
- private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputData, ColumnInfo[] columns, out ColumnType[] srcTypes, out int[] cols)
+ private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputData, VectorWhiteningEstimator.ColumnInfo[] columns, out ColumnType[] srcTypes, out int[] cols)
{
cols = new int[columns.Length];
srcTypes = new ColumnType[columns.Length];
@@ -400,7 +300,7 @@ private static void GetColTypesAndIndex(IHostEnvironment env, IDataView inputDat
// Loads all relevant data for whitening training into memory.
private static float[][] LoadDataAsDense(IHostEnvironment env, IChannel ch, IDataView inputData, out int[] actualRowCounts,
- ColumnType[] srcTypes, int[] cols, params ColumnInfo[] columns)
+ ColumnType[] srcTypes, int[] cols, params VectorWhiteningEstimator.ColumnInfo[] columns)
{
long crowData = GetRowCount(inputData, columns);
@@ -467,7 +367,7 @@ private static float[][] LoadDataAsDense(IHostEnvironment env, IChannel ch, IDat
// will have dimension input_vec_size x input_vec_size. In the getter, the matrix will be truncated to only keep
// PcaNum columns, and thus produce the desired output size.
private static void TrainModels(IHostEnvironment env, IChannel ch, float[][] columnData, int[] rowCounts,
- ref float[][] models, ref float[][] invModels, ColumnType[] srcTypes, params ColumnInfo[] columns)
+ ref float[][] models, ref float[][] invModels, ColumnType[] srcTypes, params VectorWhiteningEstimator.ColumnInfo[] columns)
{
ch.Assert(columnData.Length == rowCounts.Length);
@@ -772,15 +672,143 @@ private static float DotProduct(float[] a, int aOffset, ReadOnlySpan b, R
///
public sealed class VectorWhiteningEstimator : IEstimator
{
+ [BestFriend]
+ internal static class Defaults
+ {
+ public const WhiteningKind Kind = WhiteningKind.Zca;
+ public const float Eps = 1e-5f;
+ public const int MaxRows = 100 * 1000;
+ public const bool SaveInverse = false;
+ public const int PcaNum = 0;
+ }
+
+ /// <summary>
+ /// Describes how the transformer handles one column pair.
+ /// </summary>
+ public sealed class ColumnInfo
+ {
+ ///
+ /// Name of the column resulting from the transformation of .
+ ///
+ public readonly string Name;
+ ///
+ /// Name of column to transform.
+ ///
+ public readonly string InputColumnName;
+ ///
+ /// Whitening kind (PCA/ZCA).
+ ///
+ public readonly WhiteningKind Kind;
+ ///
+ /// Whitening constant, prevents division by zero.
+ ///
+ public readonly float Epsilon;
+ ///
+ /// Maximum number of rows used to train the transform.
+ ///
+ public readonly int MaxRow;
+ ///
+ /// In case of PCA whitening, indicates the number of components to retain.
+ ///
+ public readonly int PcaNum;
+ internal readonly bool SaveInv;
+
+ ///
+ /// Describes how the transformer handles one input-output column pair.
+ ///
+ /// Name of the column resulting from the transformation of .
+ /// Name of column to transform. If set to , the value of the will be used as source.
+ /// Whitening kind (PCA/ZCA).
+ /// Whitening constant, prevents division by zero.
+ /// Maximum number of rows used to train the transform.
+ /// In case of PCA whitening, indicates the number of components to retain.
+ public ColumnInfo(string name, string inputColumnName = null, WhiteningKind kind = Defaults.Kind, float eps = Defaults.Eps,
+ int maxRows = Defaults.MaxRows, int pcaNum = Defaults.PcaNum)
+ {
+ Name = name;
+ Contracts.CheckValue(Name, nameof(Name));
+ InputColumnName = inputColumnName ?? name;
+ Contracts.CheckValue(InputColumnName, nameof(InputColumnName));
+ Kind = kind;
+ Contracts.CheckUserArg(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca, nameof(Kind));
+ Epsilon = eps;
+ Contracts.CheckUserArg(0 <= Epsilon && Epsilon < float.PositiveInfinity, nameof(Epsilon));
+ MaxRow = maxRows;
+ Contracts.CheckUserArg(MaxRow > 0, nameof(MaxRow));
+ SaveInv = Defaults.SaveInverse;
+ PcaNum = pcaNum; // REVIEW: make it work with pcaNum == 1.
+ Contracts.CheckUserArg(PcaNum >= 0, nameof(PcaNum));
+ }
+
+ internal ColumnInfo(VectorWhiteningTransformer.Column item, VectorWhiteningTransformer.Options options)
+ {
+ Name = item.Name;
+ Contracts.CheckValue(Name, nameof(Name));
+ InputColumnName = item.Source ?? item.Name;
+ Contracts.CheckValue(InputColumnName, nameof(InputColumnName));
+ Kind = item.Kind ?? options.Kind;
+ Contracts.CheckUserArg(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca, nameof(item.Kind));
+ Epsilon = item.Eps ?? options.Eps;
+ Contracts.CheckUserArg(0 <= Epsilon && Epsilon < float.PositiveInfinity, nameof(item.Eps));
+ MaxRow = item.MaxRows ?? options.MaxRows;
+ Contracts.CheckUserArg(MaxRow > 0, nameof(item.MaxRows));
+ SaveInv = item.SaveInverse ?? options.SaveInverse;
+ PcaNum = item.PcaNum ?? options.PcaNum;
+ Contracts.CheckUserArg(PcaNum >= 0, nameof(item.PcaNum));
+ }
+
+ internal ColumnInfo(ModelLoadContext ctx)
+ {
+ Contracts.AssertValue(ctx);
+
+ // *** Binary format ***
+ // int: kind
+ // float: epsilon
+ // int: maxrow
+ // byte: saveInv
+ // int: pcaNum
+ Kind = (WhiteningKind)ctx.Reader.ReadInt32();
+ Contracts.CheckDecode(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca);
+ Epsilon = ctx.Reader.ReadFloat();
+ Contracts.CheckDecode(0 <= Epsilon && Epsilon < float.PositiveInfinity);
+ MaxRow = ctx.Reader.ReadInt32();
+ Contracts.CheckDecode(MaxRow > 0);
+ SaveInv = ctx.Reader.ReadBoolByte();
+ PcaNum = ctx.Reader.ReadInt32();
+ Contracts.CheckDecode(PcaNum >= 0);
+ }
+
+ internal void Save(ModelSaveContext ctx)
+ {
+ Contracts.AssertValue(ctx);
+
+ // *** Binary format ***
+ // int: kind
+ // float: epsilon
+ // int: maxrow
+ // byte: saveInv
+ // int: pcaNum
+ Contracts.Assert(Kind == WhiteningKind.Pca || Kind == WhiteningKind.Zca);
+ ctx.Writer.Write((int)Kind);
+ Contracts.Assert(0 <= Epsilon && Epsilon < float.PositiveInfinity);
+ ctx.Writer.Write(Epsilon);
+ Contracts.Assert(MaxRow > 0);
+ ctx.Writer.Write(MaxRow);
+ ctx.Writer.WriteBoolByte(SaveInv);
+ Contracts.Assert(PcaNum >= 0);
+ ctx.Writer.Write(PcaNum);
+ }
+ }
+
private readonly IHost _host;
- private readonly VectorWhiteningTransformer.ColumnInfo[] _infos;
+ private readonly ColumnInfo[] _infos;
///
/// The environment.
/// Describes the parameters of the whitening process for each column pair.
- public VectorWhiteningEstimator(IHostEnvironment env, params VectorWhiteningTransformer.ColumnInfo[] columns)
+ internal VectorWhiteningEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
- _host = Contracts.CheckRef(env, nameof(env)).Register(nameof(VectorWhiteningTransformer));
+ _host = Contracts.CheckRef(env, nameof(env)).Register(nameof(VectorWhiteningEstimator));
_infos = columns;
}
@@ -792,15 +820,18 @@ public VectorWhiteningEstimator(IHostEnvironment env, params VectorWhiteningTran
/// Whitening constant, prevents division by zero when scaling the data by inverse of eigenvalues.
/// Maximum number of rows used to train the transform.
/// In case of PCA whitening, indicates the number of components to retain.
- public VectorWhiteningEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
- WhiteningKind kind = VectorWhiteningTransformer.Defaults.Kind,
- float eps = VectorWhiteningTransformer.Defaults.Eps,
- int maxRows = VectorWhiteningTransformer.Defaults.MaxRows,
- int pcaNum = VectorWhiteningTransformer.Defaults.PcaNum)
- : this(env, new VectorWhiteningTransformer.ColumnInfo(outputColumnName, inputColumnName, kind, eps, maxRows, pcaNum))
+ internal VectorWhiteningEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
+ WhiteningKind kind = Defaults.Kind,
+ float eps = Defaults.Eps,
+ int maxRows = Defaults.MaxRows,
+ int pcaNum = Defaults.PcaNum)
+ : this(env, new ColumnInfo(outputColumnName, inputColumnName, kind, eps, maxRows, pcaNum))
{
}
+ /// <summary>
+ /// Trains and returns a <see cref="VectorWhiteningTransformer"/>.
+ /// </summary>
public VectorWhiteningTransformer Fit(IDataView input)
{
// Build transformation matrices for whitening process, then construct a trained transform.
@@ -809,7 +840,8 @@ public VectorWhiteningTransformer Fit(IDataView input)
}
///
- /// Returns the schema that would be produced by the transformation.
+ /// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
+ /// Used for schema propagation and verification in a pipeline.
///
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
diff --git a/src/Microsoft.ML.PCA/PCACatalog.cs b/src/Microsoft.ML.PCA/PCACatalog.cs
index fca74a2524..d60bf7d1d0 100644
--- a/src/Microsoft.ML.PCA/PCACatalog.cs
+++ b/src/Microsoft.ML.PCA/PCACatalog.cs
@@ -33,7 +33,7 @@ public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(t
/// Initializes a new instance of .
/// The transform's catalog.
/// Input columns to apply PrincipalComponentAnalysis on.
- public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, params PcaTransformer.ColumnInfo[] columns)
+ public static PrincipalComponentAnalysisEstimator ProjectToPrincipalComponents(this TransformsCatalog.ProjectionTransforms catalog, params PrincipalComponentAnalysisEstimator.ColumnInfo[] columns)
=> new PrincipalComponentAnalysisEstimator(CatalogUtils.GetEnvironment(catalog), columns);
}
}
diff --git a/src/Microsoft.ML.PCA/PcaTransformer.cs b/src/Microsoft.ML.PCA/PcaTransformer.cs
index 15918c4031..b6b134a1e0 100644
--- a/src/Microsoft.ML.PCA/PcaTransformer.cs
+++ b/src/Microsoft.ML.PCA/PcaTransformer.cs
@@ -17,26 +17,26 @@
using Microsoft.ML.Numeric;
using Microsoft.ML.Transforms.Projections;
-[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), typeof(PcaTransformer.Arguments), typeof(SignatureDataTransform),
- PcaTransformer.UserName, PcaTransformer.LoaderSignature, PcaTransformer.ShortName)]
+[assembly: LoadableClass(PrincipalComponentAnalysisTransformer.Summary, typeof(IDataTransform), typeof(PrincipalComponentAnalysisTransformer), typeof(PrincipalComponentAnalysisTransformer.Options), typeof(SignatureDataTransform),
+ PrincipalComponentAnalysisTransformer.UserName, PrincipalComponentAnalysisTransformer.LoaderSignature, PrincipalComponentAnalysisTransformer.ShortName)]
-[assembly: LoadableClass(PcaTransformer.Summary, typeof(IDataTransform), typeof(PcaTransformer), null, typeof(SignatureLoadDataTransform),
- PcaTransformer.UserName, PcaTransformer.LoaderSignature)]
+[assembly: LoadableClass(PrincipalComponentAnalysisTransformer.Summary, typeof(IDataTransform), typeof(PrincipalComponentAnalysisTransformer), null, typeof(SignatureLoadDataTransform),
+ PrincipalComponentAnalysisTransformer.UserName, PrincipalComponentAnalysisTransformer.LoaderSignature)]
-[assembly: LoadableClass(PcaTransformer.Summary, typeof(PcaTransformer), null, typeof(SignatureLoadModel),
- PcaTransformer.UserName, PcaTransformer.LoaderSignature)]
+[assembly: LoadableClass(PrincipalComponentAnalysisTransformer.Summary, typeof(PrincipalComponentAnalysisTransformer), null, typeof(SignatureLoadModel),
+ PrincipalComponentAnalysisTransformer.UserName, PrincipalComponentAnalysisTransformer.LoaderSignature)]
-[assembly: LoadableClass(typeof(IRowMapper), typeof(PcaTransformer), null, typeof(SignatureLoadRowMapper),
- PcaTransformer.UserName, PcaTransformer.LoaderSignature)]
+[assembly: LoadableClass(typeof(IRowMapper), typeof(PrincipalComponentAnalysisTransformer), null, typeof(SignatureLoadRowMapper),
+ PrincipalComponentAnalysisTransformer.UserName, PrincipalComponentAnalysisTransformer.LoaderSignature)]
-[assembly: LoadableClass(typeof(void), typeof(PcaTransformer), null, typeof(SignatureEntryPointModule), PcaTransformer.LoaderSignature)]
+[assembly: LoadableClass(typeof(void), typeof(PrincipalComponentAnalysisTransformer), null, typeof(SignatureEntryPointModule), PrincipalComponentAnalysisTransformer.LoaderSignature)]
namespace Microsoft.ML.Transforms.Projections
{
///
- public sealed class PcaTransformer : OneToOneTransformerBase
+ public sealed class PrincipalComponentAnalysisTransformer : OneToOneTransformerBase
{
- public sealed class Arguments : TransformInputBase
+ internal sealed class Options : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
@@ -57,7 +57,7 @@ public sealed class Arguments : TransformInputBase
public int Seed = PrincipalComponentAnalysisEstimator.Defaults.Seed;
}
- public class Column : OneToOneColumn
+ internal class Column : OneToOneColumn
{
[Argument(ArgumentType.Multiple, HelpText = "The name of the weight column", ShortName = "weight")]
public string WeightColumn;
@@ -96,47 +96,6 @@ internal bool TryUnparse(StringBuilder sb)
}
}
- public sealed class ColumnInfo
- {
- public readonly string Name;
- public readonly string InputColumnName;
- public readonly string WeightColumn;
- public readonly int Rank;
- public readonly int Oversampling;
- public readonly bool Center;
- public readonly int? Seed;
-
- ///
- /// Describes how the transformer handles one column pair.
- ///
- /// Name of the column resulting from the transformation of .
- /// Name of column to transform.
- /// If set to , the value of the will be used as source.
- /// The name of the weight column.
- /// The number of components in the PCA.
- /// Oversampling parameter for randomized PCA training.
- /// If enabled, data is centered to be zero mean.
- /// The seed for random number generation.
- public ColumnInfo(string name,
- string inputColumnName = null,
- string weightColumn = PrincipalComponentAnalysisEstimator.Defaults.WeightColumn,
- int rank = PrincipalComponentAnalysisEstimator.Defaults.Rank,
- int overSampling = PrincipalComponentAnalysisEstimator.Defaults.Oversampling,
- bool center = PrincipalComponentAnalysisEstimator.Defaults.Center,
- int? seed = null)
- {
- Name = name;
- InputColumnName = inputColumnName ?? name;
- WeightColumn = weightColumn;
- Rank = rank;
- Oversampling = overSampling;
- Center = center;
- Seed = seed;
- Contracts.CheckParam(Oversampling >= 0, nameof(Oversampling), "Oversampling must be non-negative.");
- Contracts.CheckParam(Rank > 0, nameof(Rank), "Rank must be positive.");
- }
- }
-
private sealed class TransformInfo
{
public readonly int Dimension;
@@ -224,7 +183,7 @@ public void ProjectMean(float[] mean)
internal const string UserName = "Principal Component Analysis Transform";
internal const string ShortName = "Pca";
- public const string LoaderSignature = "PcaTransform";
+ internal const string LoaderSignature = "PcaTransform";
private static VersionInfo GetVersionInfo()
{
return new VersionInfo(
@@ -234,7 +193,7 @@ private static VersionInfo GetVersionInfo()
verReadableCur: 0x00010002,
verWeCanReadBack: 0x00010001,
loaderSignature: LoaderSignature,
- loaderAssemblyName: typeof(PcaTransformer).Assembly.FullName);
+ loaderAssemblyName: typeof(PrincipalComponentAnalysisTransformer).Assembly.FullName);
}
private readonly int _numColumns;
@@ -243,8 +202,8 @@ private static VersionInfo GetVersionInfo()
private const string RegistrationName = "Pca";
- internal PcaTransformer(IHostEnvironment env, IDataView input, ColumnInfo[] columns)
- : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(PcaTransformer)), GetColumnPairs(columns))
+ internal PrincipalComponentAnalysisTransformer(IHostEnvironment env, IDataView input, PrincipalComponentAnalysisEstimator.ColumnInfo[] columns)
+ : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(PrincipalComponentAnalysisTransformer)), GetColumnPairs(columns))
{
Host.AssertNonEmpty(ColumnPairs);
_numColumns = columns.Length;
@@ -262,7 +221,7 @@ internal PcaTransformer(IHostEnvironment env, IDataView input, ColumnInfo[] colu
Train(columns, _transformInfos, input);
}
- private PcaTransformer(IHost host, ModelLoadContext ctx)
+ private PrincipalComponentAnalysisTransformer(IHost host, ModelLoadContext ctx)
: base(host, ctx)
{
Host.AssertValue(ctx);
@@ -287,28 +246,28 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Sch
=> Create(env, ctx).MakeRowMapper(inputSchema);
// Factory method for SignatureDataTransform.
- private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
+ private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
- env.CheckValue(args, nameof(args));
+ env.CheckValue(options, nameof(options));
env.CheckValue(input, nameof(input));
- env.CheckValue(args.Columns, nameof(args.Columns));
- var cols = args.Columns.Select(item => new ColumnInfo(
+ env.CheckValue(options.Columns, nameof(options.Columns));
+ var cols = options.Columns.Select(item => new PrincipalComponentAnalysisEstimator.ColumnInfo(
item.Name,
item.Source,
item.WeightColumn,
- item.Rank ?? args.Rank,
- item.Oversampling ?? args.Oversampling,
- item.Center ?? args.Center,
- item.Seed ?? args.Seed)).ToArray();
- return new PcaTransformer(env, input, cols).MakeDataTransform(input);
+ item.Rank ?? options.Rank,
+ item.Oversampling ?? options.Oversampling,
+ item.Center ?? options.Center,
+ item.Seed ?? options.Seed)).ToArray();
+ return new PrincipalComponentAnalysisTransformer(env, input, cols).MakeDataTransform(input);
}
// Factory method for SignatureLoadModel.
- private static PcaTransformer Create(IHostEnvironment env, ModelLoadContext ctx)
+ private static PrincipalComponentAnalysisTransformer Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
- var host = env.Register(nameof(PcaTransformer));
+ var host = env.Register(nameof(PrincipalComponentAnalysisTransformer));
host.CheckValue(ctx, nameof(ctx));
ctx.CheckAtModel(GetVersionInfo());
@@ -317,7 +276,7 @@ private static PcaTransformer Create(IHostEnvironment env, ModelLoadContext ctx)
int cbFloat = ctx.Reader.ReadInt32();
env.CheckDecode(cbFloat == sizeof(float));
}
- return new PcaTransformer(host, ctx);
+ return new PrincipalComponentAnalysisTransformer(host, ctx);
}
public override void Save(ModelSaveContext ctx)
@@ -333,13 +292,13 @@ public override void Save(ModelSaveContext ctx)
for (int i = 0; i < _transformInfos.Length; i++)
_transformInfos[i].Save(ctx);
}
- private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ColumnInfo[] columns)
+ private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(PrincipalComponentAnalysisEstimator.ColumnInfo[] columns)
{
Contracts.CheckValue(columns, nameof(columns));
return columns.Select(x => (x.Name, x.InputColumnName)).ToArray();
}
- private void Train(ColumnInfo[] columns, TransformInfo[] transformInfos, IDataView trainingData)
+ private void Train(PrincipalComponentAnalysisEstimator.ColumnInfo[] columns, TransformInfo[] transformInfos, IDataView trainingData)
{
var y = new float[_numColumns][][];
var omega = new float[_numColumns][][];
@@ -579,10 +538,10 @@ public ColumnSchemaInfo((string outputColumnName, string inputColumnName) column
}
}
- private readonly PcaTransformer _parent;
+ private readonly PrincipalComponentAnalysisTransformer _parent;
private readonly int _numColumns;
- public Mapper(PcaTransformer parent, Schema inputSchema)
+ public Mapper(PrincipalComponentAnalysisTransformer parent, Schema inputSchema)
: base(parent.Host.Register(nameof(Mapper)), parent, inputSchema)
{
_parent = parent;
@@ -645,10 +604,10 @@ private static void TransformFeatures(IExceptionContext ectx, in VBuffer
Desc = Summary,
UserName = UserName,
ShortName = ShortName)]
- internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Arguments input)
+ internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Options input)
{
var h = EntryPointUtils.CheckArgsAndCreateHost(env, "Pca", input);
- var view = PcaTransformer.Create(h, input, input.Data);
+ var view = PrincipalComponentAnalysisTransformer.Create(h, input, input.Data);
return new CommonOutputs.TransformOutput()
{
Model = new TransformModelImpl(h, view, input.Data),
@@ -658,7 +617,7 @@ internal static CommonOutputs.TransformOutput Calculate(IHostEnvironment env, Ar
}
///
- public sealed class PrincipalComponentAnalysisEstimator : IEstimator<PcaTransformer>
+ public sealed class PrincipalComponentAnalysisEstimator : IEstimator<PrincipalComponentAnalysisTransformer>
{
[BestFriend]
internal static class Defaults
@@ -670,8 +629,73 @@ internal static class Defaults
public const int Seed = 0;
}
+ /// <summary>
+ /// Describes how the transformer handles one column pair.
+ /// </summary>
+ public sealed class ColumnInfo
+ {
+ /// <summary>
+ /// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
+ /// </summary>
+ public readonly string Name;
+ /// <summary>
+ /// Name of column to transform.
+ /// </summary>
+ public readonly string InputColumnName;
+ /// <summary>
+ /// The name of the weight column.
+ /// </summary>
+ public readonly string WeightColumn;
+ /// <summary>
+ /// The number of components in the PCA.
+ /// </summary>
+ public readonly int Rank;
+ /// <summary>
+ /// Oversampling parameter for randomized PCA training.
+ /// </summary>
+ public readonly int Oversampling;
+ /// <summary>
+ /// If enabled, data is centered to be zero mean.
+ /// </summary>
+ public readonly bool Center;
+ /// <summary>
+ /// The seed for random number generation.
+ /// </summary>
+ public readonly int? Seed;
+
+ /// <summary>
+ /// Describes how the transformer handles one column pair.
+ /// </summary>
+ /// <param name="name">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
+ /// <param name="inputColumnName">Name of column to transform.
+ /// If set to <see langword="null"/>, the value of the <paramref name="name"/> will be used as source.</param>
+ /// <param name="weightColumn">The name of the weight column.</param>
+ /// <param name="rank">The number of components in the PCA.</param>
+ /// <param name="overSampling">Oversampling parameter for randomized PCA training.</param>
+ /// <param name="center">If enabled, data is centered to be zero mean.</param>
+ /// <param name="seed">The random seed. If unspecified random state will be instead derived from the <see cref="MLContext"/>.</param>
+ public ColumnInfo(string name,
+ string inputColumnName = null,
+ string weightColumn = Defaults.WeightColumn,
+ int rank = Defaults.Rank,
+ int overSampling = Defaults.Oversampling,
+ bool center = Defaults.Center,
+ int? seed = null)
+ {
+ Name = name;
+ InputColumnName = inputColumnName ?? name;
+ WeightColumn = weightColumn;
+ Rank = rank;
+ Oversampling = overSampling;
+ Center = center;
+ Seed = seed;
+ Contracts.CheckParam(Oversampling >= 0, nameof(Oversampling), "Oversampling must be non-negative.");
+ Contracts.CheckParam(Rank > 0, nameof(Rank), "Rank must be positive.");
+ }
+ }
+
private readonly IHost _host;
- private readonly PcaTransformer.ColumnInfo[] _columns;
+ private readonly ColumnInfo[] _columns;
///
/// The environment to use.
@@ -683,28 +707,35 @@ internal static class Defaults
/// Oversampling parameter for randomized PCA training.
/// If enabled, data is centered to be zero mean.
/// The seed for random number generation.
- public PrincipalComponentAnalysisEstimator(IHostEnvironment env,
+ internal PrincipalComponentAnalysisEstimator(IHostEnvironment env,
string outputColumnName,
string inputColumnName = null,
string weightColumn = Defaults.WeightColumn, int rank = Defaults.Rank,
int overSampling = Defaults.Oversampling, bool center = Defaults.Center,
int? seed = null)
- : this(env, new PcaTransformer.ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, weightColumn, rank, overSampling, center, seed))
+ : this(env, new ColumnInfo(outputColumnName, inputColumnName ?? outputColumnName, weightColumn, rank, overSampling, center, seed))
{
}
///
/// The environment to use.
/// The dataset columns to use, and their specific settings.
- public PrincipalComponentAnalysisEstimator(IHostEnvironment env, params PcaTransformer.ColumnInfo[] columns)
+ internal PrincipalComponentAnalysisEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register(nameof(PrincipalComponentAnalysisEstimator));
_columns = columns;
}
- public PcaTransformer Fit(IDataView input) => new PcaTransformer(_host, input, _columns);
+ /// <summary>
+ /// Trains and returns a <see cref="PrincipalComponentAnalysisTransformer"/>.
+ /// </summary>
+ public PrincipalComponentAnalysisTransformer Fit(IDataView input) => new PrincipalComponentAnalysisTransformer(_host, input, _columns);
+ /// <summary>
+ /// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
+ /// Used for schema propagation and verification in a pipeline.
+ /// </summary>
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
_host.CheckValue(inputSchema, nameof(inputSchema));
diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
index 9237f79abe..97f8b4b6fc 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaBinary.cs
@@ -107,7 +107,7 @@ private protected RoleMappedData PrepareDataFromTrainingExamples(IChannel ch, Ro
idvToFeedTrain = idvToShuffle;
else
{
- var shuffleArgs = new RowShufflingTransformer.Arguments
+ var shuffleArgs = new RowShufflingTransformer.Options
{
PoolOnly = false,
ForceShuffle = ShuffleData
diff --git a/src/Microsoft.ML.StandardLearners/Standard/StochasticTrainerBase.cs b/src/Microsoft.ML.StandardLearners/Standard/StochasticTrainerBase.cs
index 550939892f..a849baa509 100644
--- a/src/Microsoft.ML.StandardLearners/Standard/StochasticTrainerBase.cs
+++ b/src/Microsoft.ML.StandardLearners/Standard/StochasticTrainerBase.cs
@@ -73,7 +73,7 @@ private protected RoleMappedData PrepareDataFromTrainingExamples(IChannel ch, Ro
idvToFeedTrain = idvToShuffle;
else
{
- var shuffleArgs = new RowShufflingTransformer.Arguments
+ var shuffleArgs = new RowShufflingTransformer.Options
{
PoolOnly = false,
ForceShuffle = ShuffleData
diff --git a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs
index 10bee69c74..b1c15d3633 100644
--- a/src/Microsoft.ML.StaticPipe/TransformsStatic.cs
+++ b/src/Microsoft.ML.StaticPipe/TransformsStatic.cs
@@ -1595,11 +1595,11 @@ private sealed class Reconciler : EstimatorReconciler
public override IEstimator Reconcile(IHostEnvironment env, PipelineColumn[] toOutput,
IReadOnlyDictionary inputNames, IReadOnlyDictionary outputNames, IReadOnlyCollection usedNames)
{
- var infos = new RandomFourierFeaturizingTransformer.ColumnInfo[toOutput.Length];
+ var infos = new RandomFourierFeaturizingEstimator.ColumnInfo[toOutput.Length];
for (int i = 0; i < toOutput.Length; ++i)
{
var tcol = (IColInput)toOutput[i];
- infos[i] = new RandomFourierFeaturizingTransformer.ColumnInfo(outputNames[toOutput[i]], tcol.Config.NewDim, tcol.Config.UseSin, inputNames[tcol.Input], tcol.Config.Generator, tcol.Config.Seed);
+ infos[i] = new RandomFourierFeaturizingEstimator.ColumnInfo(outputNames[toOutput[i]], tcol.Config.NewDim, tcol.Config.UseSin, inputNames[tcol.Input], tcol.Config.Generator, tcol.Config.Seed);
}
return new RandomFourierFeaturizingEstimator(env, infos);
}
@@ -1640,11 +1640,11 @@ public OutPipelineColumn(Vector input, string weightColumn, int rank,
private sealed class Reconciler : EstimatorReconciler
{
- private readonly PcaTransformer.ColumnInfo _colInfo;
+ private readonly PrincipalComponentAnalysisEstimator.ColumnInfo _colInfo;
public Reconciler(string weightColumn, int rank, int overSampling, bool center, int? seed = null)
{
- _colInfo = new PcaTransformer.ColumnInfo(
+ _colInfo = new PrincipalComponentAnalysisEstimator.ColumnInfo(
null, null, weightColumn, rank, overSampling, center, seed);
}
diff --git a/src/Microsoft.ML.Transforms/GcnTransform.cs b/src/Microsoft.ML.Transforms/GcnTransform.cs
index 3ae4156ec2..f48a4329b7 100644
--- a/src/Microsoft.ML.Transforms/GcnTransform.cs
+++ b/src/Microsoft.ML.Transforms/GcnTransform.cs
@@ -17,13 +17,13 @@
using Microsoft.ML.Model;
using Microsoft.ML.Transforms.Projections;
-[assembly: LoadableClass(LpNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormalizingTransformer), typeof(LpNormalizingTransformer.GcnArguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(LpNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormalizingTransformer), typeof(LpNormalizingTransformer.GcnOptions), typeof(SignatureDataTransform),
LpNormalizingTransformer.UserNameGn, "GcnTransform", LpNormalizingTransformer.ShortNameGn)]
[assembly: LoadableClass(LpNormalizingTransformer.GcnSummary, typeof(IDataTransform), typeof(LpNormalizingTransformer), null, typeof(SignatureLoadDataTransform),
LpNormalizingTransformer.UserNameGn, LpNormalizingTransformer.LoaderSignature, LpNormalizingTransformer.LoaderSignatureOld)]
-[assembly: LoadableClass(LpNormalizingTransformer.Summary, typeof(IDataTransform), typeof(LpNormalizingTransformer), typeof(LpNormalizingTransformer.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(LpNormalizingTransformer.Summary, typeof(IDataTransform), typeof(LpNormalizingTransformer), typeof(LpNormalizingTransformer.Options), typeof(SignatureDataTransform),
LpNormalizingTransformer.UserNameLP, "LpNormNormalizer", LpNormalizingTransformer.ShortNameLP)]
[assembly: LoadableClass(LpNormalizingTransformer.Summary, typeof(LpNormalizingTransformer), null, typeof(SignatureLoadModel),
@@ -51,7 +51,7 @@ namespace Microsoft.ML.Transforms.Projections
///
public sealed class LpNormalizingTransformer : OneToOneTransformerBase
{
- public sealed class Arguments : TransformInputBase
+ internal sealed class Options : TransformInputBase
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
@@ -63,7 +63,7 @@ public sealed class Arguments : TransformInputBase
public bool SubMean = LpNormalizingEstimatorBase.Defaults.LpSubstractMean;
}
- public sealed class GcnArguments : TransformInputBase
+ internal sealed class GcnOptions : TransformInputBase
{
[Argument(ArgumentType.Multiple, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
public GcnColumn[] Columns;
@@ -78,7 +78,7 @@ public sealed class GcnArguments : TransformInputBase
public float Scale = LpNormalizingEstimatorBase.Defaults.Scale;
}
- public abstract class ColumnBase : OneToOneColumn
+ internal abstract class ColumnBase : OneToOneColumn
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Subtract mean from each value before normalizing")]
public bool? SubMean;
@@ -96,7 +96,7 @@ private protected override bool TryUnparseCore(StringBuilder sb)
}
}
- public sealed class Column : ColumnBase
+ internal sealed class Column : ColumnBase
{
[Argument(ArgumentType.AtMostOnce, HelpText = "The norm to use to normalize each sample", ShortName = "norm", SortOrder = 1)]
public LpNormalizingEstimatorBase.NormalizerKind? NormKind;
@@ -120,7 +120,7 @@ internal bool TryUnparse(StringBuilder sb)
}
}
- public sealed class GcnColumn : ColumnBase
+ internal sealed class GcnColumn : ColumnBase
{
[Argument(ArgumentType.AtMostOnce, HelpText = "Normalize by standard deviation rather than L2 norm")]
public bool? UseStdDev;
@@ -147,49 +147,7 @@ internal bool TryUnparse(StringBuilder sb)
}
}
- ///
- /// Describes how the transformer handles one Gcn column pair.
- ///
- public sealed class GcnColumnInfo : ColumnInfoBase
- {
- ///
- /// Describes how the transformer handles one Gcn column pair.
- ///
- /// Name of the column resulting from the transformation of .
- /// Name of column to transform. If set to , the value of the will be used as source.
- /// Subtract mean from each value before normalizing.
- /// Normalize by standard deviation rather than L2 norm.
- /// Scale features by this value.
- public GcnColumnInfo(string name, string inputColumnName = null,
- bool substractMean = LpNormalizingEstimatorBase.Defaults.GcnSubstractMean,
- bool useStdDev = LpNormalizingEstimatorBase.Defaults.UseStdDev,
- float scale = LpNormalizingEstimatorBase.Defaults.Scale)
- : base(name, inputColumnName, substractMean, useStdDev ? LpNormalizingEstimatorBase.NormalizerKind.StdDev : LpNormalizingEstimatorBase.NormalizerKind.L2Norm, scale)
- {
- }
- }
-
- ///
- /// Describes how the transformer handles one LpNorm column pair.
- ///
- public sealed class LpNormColumnInfo : ColumnInfoBase
- {
- ///
- /// Describes how the transformer handles one LpNorm column pair.
- ///
- /// Name of the column resulting from the transformation of .
- /// Name of column to transform. If set to , the value of the will be used as source.
- /// Subtract mean from each value before normalizing.
- /// The norm to use to normalize each sample.
- public LpNormColumnInfo(string name, string inputColumnName = null,
- bool substractMean = LpNormalizingEstimatorBase.Defaults.LpSubstractMean,
- LpNormalizingEstimatorBase.NormalizerKind normalizerKind = LpNormalizingEstimatorBase.Defaults.NormKind)
- : base(name, inputColumnName ?? name, substractMean, normalizerKind, 1)
- {
- }
- }
-
- private sealed class ColumnInfoLoaded : ColumnInfoBase
+ private sealed class ColumnInfoLoaded : LpNormalizingEstimatorBase.ColumnInfoBase
{
internal ColumnInfoLoaded(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized)
: base(ctx, name, inputColumnName, normKindSerialized)
@@ -198,69 +156,6 @@ internal ColumnInfoLoaded(ModelLoadContext ctx, string name, string inputColumnN
}
}
- ///
- /// Describes base class for one column pair.
- ///
- public abstract class ColumnInfoBase
- {
- public readonly string Name;
- public readonly string InputColumnName;
- public readonly bool SubtractMean;
- public readonly LpNormalizingEstimatorBase.NormalizerKind NormKind;
- public readonly float Scale;
-
- internal ColumnInfoBase(string name, string inputColumnName, bool substractMean, LpNormalizingEstimatorBase.NormalizerKind normalizerKind, float scale)
- {
- Contracts.CheckNonWhiteSpace(name, nameof(name));
- Contracts.CheckNonWhiteSpace(inputColumnName, nameof(inputColumnName));
- Name = name;
- InputColumnName = inputColumnName;
- SubtractMean = substractMean;
- Contracts.CheckUserArg(0 < scale && scale < float.PositiveInfinity, nameof(scale), "scale must be a positive finite value");
- Scale = scale;
- NormKind = normalizerKind;
- }
-
- internal ColumnInfoBase(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized)
- {
- Contracts.AssertValue(ctx);
- Contracts.CheckNonWhiteSpace(inputColumnName, nameof(inputColumnName));
- Contracts.CheckNonWhiteSpace(name, nameof(name));
- Name = name;
- InputColumnName = inputColumnName;
-
- // *** Binary format ***
- // byte: SubtractMean
- // byte: NormKind
- // Float: Scale
- SubtractMean = ctx.Reader.ReadBoolByte();
- byte normKindVal = ctx.Reader.ReadByte();
- Contracts.CheckDecode(Enum.IsDefined(typeof(LpNormalizingEstimatorBase.NormalizerKind), normKindVal));
- NormKind = (LpNormalizingEstimatorBase.NormalizerKind)normKindVal;
- // Note: In early versions, a bool option (useStd) to whether to normalize by StdDev rather than
- // L2 norm was used. normKind was added in version=verVectorNormalizerSupported.
- // normKind was defined in a way such that the serialized boolean (0: use StdDev, 1: use L2) is
- // still valid.
- Contracts.CheckDecode(normKindSerialized ||
- (NormKind == LpNormalizingEstimatorBase.NormalizerKind.L2Norm || NormKind == LpNormalizingEstimatorBase.NormalizerKind.StdDev));
- Scale = ctx.Reader.ReadFloat();
- Contracts.CheckDecode(0 < Scale && Scale < float.PositiveInfinity);
- }
-
- internal void Save(ModelSaveContext ctx)
- {
- Contracts.AssertValue(ctx);
- // *** Binary format ***
- // byte: SubtractMean
- // byte: NormKind
- // Float: Scale
- ctx.Writer.WriteBoolByte(SubtractMean);
- ctx.Writer.Write((byte)NormKind);
- Contracts.Assert(0 < Scale && Scale < float.PositiveInfinity);
- ctx.Writer.Write(Scale);
- }
- }
-
internal const string GcnSummary = "Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is "
+ "either L2 norm or standard deviation.";
internal const string UserNameGn = "Global Contrast Normalization Transform";
@@ -296,10 +191,13 @@ private static VersionInfo GetVersionInfo()
// REVIEW: should this be an argument instead?
private const float MinScale = 1e-8f;
- public IReadOnlyCollection<ColumnInfoBase> Columns => _columns.AsReadOnly();
- private readonly ColumnInfoBase[] _columns;
+ /// <summary>
+ /// The objects describing how the transformation is applied on the input data.
+ /// </summary>
+ public IReadOnlyCollection<LpNormalizingEstimatorBase.ColumnInfoBase> Columns => _columns.AsReadOnly();
+ private readonly LpNormalizingEstimatorBase.ColumnInfoBase[] _columns;
- private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ColumnInfoBase[] columns)
+ private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(LpNormalizingEstimatorBase.ColumnInfoBase[] columns)
{
Contracts.CheckValue(columns, nameof(columns));
return columns.Select(x => (x.Name, x.InputColumnName)).ToArray();
@@ -314,58 +212,58 @@ protected override void CheckInputColumn(Schema inputSchema, int col, int srcCol
///
/// Create a that takes multiple pairs of columns.
///
- public LpNormalizingTransformer(IHostEnvironment env, params ColumnInfoBase[] columns) :
+ internal LpNormalizingTransformer(IHostEnvironment env, params LpNormalizingEstimatorBase.ColumnInfoBase[] columns) :
base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormalizingTransformer)), GetColumnPairs(columns))
{
_columns = columns.ToArray();
}
// Factory method for SignatureDataTransform for GcnArguments class.
- internal static IDataTransform Create(IHostEnvironment env, GcnArguments args, IDataView input)
+ internal static IDataTransform Create(IHostEnvironment env, GcnOptions options, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
- env.CheckValue(args, nameof(args));
+ env.CheckValue(options, nameof(options));
env.CheckValue(input, nameof(input));
- env.CheckValue(args.Columns, nameof(args.Columns));
- var cols = new GcnColumnInfo[args.Columns.Length];
+ env.CheckValue(options.Columns, nameof(options.Columns));
+ var cols = new GlobalContrastNormalizingEstimator.GcnColumnInfo[options.Columns.Length];
using (var ch = env.Start("ValidateArgs"))
{
for (int i = 0; i < cols.Length; i++)
{
- var item = args.Columns[i];
- cols[i] = new GcnColumnInfo(
+ var item = options.Columns[i];
+ cols[i] = new GlobalContrastNormalizingEstimator.GcnColumnInfo(
item.Name,
item.Source ?? item.Name,
- item.SubMean ?? args.SubMean,
- item.UseStdDev ?? args.UseStdDev,
- item.Scale ?? args.Scale);
+ item.SubMean ?? options.SubMean,
+ item.UseStdDev ?? options.UseStdDev,
+ item.Scale ?? options.Scale);
}
- if (!args.SubMean && args.UseStdDev)
+ if (!options.SubMean && options.UseStdDev)
ch.Warning("subMean parameter is false while useStd is true. It is advisable to set subMean to true in case useStd is set to true.");
}
return new LpNormalizingTransformer(env, cols).MakeDataTransform(input);
}
// Factory method for SignatureDataTransform for Arguments class.
- internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
+ internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
- env.CheckValue(args, nameof(args));
+ env.CheckValue(options, nameof(options));
env.CheckValue(input, nameof(input));
- env.CheckValue(args.Columns, nameof(args.Columns));
- var cols = new LpNormColumnInfo[args.Columns.Length];
+ env.CheckValue(options.Columns, nameof(options.Columns));
+ var cols = new LpNormalizingEstimator.LpNormColumnInfo[options.Columns.Length];
using (var ch = env.Start("ValidateArgs"))
{
for (int i = 0; i < cols.Length; i++)
{
- var item = args.Columns[i];
- cols[i] = new LpNormColumnInfo(
+ var item = options.Columns[i];
+ cols[i] = new LpNormalizingEstimator.LpNormColumnInfo(
item.Name,
item.Source ?? item.Name,
- item.SubMean ?? args.SubMean,
- item.NormKind ?? args.NormKind);
+ item.SubMean ?? options.SubMean,
+ item.NormKind ?? options.NormKind);
}
}
return new LpNormalizingTransformer(env, cols).MakeDataTransform(input);
@@ -717,7 +615,7 @@ internal static class LpNormalization
Desc = LpNormalizingTransformer.Summary,
UserName = LpNormalizingTransformer.UserNameLP,
ShortName = LpNormalizingTransformer.ShortNameLP)]
- public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNormalizingTransformer.Arguments input)
+ public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNormalizingTransformer.Options input)
{
var h = EntryPointUtils.CheckArgsAndCreateHost(env, "LpNormalize", input);
var xf = LpNormalizingTransformer.Create(h, input, input.Data);
@@ -732,7 +630,7 @@ public static CommonOutputs.TransformOutput Normalize(IHostEnvironment env, LpNo
Desc = LpNormalizingTransformer.GcnSummary,
UserName = LpNormalizingTransformer.UserNameGn,
ShortName = LpNormalizingTransformer.ShortNameGn)]
- public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, LpNormalizingTransformer.GcnArguments input)
+ public static CommonOutputs.TransformOutput GcNormalize(IHostEnvironment env, LpNormalizingTransformer.GcnOptions input)
{
var h = EntryPointUtils.CheckArgsAndCreateHost(env, "GcNormalize", input);
var xf = LpNormalizingTransformer.Create(h, input, input.Data);
@@ -760,6 +658,84 @@ public enum NormalizerKind : byte
LInf = 3
}
+ ///
+ /// Describes base class for one column pair.
+ ///
+ public abstract class ColumnInfoBase
+ {
+ ///
+ /// Name of the column resulting from the transformation of .
+ ///
+ public readonly string Name;
+ ///
+ /// Name of column to transform.
+ ///
+ public readonly string InputColumnName;
+ ///
+ /// Subtract mean from each value before normalizing.
+ ///
+ public readonly bool SubtractMean;
+ ///
+ /// The norm to use to normalize each sample.
+ ///
+ public readonly NormalizerKind NormKind;
+ ///
+ /// Scale features by this value.
+ ///
+ public readonly float Scale;
+
+ internal ColumnInfoBase(string name, string inputColumnName, bool substractMean, NormalizerKind normalizerKind, float scale)
+ {
+ Contracts.CheckNonWhiteSpace(name, nameof(name));
+ Contracts.CheckNonWhiteSpace(inputColumnName, nameof(inputColumnName));
+ Name = name;
+ InputColumnName = inputColumnName;
+ SubtractMean = substractMean;
+ Contracts.CheckUserArg(0 < scale && scale < float.PositiveInfinity, nameof(scale), "scale must be a positive finite value");
+ Scale = scale;
+ NormKind = normalizerKind;
+ }
+
+ internal ColumnInfoBase(ModelLoadContext ctx, string name, string inputColumnName, bool normKindSerialized)
+ {
+ Contracts.AssertValue(ctx);
+ Contracts.CheckNonWhiteSpace(inputColumnName, nameof(inputColumnName));
+ Contracts.CheckNonWhiteSpace(name, nameof(name));
+ Name = name;
+ InputColumnName = inputColumnName;
+
+ // *** Binary format ***
+ // byte: SubtractMean
+ // byte: NormKind
+ // Float: Scale
+ SubtractMean = ctx.Reader.ReadBoolByte();
+ byte normKindVal = ctx.Reader.ReadByte();
+ Contracts.CheckDecode(Enum.IsDefined(typeof(NormalizerKind), normKindVal));
+ NormKind = (NormalizerKind)normKindVal;
+ // Note: In early versions, a bool option (useStd) to whether to normalize by StdDev rather than
+ // L2 norm was used. normKind was added in version=verVectorNormalizerSupported.
+ // normKind was defined in a way such that the serialized boolean (0: use StdDev, 1: use L2) is
+ // still valid.
+ Contracts.CheckDecode(normKindSerialized ||
+ (NormKind == NormalizerKind.L2Norm || NormKind == NormalizerKind.StdDev));
+ Scale = ctx.Reader.ReadFloat();
+ Contracts.CheckDecode(0 < Scale && Scale < float.PositiveInfinity);
+ }
+
+ internal void Save(ModelSaveContext ctx)
+ {
+ Contracts.AssertValue(ctx);
+ // *** Binary format ***
+ // byte: SubtractMean
+ // byte: NormKind
+ // Float: Scale
+ ctx.Writer.WriteBoolByte(SubtractMean);
+ ctx.Writer.Write((byte)NormKind);
+ Contracts.Assert(0 < Scale && Scale < float.PositiveInfinity);
+ ctx.Writer.Write(Scale);
+ }
+ }
+
[BestFriend]
internal static class Defaults
{
@@ -773,10 +749,9 @@ internal static class Defaults
///
/// Create a that takes multiple pairs of columns.
///
- public LpNormalizingEstimatorBase(IHostEnvironment env, params LpNormalizingTransformer.ColumnInfoBase[] columns)
+ internal LpNormalizingEstimatorBase(IHostEnvironment env, params ColumnInfoBase[] columns)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(LpNormalizingEstimator)), new LpNormalizingTransformer(env, columns))
{
-
}
internal static bool IsColumnTypeValid(ColumnType type)
@@ -795,6 +770,10 @@ internal static bool IsSchemaColumnValid(SchemaShape.Column col)
internal const string ExpectedColumnType = "Expected float or float vector of known size";
+ ///
+ /// Returns the of the schema which will be produced by the transformer.
+ /// Used for schema propagation and verification in a pipeline.
+ ///
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
Host.CheckValue(inputSchema, nameof(inputSchema));
@@ -816,17 +795,36 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
}
///
- /// Lp Normalizing estimator allow you take columns and normalize them individually by rescaling them to unit norm.
+ /// Lp Normalizing estimator takes columns and normalizes them individually by rescaling them to unit norm.
///
public sealed class LpNormalizingEstimator : LpNormalizingEstimatorBase
{
+ ///
+ /// Describes how the transformer handles one column pair.
+ ///
+ public sealed class LpNormColumnInfo : ColumnInfoBase
+ {
+ ///
+ /// Describes how the transformer handles one column pair.
+ ///
+ /// Name of the column resulting from the transformation of .
+ /// Name of column to transform. If set to , the value of the will be used as source.
+ /// Subtract mean from each value before normalizing.
+ /// The norm to use to normalize each sample.
+ public LpNormColumnInfo(string name, string inputColumnName = null,
+ bool substractMean = Defaults.LpSubstractMean,
+ NormalizerKind normalizerKind = Defaults.NormKind)
+ : base(name, inputColumnName ?? name, substractMean, normalizerKind, 1)
+ {
+ }
+ }
///
/// The environment.
/// Name of the column resulting from the transformation of .
/// Name of the column to transform. If set to , the value of the will be used as source.
/// Type of norm to use to normalize each sample.
/// Subtract mean from each value before normalizing.
- public LpNormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
+ internal LpNormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
NormalizerKind normKind = Defaults.NormKind, bool substractMean = Defaults.LpSubstractMean)
: this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, normKind, substractMean)
{
@@ -837,26 +835,48 @@ public LpNormalizingEstimator(IHostEnvironment env, string outputColumnName, str
/// Pairs of columns to run the normalization on.
/// Type of norm to use to normalize each sample.
/// Subtract mean from each value before normalizing.
- public LpNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns,
+ internal LpNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns,
NormalizerKind normKind = Defaults.NormKind, bool substractMean = Defaults.LpSubstractMean)
- : this(env, columns.Select(x => new LpNormalizingTransformer.LpNormColumnInfo(x.outputColumnName, x.inputColumnName, substractMean, normKind)).ToArray())
+ : this(env, columns.Select(x => new LpNormColumnInfo(x.outputColumnName, x.inputColumnName, substractMean, normKind)).ToArray())
{
}
///
/// Create a that takes multiple pairs of columns.
///
- public LpNormalizingEstimator(IHostEnvironment env, params LpNormalizingTransformer.LpNormColumnInfo[] columns)
+ internal LpNormalizingEstimator(IHostEnvironment env, params LpNormColumnInfo[] columns)
: base(env, columns)
{
}
}
///
- /// Global contrast normalizing estimator allow you take columns and performs global constrast normalization on them.
+ /// Global contrast normalizing estimator takes columns and performs global contrast normalization.
///
public sealed class GlobalContrastNormalizingEstimator : LpNormalizingEstimatorBase
{
+ ///
+ /// Describes how the transformer handles one Gcn column pair.
+ ///
+ public sealed class GcnColumnInfo : ColumnInfoBase
+ {
+ ///
+ /// Describes how the transformer handles one Gcn column pair.
+ ///
+ /// Name of the column resulting from the transformation of .
+ /// Name of column to transform. If set to , the value of the will be used as source.
+ /// Subtract mean from each value before normalizing.
+ /// Normalize by standard deviation rather than L2 norm.
+ /// Scale features by this value.
+ public GcnColumnInfo(string name, string inputColumnName = null,
+ bool substractMean = Defaults.GcnSubstractMean,
+ bool useStdDev = Defaults.UseStdDev,
+ float scale = Defaults.Scale)
+ : base(name, inputColumnName ?? name, substractMean, useStdDev ? NormalizerKind.StdDev : NormalizerKind.L2Norm, scale)
+ {
+ }
+ }
+
///
/// The environment.
/// Name of the column resulting from the transformation of .
@@ -864,7 +884,7 @@ public sealed class GlobalContrastNormalizingEstimator : LpNormalizingEstimatorB
/// Subtract mean from each value before normalizing.
/// Normalize by standard deviation rather than L2 norm.
/// Scale features by this value.
- public GlobalContrastNormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
+ internal GlobalContrastNormalizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null,
bool substractMean = Defaults.GcnSubstractMean, bool useStdDev = Defaults.UseStdDev, float scale = Defaults.Scale)
: this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, substractMean, useStdDev, scale)
{
@@ -876,16 +896,16 @@ public GlobalContrastNormalizingEstimator(IHostEnvironment env, string outputCol
/// Subtract mean from each value before normalizing.
/// Normalize by standard deviation rather than L2 norm.
/// Scale features by this value.
- public GlobalContrastNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns,
+ internal GlobalContrastNormalizingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns,
bool substractMean = Defaults.GcnSubstractMean, bool useStdDev = Defaults.UseStdDev, float scale = Defaults.Scale)
- : this(env, columns.Select(x => new LpNormalizingTransformer.GcnColumnInfo(x.outputColumnName, x.inputColumnName, substractMean, useStdDev, scale)).ToArray())
+ : this(env, columns.Select(x => new GcnColumnInfo(x.outputColumnName, x.inputColumnName, substractMean, useStdDev, scale)).ToArray())
{
}
///
/// Create a that takes multiple pairs of columns.
///
- public GlobalContrastNormalizingEstimator(IHostEnvironment env, params LpNormalizingTransformer.GcnColumnInfo[] columns) :
+ internal GlobalContrastNormalizingEstimator(IHostEnvironment env, params GcnColumnInfo[] columns) :
base(env, columns)
{
}
diff --git a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs
index 405bd1155d..3c66d9d06d 100644
--- a/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs
+++ b/src/Microsoft.ML.Transforms/LearnerFeatureSelection.cs
@@ -13,7 +13,7 @@
using Microsoft.ML.Transforms;
using Microsoft.ML.Transforms.FeatureSelection;
-[assembly: LoadableClass(LearnerFeatureSelectionTransform.Summary, typeof(IDataTransform), typeof(LearnerFeatureSelectionTransform), typeof(LearnerFeatureSelectionTransform.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(LearnerFeatureSelectionTransform.Summary, typeof(IDataTransform), typeof(LearnerFeatureSelectionTransform), typeof(LearnerFeatureSelectionTransform.Options), typeof(SignatureDataTransform),
"Learner Feature Selection Transform", "LearnerFeatureSelectionTransform", "LearnerFeatureSelection")]
namespace Microsoft.ML.Transforms
@@ -28,7 +28,7 @@ internal static class LearnerFeatureSelectionTransform
internal const string Summary = "Selects the slots for which the absolute value of the corresponding weight in a linear learner is greater than a threshold.";
#pragma warning disable CS0649 // The fields will still be set via the reflection driven mechanisms.
- public sealed class Arguments
+ public sealed class Options
{
[Argument(ArgumentType.LastOccurenceWins, HelpText = "If the corresponding absolute value of the weight for a slot is greater than this threshold, the slot is preserved", ShortName = "ft", SortOrder = 2)]
public Single? Threshold;
@@ -85,21 +85,21 @@ internal void Check(IExceptionContext ectx)
internal static string RegistrationName = "LearnerFeatureSelectionTransform";
// Factory method for SignatureDataTransform.
- private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
+ private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register(RegistrationName);
- host.CheckValue(args, nameof(args));
+ host.CheckValue(options, nameof(options));
host.CheckValue(input, nameof(input));
- args.Check(host);
+ options.Check(host);
var scores = default(VBuffer);
- TrainCore(host, input, args, ref scores);
+ TrainCore(host, input, options, ref scores);
using (var ch = host.Start("Dropping Slots"))
{
int selectedCount;
- var column = CreateDropSlotsColumn(args, in scores, out selectedCount);
+ var column = CreateDropSlotsColumn(options, in scores, out selectedCount);
if (column == null)
{
@@ -107,39 +107,39 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData
return NopTransform.CreateIfNeeded(host, input);
}
- ch.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount, scores.Length, args.FeatureColumn);
+ ch.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount, scores.Length, options.FeatureColumn);
return new SlotsDroppingTransformer(host, column).Transform(input) as IDataTransform;
}
}
- private static SlotsDroppingTransformer.ColumnInfo CreateDropSlotsColumn(Arguments args, in VBuffer scores, out int selectedCount)
+ private static SlotsDroppingTransformer.ColumnInfo CreateDropSlotsColumn(Options options, in VBuffer scores, out int selectedCount)
{
// Not checking the scores.Length, because:
// 1. If it's the same as the features column length, we should be constructing the right DropSlots arguments.
// 2. If it's less, we assume that the rest of the scores are zero and we drop the slots.
// 3. If it's greater, the drop slots ignores the ranges that are outside the valid range of indices for the column.
- Contracts.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);
+ Contracts.Assert(options.Threshold.HasValue != options.NumSlotsToKeep.HasValue);
var col = new SlotsDroppingTransformer.Column();
- col.Source = args.FeatureColumn;
+ col.Source = options.FeatureColumn;
selectedCount = 0;
var scoresValues = scores.GetValues();
// Degenerate case, dropping all slots.
if (scoresValues.Length == 0)
- return new SlotsDroppingTransformer.ColumnInfo(args.FeatureColumn);
+ return new SlotsDroppingTransformer.ColumnInfo(options.FeatureColumn);
int tiedScoresToKeep;
float threshold;
- if (args.Threshold.HasValue)
+ if (options.Threshold.HasValue)
{
- threshold = args.Threshold.Value;
+ threshold = options.Threshold.Value;
tiedScoresToKeep = threshold > 0 ? int.MaxValue : 0;
}
else
{
- Contracts.Assert(args.NumSlotsToKeep.HasValue);
- threshold = ComputeThreshold(scoresValues, args.NumSlotsToKeep.Value, out tiedScoresToKeep);
+ Contracts.Assert(options.NumSlotsToKeep.HasValue);
+ threshold = ComputeThreshold(scoresValues, options.NumSlotsToKeep.Value, out tiedScoresToKeep);
}
var slots = new List<(int min, int? max)>();
@@ -224,7 +224,7 @@ private static SlotsDroppingTransformer.ColumnInfo CreateDropSlotsColumn(Argumen
}
if (slots.Count > 0)
- return new SlotsDroppingTransformer.ColumnInfo(args.FeatureColumn, slots: slots.ToArray());
+ return new SlotsDroppingTransformer.ColumnInfo(options.FeatureColumn, slots: slots.ToArray());
return null;
}
@@ -264,36 +264,36 @@ private static float ComputeThreshold(ReadOnlySpan scores, int topk, out
return threshold;
}
- private static void TrainCore(IHost host, IDataView input, Arguments args, ref VBuffer scores)
+ private static void TrainCore(IHost host, IDataView input, Options options, ref VBuffer scores)
{
Contracts.AssertValue(host);
- host.AssertValue(args);
+ host.AssertValue(options);
host.AssertValue(input);
- host.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);
+ host.Assert(options.Threshold.HasValue != options.NumSlotsToKeep.HasValue);
using (var ch = host.Start("Train"))
{
ch.Trace("Constructing trainer");
- ITrainer trainer = args.Filter.CreateComponent(host);
+ ITrainer trainer = options.Filter.CreateComponent(host);
IDataView view = input;
var schema = view.Schema;
- var label = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.LabelColumn), args.LabelColumn, DefaultColumnNames.Label);
- var feature = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.FeatureColumn), args.FeatureColumn, DefaultColumnNames.Features);
- var group = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.GroupColumn), args.GroupColumn, DefaultColumnNames.GroupId);
- var weight = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.WeightColumn), args.WeightColumn, DefaultColumnNames.Weight);
- var name = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(args.NameColumn), args.NameColumn, DefaultColumnNames.Name);
+ var label = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(options.LabelColumn), options.LabelColumn, DefaultColumnNames.Label);
+ var feature = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(options.FeatureColumn), options.FeatureColumn, DefaultColumnNames.Features);
+ var group = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(options.GroupColumn), options.GroupColumn, DefaultColumnNames.GroupId);
+ var weight = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(options.WeightColumn), options.WeightColumn, DefaultColumnNames.Weight);
+ var name = TrainUtils.MatchNameOrDefaultOrNull(ch, schema, nameof(options.NameColumn), options.NameColumn, DefaultColumnNames.Name);
- TrainUtils.AddNormalizerIfNeeded(host, ch, trainer, ref view, feature, args.NormalizeFeatures);
+ TrainUtils.AddNormalizerIfNeeded(host, ch, trainer, ref view, feature, options.NormalizeFeatures);
ch.Trace("Binding columns");
- var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, args.CustomColumns);
+ var customCols = TrainUtils.CheckAndGenerateCustomColumns(ch, options.CustomColumns);
var data = new RoleMappedData(view, label, feature, group, weight, name, customCols);
var predictor = TrainUtils.Train(host, ch, data, trainer, null,
- null, 0, args.CacheData);
+ null, 0, options.CacheData);
var rfs = predictor as IPredictorWithFeatureWeights;
Contracts.AssertValue(rfs);
@@ -304,15 +304,15 @@ private static void TrainCore(IHost host, IDataView input, Arguments args, ref V
///
/// Returns a score for each slot of the features column.
///
- public static void Train(IHostEnvironment env, IDataView input, Arguments args, ref VBuffer scores)
+ public static void Train(IHostEnvironment env, IDataView input, Options options, ref VBuffer scores)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register(RegistrationName);
- host.CheckValue(args, nameof(args));
+ host.CheckValue(options, nameof(options));
host.CheckValue(input, nameof(input));
- args.Check(host);
+ options.Check(host);
- TrainCore(host, input, args, ref scores);
+ TrainCore(host, input, options, ref scores);
}
}
}
diff --git a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs
index 5ef9670f25..1da466ec24 100644
--- a/src/Microsoft.ML.Transforms/ProjectionCatalog.cs
+++ b/src/Microsoft.ML.Transforms/ProjectionCatalog.cs
@@ -36,7 +36,7 @@ public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this
///
/// The transform's catalog.
/// The input columns to use for the transformation.
- public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RandomFourierFeaturizingTransformer.ColumnInfo[] columns)
+ public static RandomFourierFeaturizingEstimator CreateRandomFourierFeatures(this TransformsCatalog.ProjectionTransforms catalog, params RandomFourierFeaturizingEstimator.ColumnInfo[] columns)
=> new RandomFourierFeaturizingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
///
@@ -63,7 +63,7 @@ public static LpNormalizingEstimator LpNormalize(this TransformsCatalog.Projecti
///
/// The transform's catalog.
/// Describes the parameters of the lp-normalization process for each column pair.
- public static LpNormalizingEstimator LpNormalize(this TransformsCatalog.ProjectionTransforms catalog, params LpNormalizingTransformer.LpNormColumnInfo[] columns)
+ public static LpNormalizingEstimator LpNormalize(this TransformsCatalog.ProjectionTransforms catalog, params LpNormalizingEstimator.LpNormColumnInfo[] columns)
=> new LpNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
///
@@ -93,7 +93,7 @@ public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this Tr
///
/// The transform's catalog.
/// Describes the parameters of the gcn-normaliztion process for each column pair.
- public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this TransformsCatalog.ProjectionTransforms catalog, params LpNormalizingTransformer.GcnColumnInfo[] columns)
+ public static GlobalContrastNormalizingEstimator GlobalContrastNormalize(this TransformsCatalog.ProjectionTransforms catalog, params GlobalContrastNormalizingEstimator.GcnColumnInfo[] columns)
=> new GlobalContrastNormalizingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
}
}
diff --git a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs
index 6ba025abb0..754ac9b573 100644
--- a/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs
+++ b/src/Microsoft.ML.Transforms/RandomFourierFeaturizing.cs
@@ -17,7 +17,7 @@
using Microsoft.ML.Numeric;
using Microsoft.ML.Transforms.Projections;
-[assembly: LoadableClass(RandomFourierFeaturizingTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierFeaturizingTransformer), typeof(RandomFourierFeaturizingTransformer.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(RandomFourierFeaturizingTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierFeaturizingTransformer), typeof(RandomFourierFeaturizingTransformer.Options), typeof(SignatureDataTransform),
"Random Fourier Features Transform", "RffTransform", "Rff")]
[assembly: LoadableClass(RandomFourierFeaturizingTransformer.Summary, typeof(IDataTransform), typeof(RandomFourierFeaturizingTransformer), null, typeof(SignatureLoadDataTransform),
@@ -31,9 +31,12 @@
namespace Microsoft.ML.Transforms.Projections
{
+ ///
+ /// Maps vector columns to a low-dimensional feature space.
+ ///
public sealed class RandomFourierFeaturizingTransformer : OneToOneTransformerBase
{
- public sealed class Arguments
+ internal sealed class Options
{
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
@@ -52,7 +55,7 @@ public sealed class Arguments
public int? Seed;
}
- public sealed class Column : OneToOneColumn
+ internal sealed class Column : OneToOneColumn
{
[Argument(ArgumentType.AtMostOnce, HelpText = "The number of random Fourier features to create", ShortName = "dim")]
public int? NewDim;
@@ -103,7 +106,7 @@ private sealed class TransformInfo
private readonly TauswortheHybrid _rand;
private readonly TauswortheHybrid.State _state;
- public TransformInfo(IHost host, ColumnInfo column, int d, float avgDist)
+ public TransformInfo(IHost host, RandomFourierFeaturizingEstimator.ColumnInfo column, int d, float avgDist)
{
Contracts.AssertValue(host);
@@ -207,7 +210,7 @@ private void InitializeFourierCoefficients(int rowSize, int colSize)
+ "since the transform is designed so that the inner products of the transformed data are approximately equal to those in the feature space of a user specified "
+ "shift-invariant kernel.";
- public const string LoaderSignature = "RffTransform";
+ internal const string LoaderSignature = "RffTransform";
private static VersionInfo GetVersionInfo()
{
@@ -232,37 +235,7 @@ private static string TestColumnType(ColumnType type)
return "Expected vector of floats with known size";
}
- public sealed class ColumnInfo
- {
- public readonly string Name;
- public readonly string InputColumnName;
- public readonly IComponentFactory Generator;
- public readonly int NewDim;
- public readonly bool UseSin;
- public readonly int? Seed;
-
- ///
- /// Describes how the transformer handles one column pair.
- ///
- /// Name of the column resulting from the transformation of .
- /// The number of random Fourier features to create.
- /// Create two features for every random Fourier frequency? (one for cos and one for sin).
- /// Name of column to transform.
- /// Which fourier generator to use.
- /// The seed of the random number generator for generating the new features (if unspecified, the global random is used.
- public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = null, IComponentFactory generator = null, int? seed = null)
- {
- Contracts.CheckUserArg(newDim > 0, nameof(newDim), "must be positive.");
- InputColumnName = inputColumnName ?? name;
- Name = name;
- Generator = generator ?? new GaussianFourierSampler.Arguments();
- NewDim = newDim;
- UseSin = useSin;
- Seed = seed;
- }
- }
-
- private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(ColumnInfo[] columns)
+ private static (string outputColumnName, string inputColumnName)[] GetColumnPairs(RandomFourierFeaturizingEstimator.ColumnInfo[] columns)
{
Contracts.CheckValue(columns, nameof(columns));
return columns.Select(x => (x.Name, x.InputColumnName)).ToArray();
@@ -279,7 +252,7 @@ protected override void CheckInputColumn(Schema inputSchema, int col, int srcCol
new VectorType(NumberType.Float, _transformInfos[col].SrcDim).ToString(), type.ToString());
}
- public RandomFourierFeaturizingTransformer(IHostEnvironment env, IDataView input, ColumnInfo[] columns)
+ internal RandomFourierFeaturizingTransformer(IHostEnvironment env, IDataView input, RandomFourierFeaturizingEstimator.ColumnInfo[] columns)
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RandomFourierFeaturizingTransformer)), GetColumnPairs(columns))
{
var avgDistances = GetAvgDistances(columns, input);
@@ -305,7 +278,7 @@ private static int RoundUp(int cflt, int cfltAlign)
return cblob * cfltAlign;
}
- private float[] GetAvgDistances(ColumnInfo[] columns, IDataView input)
+ private float[] GetAvgDistances(RandomFourierFeaturizingEstimator.ColumnInfo[] columns, IDataView input)
{
var avgDistances = new float[columns.Length];
const int reservoirSize = 5000;
@@ -448,27 +421,27 @@ private RandomFourierFeaturizingTransformer(IHost host, ModelLoadContext ctx)
}
// Factory method for SignatureDataTransform.
- private static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
+ private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
- env.CheckValue(args, nameof(args));
+ env.CheckValue(options, nameof(options));
env.CheckValue(input, nameof(input));
- env.CheckValue(args.Columns, nameof(args.Columns));
- var cols = new ColumnInfo[args.Columns.Length];
+ env.CheckValue(options.Columns, nameof(options.Columns));
+ var cols = new RandomFourierFeaturizingEstimator.ColumnInfo[options.Columns.Length];
using (var ch = env.Start("ValidateArgs"))
{
for (int i = 0; i < cols.Length; i++)
{
- var item = args.Columns[i];
- cols[i] = new ColumnInfo(
+ var item = options.Columns[i];
+ cols[i] = new RandomFourierFeaturizingEstimator.ColumnInfo(
item.Name,
- item.NewDim ?? args.NewDim,
- item.UseSin ?? args.UseSin,
+ item.NewDim ?? options.NewDim,
+ item.UseSin ?? options.UseSin,
item.Source ?? item.Name,
- item.MatrixGenerator ?? args.MatrixGenerator,
- item.Seed ?? args.Seed);
+ item.MatrixGenerator ?? options.MatrixGenerator,
+ item.Seed ?? options.Seed);
};
}
return new RandomFourierFeaturizingTransformer(env, input, cols).MakeDataTransform(input);
@@ -639,7 +612,7 @@ private void TransformFeatures(in VBuffer src, ref VBuffer dst, Tr
}
///
- /// Estimator which takes set of vector columns and maps its input to a random low-dimensional feature space.
+ /// Maps vector columns to a low-dimensional feature space.
///
public sealed class RandomFourierFeaturizingEstimator : IEstimator
{
@@ -650,31 +623,89 @@ internal static class Defaults
public const bool UseSin = false;
}
+ ///
+ /// Describes how the transformer handles one column pair.
+ ///
+ public sealed class ColumnInfo
+ {
+ ///
+ /// Name of the column resulting from the transformation of .
+ ///
+ public readonly string Name;
+ ///
+ /// Name of the column to transform.
+ ///
+ public readonly string InputColumnName;
+ ///
+ /// Which fourier generator to use.
+ ///
+ public readonly IComponentFactory Generator;
+ ///
+ /// The number of random Fourier features to create.
+ ///
+ public readonly int NewDim;
+ ///
+ /// Create two features for every random Fourier frequency? (one for cos and one for sin).
+ ///
+ public readonly bool UseSin;
+ ///
+ /// The seed of the random number generator for generating the new features (if unspecified, the global random is used).
+ ///
+ public readonly int? Seed;
+
+ ///
+ /// Describes how the transformer handles one column pair.
+ ///
+ /// Name of the column resulting from the transformation of .
+ /// The number of random Fourier features to create.
+ /// Create two features for every random Fourier frequency? (one for cos and one for sin).
+ /// Name of column to transform.
+ /// Which fourier generator to use.
+ /// The seed of the random number generator for generating the new features (if unspecified, the global random is used).
+ public ColumnInfo(string name, int newDim, bool useSin, string inputColumnName = null, IComponentFactory generator = null, int? seed = null)
+ {
+ Contracts.CheckUserArg(newDim > 0, nameof(newDim), "must be positive.");
+ InputColumnName = inputColumnName ?? name;
+ Name = name;
+ Generator = generator ?? new GaussianFourierSampler.Arguments();
+ NewDim = newDim;
+ UseSin = useSin;
+ Seed = seed;
+ }
+ }
+
private readonly IHost _host;
- private readonly RandomFourierFeaturizingTransformer.ColumnInfo[] _columns;
+ private readonly ColumnInfo[] _columns;
///
- /// Convinence constructor for simple one column case
+ /// Convenience constructor for simple one column case.
///
/// Host Environment.
/// Name of the column resulting from the transformation of .
/// Name of the column to transform. If set to , the value of the will be used as source.
/// The number of random Fourier features to create.
/// Create two features for every random Fourier frequency? (one for cos and one for sin).
- public RandomFourierFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int newDim = Defaults.NewDim, bool useSin = Defaults.UseSin)
- : this(env, new RandomFourierFeaturizingTransformer.ColumnInfo(outputColumnName, newDim, useSin, inputColumnName ?? outputColumnName))
+ internal RandomFourierFeaturizingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, int newDim = Defaults.NewDim, bool useSin = Defaults.UseSin)
+ : this(env, new ColumnInfo(outputColumnName, newDim, useSin, inputColumnName ?? outputColumnName))
{
}
- public RandomFourierFeaturizingEstimator(IHostEnvironment env, params RandomFourierFeaturizingTransformer.ColumnInfo[] columns)
+ internal RandomFourierFeaturizingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
Contracts.CheckValue(env, nameof(env));
_host = env.Register(nameof(RandomFourierFeaturizingEstimator));
_columns = columns;
}
+ ///
+ /// Trains and returns a .
+ ///
public RandomFourierFeaturizingTransformer Fit(IDataView input) => new RandomFourierFeaturizingTransformer(_host, input, _columns);
+ ///
+ /// Returns the of the schema which will be produced by the transformer.
+ /// Used for schema propagation and verification in a pipeline.
+ ///
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
_host.CheckValue(inputSchema, nameof(inputSchema));
diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs
index e4da959d74..4a1da4564c 100644
--- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs
+++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs
@@ -32,7 +32,7 @@
[assembly: LoadableClass(typeof(IRowMapper), typeof(StopWordsRemovingTransformer), null, typeof(SignatureLoadRowMapper),
"Stopwords Remover Transform", StopWordsRemovingTransformer.LoaderSignature)]
-[assembly: LoadableClass(CustomStopWordsRemovingTransformer.Summary, typeof(IDataTransform), typeof(CustomStopWordsRemovingTransformer), typeof(CustomStopWordsRemovingTransformer.Arguments), typeof(SignatureDataTransform),
+[assembly: LoadableClass(CustomStopWordsRemovingTransformer.Summary, typeof(IDataTransform), typeof(CustomStopWordsRemovingTransformer), typeof(CustomStopWordsRemovingTransformer.Options), typeof(SignatureDataTransform),
"Custom Stopwords Remover Transform", "CustomStopWordsRemoverTransform", "CustomStopWords")]
[assembly: LoadableClass(CustomStopWordsRemovingTransformer.Summary, typeof(IDataTransform), typeof(CustomStopWordsRemovingTransformer), null, typeof(SignatureLoadDataTransform),
@@ -642,7 +642,7 @@ internal abstract class ArgumentsBase
public string StopwordsColumn;
}
- internal sealed class Arguments : ArgumentsBase
+ internal sealed class Options : ArgumentsBase
{
[Argument(ArgumentType.Multiple, HelpText = "New column definition(s)", Name = "Column", ShortName = "col", SortOrder = 1)]
public Column[] Columns;
@@ -713,7 +713,7 @@ private IDataLoader GetLoaderForStopwords(IChannel ch, string dataFile,
if (isBinary || isTranspose)
{
ch.Assert(isBinary != isTranspose);
- ch.CheckUserArg(!string.IsNullOrWhiteSpace(stopwordsCol), nameof(Arguments.StopwordsColumn),
+ ch.CheckUserArg(!string.IsNullOrWhiteSpace(stopwordsCol), nameof(Options.StopwordsColumn),
"stopwordsColumn should be specified");
if (isBinary)
dataLoader = new BinaryLoader(Host, new BinaryLoader.Arguments(), fileSource);
@@ -772,7 +772,7 @@ private void LoadStopWords(IChannel ch, ReadOnlyMemory stopwords, string d
warnEmpty = false;
}
}
- ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.Stopword), "stopwords is empty");
+ ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Options.Stopword), "stopwords is empty");
}
else
{
@@ -780,9 +780,9 @@ private void LoadStopWords(IChannel ch, ReadOnlyMemory stopwords, string d
var loader = GetLoaderForStopwords(ch, dataFile, loaderFactory, ref srcCol);
if (!loader.Schema.TryGetColumnIndex(srcCol, out int colSrcIndex))
- throw ch.ExceptUserArg(nameof(Arguments.StopwordsColumn), "Unknown column '{0}'", srcCol);
+ throw ch.ExceptUserArg(nameof(Options.StopwordsColumn), "Unknown column '{0}'", srcCol);
var typeSrc = loader.Schema[colSrcIndex].Type;
- ch.CheckUserArg(typeSrc is TextType, nameof(Arguments.StopwordsColumn), "Must be a scalar text column");
+ ch.CheckUserArg(typeSrc is TextType, nameof(Options.StopwordsColumn), "Must be a scalar text column");
// Accumulate the stopwords.
using (var cursor = loader.GetRowCursor(loader.Schema[srcCol]))
@@ -805,10 +805,13 @@ private void LoadStopWords(IChannel ch, ReadOnlyMemory stopwords, string d
}
}
}
- ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Arguments.DataFile), "dataFile is empty");
+ ch.CheckUserArg(stopWordsMap.Count > 0, nameof(Options.DataFile), "dataFile is empty");
}
}
+ ///
+ /// The names of the input output column pairs on which this transformation is applied.
+ ///
public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly();
///
@@ -817,7 +820,7 @@ private void LoadStopWords(IChannel ch, ReadOnlyMemory stopwords, string d
/// The environment.
/// Array of words to remove.
/// Pairs of columns to remove stop words from.
- public CustomStopWordsRemovingTransformer(IHostEnvironment env, string[] stopwords, params (string outputColumnName, string inputColumnName)[] columns) :
+ internal CustomStopWordsRemovingTransformer(IHostEnvironment env, string[] stopwords, params (string outputColumnName, string inputColumnName)[] columns) :
base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), columns)
{
_stopWordsMap = new NormStr.Pool();
@@ -938,24 +941,24 @@ private static CustomStopWordsRemovingTransformer Create(IHostEnvironment env, M
}
// Factory method for SignatureDataTransform.
- internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
+ internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
Contracts.CheckValue(env, nameof(env));
- env.CheckValue(args, nameof(args));
+ env.CheckValue(options, nameof(options));
env.CheckValue(input, nameof(input));
- env.CheckValue(args.Columns, nameof(args.Columns));
- var cols = new (string outputColumnName, string inputColumnName)[args.Columns.Length];
+ env.CheckValue(options.Columns, nameof(options.Columns));
+ var cols = new (string outputColumnName, string inputColumnName)[options.Columns.Length];
for (int i = 0; i < cols.Length; i++)
{
- var item = args.Columns[i];
+ var item = options.Columns[i];
cols[i] = (item.Name, item.Source ?? item.Name);
}
CustomStopWordsRemovingTransformer transfrom = null;
- if (Utils.Size(args.Stopwords) > 0)
- transfrom = new CustomStopWordsRemovingTransformer(env, args.Stopwords, cols);
+ if (Utils.Size(options.Stopwords) > 0)
+ transfrom = new CustomStopWordsRemovingTransformer(env, options.Stopwords, cols);
else
- transfrom = new CustomStopWordsRemovingTransformer(env, args.Stopword, args.DataFile, args.StopwordsColumn, args.Loader, cols);
+ transfrom = new CustomStopWordsRemovingTransformer(env, options.Stopword, options.DataFile, options.StopwordsColumn, options.Loader, cols);
return transfrom.MakeDataTransform(input);
}
@@ -1057,7 +1060,7 @@ public sealed class CustomStopWordsRemovingEstimator : TrivialEstimatorName of the column resulting from the transformation of .
/// Name of the column to transform. If set to , the value of the will be used as source.
/// Array of words to remove.
- public CustomStopWordsRemovingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, params string[] stopwords)
+ internal CustomStopWordsRemovingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null, params string[] stopwords)
: this(env, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }, stopwords)
{
}
@@ -1069,11 +1072,15 @@ public CustomStopWordsRemovingEstimator(IHostEnvironment env, string outputColum
/// The environment.
/// Pairs of columns to remove stop words on.
/// Array of words to remove.
- public CustomStopWordsRemovingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, string[] stopwords) :
+ internal CustomStopWordsRemovingEstimator(IHostEnvironment env, (string outputColumnName, string inputColumnName)[] columns, string[] stopwords) :
base(Contracts.CheckRef(env, nameof(env)).Register(nameof(CustomStopWordsRemovingEstimator)), new CustomStopWordsRemovingTransformer(env, stopwords, columns))
{
}
+ ///
+ /// Returns the of the schema which will be produced by the transformer.
+ /// Used for schema propagation and verification in a pipeline.
+ ///
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
{
Host.CheckValue(inputSchema, nameof(inputSchema));
diff --git a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
index 25931c5787..7c9524e872 100644
--- a/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextFeaturizingEstimator.cs
@@ -409,13 +409,13 @@ public ITransformer Fit(IDataView input)
if (tparams.VectorNormalizer != TextNormKind.None)
{
- var xfCols = new List(2);
+ var xfCols = new List(2);
if (charFeatureCol != null)
{
var dstCol = GenerateColumnName(view.Schema, charFeatureCol, "LpCharNorm");
tempCols.Add(dstCol);
- xfCols.Add(new LpNormalizingTransformer.LpNormColumnInfo(dstCol, charFeatureCol, normalizerKind: tparams.LpNormalizerKind));
+ xfCols.Add(new LpNormalizingEstimator.LpNormColumnInfo(dstCol, charFeatureCol, normalizerKind: tparams.LpNormalizerKind));
charFeatureCol = dstCol;
}
@@ -423,7 +423,7 @@ public ITransformer Fit(IDataView input)
{
var dstCol = GenerateColumnName(view.Schema, wordFeatureCol, "LpWordNorm");
tempCols.Add(dstCol);
- xfCols.Add(new LpNormalizingTransformer.LpNormColumnInfo(dstCol, wordFeatureCol, normalizerKind: tparams.LpNormalizerKind));
+ xfCols.Add(new LpNormalizingEstimator.LpNormColumnInfo(dstCol, wordFeatureCol, normalizerKind: tparams.LpNormalizerKind));
wordFeatureCol = dstCol;
}
diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
index 1c8470a58d..2ddb2bb4b1 100644
--- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
+++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv
@@ -70,7 +70,7 @@ Trainers.StochasticDualCoordinateAscentClassifier The SDCA linear multi-class cl
Trainers.StochasticDualCoordinateAscentRegressor The SDCA linear regression trainer. Microsoft.ML.Trainers.Sdca TrainRegression Microsoft.ML.Trainers.SdcaRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput
Trainers.StochasticGradientDescentBinaryClassifier Train an Hogwild SGD binary model. Microsoft.ML.Trainers.StochasticGradientDescentClassificationTrainer TrainBinary Microsoft.ML.Trainers.StochasticGradientDescentClassificationTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput
Trainers.SymSgdBinaryClassifier Train a symbolic SGD. Microsoft.ML.Trainers.SymSgd.SymSgdClassificationTrainer TrainSymSgd Microsoft.ML.Trainers.SymSgd.SymSgdClassificationTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput
-Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Transforms.BootstrapSample GetSample Microsoft.ML.Transforms.BootstrapSamplingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
+Transforms.ApproximateBootstrapSampler Approximate bootstrap sampling. Microsoft.ML.Transforms.BootstrapSample GetSample Microsoft.ML.Transforms.BootstrapSamplingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.BinaryPredictionScoreColumnsRenamer For binary prediction, it renames the PredictedLabel and Score columns to include the name of the positive class. Microsoft.ML.EntryPoints.ScoreModel RenameBinaryPredictionScoreColumns Microsoft.ML.EntryPoints.ScoreModel+RenameBinaryPredictionScoreColumnsInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.BinNormalizer The values are assigned into equidensity bins and a value is mapped to its bin_number/number_of_bins. Microsoft.ML.Data.Normalize Bin Microsoft.ML.Transforms.Normalizers.NormalizeTransform+BinArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.CategoricalHashOneHotVectorizer Converts the categorical value into an indicator array by hashing the value and using the hash as an index in the bag. If the input column is a vector, a single indicator bag is returned for it. Microsoft.ML.Transforms.Categorical.Categorical CatTransformHash Microsoft.ML.Transforms.Categorical.OneHotHashEncodingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
@@ -90,7 +90,7 @@ Transforms.FeatureCombiner Combines all the features into one feature column. Mi
Transforms.FeatureContributionCalculationTransformer For each data point, calculates the contribution of individual features to the model prediction. Microsoft.ML.Data.FeatureContributionEntryPoint FeatureContributionCalculation Microsoft.ML.Data.FeatureContributionCalculatingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.FeatureSelectorByCount Selects the slots for which the count of non-default values is greater than or equal to a threshold. Microsoft.ML.Transforms.SelectFeatures CountSelect Microsoft.ML.Transforms.FeatureSelection.CountFeatureSelectingEstimator+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.FeatureSelectorByMutualInformation Selects the top k slots across all specified columns ordered by their mutual information with the label column. Microsoft.ML.Transforms.SelectFeatures MutualInformationSelect Microsoft.ML.Transforms.FeatureSelection.MutualInformationFeatureSelectingEstimator+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
-Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Transforms.Projections.LpNormalization GcNormalize Microsoft.ML.Transforms.Projections.LpNormalizingTransformer+GcnArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
+Transforms.GlobalContrastNormalizer Performs a global contrast normalization on input values: Y = (s * X - M) / D, where s is a scale, M is mean and D is either L2 norm or standard deviation. Microsoft.ML.Transforms.Projections.LpNormalization GcNormalize Microsoft.ML.Transforms.Projections.LpNormalizingTransformer+GcnOptions Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.HashConverter Converts column values into hashes. This transform accepts both numeric and text inputs, both single and vector-valued columns. Microsoft.ML.Transforms.Conversions.HashJoin Apply Microsoft.ML.Transforms.Conversions.HashJoiningTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.ImageGrayscale Convert image into grayscale. Microsoft.ML.ImageAnalytics.EntryPoints.ImageAnalytics ImageGrayscale Microsoft.ML.ImageAnalytics.ImageGrayscalingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.ImageLoader Load images from files. Microsoft.ML.ImageAnalytics.EntryPoints.ImageAnalytics ImageLoader Microsoft.ML.ImageAnalytics.ImageLoadingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
@@ -102,7 +102,7 @@ Transforms.LabelIndicator Label remapper used by OVA Microsoft.ML.Transforms.Lab
Transforms.LabelToFloatConverter Transforms the label to float to make it suitable for regression. Microsoft.ML.EntryPoints.FeatureCombiner PrepareRegressionLabel Microsoft.ML.EntryPoints.FeatureCombiner+RegressionLabelInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.LightLda The LDA transform implements LightLDA, a state-of-the-art implementation of Latent Dirichlet Allocation. Microsoft.ML.Transforms.Text.TextAnalytics LightLda Microsoft.ML.Transforms.Text.LatentDirichletAllocationTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.LogMeanVarianceNormalizer Normalizes the data based on the computed mean and variance of the logarithm of the data. Microsoft.ML.Data.Normalize LogMeanVar Microsoft.ML.Transforms.Normalizers.NormalizeTransform+LogMeanVarArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
-Transforms.LpNormalizer Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. Microsoft.ML.Transforms.Projections.LpNormalization Normalize Microsoft.ML.Transforms.Projections.LpNormalizingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
+Transforms.LpNormalizer Normalize vectors (rows) individually by rescaling them to unit norm (L2, L1 or LInf). Performs the following operation on a vector X: Y = (X - M) / D, where M is mean and D is either L2 norm, L1 norm or LInf norm. Microsoft.ML.Transforms.Projections.LpNormalization Normalize Microsoft.ML.Transforms.Projections.LpNormalizingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.ManyHeterogeneousModelCombiner Combines a sequence of TransformModels and a PredictorModel into a single PredictorModel. Microsoft.ML.EntryPoints.ModelOperations CombineModels Microsoft.ML.EntryPoints.ModelOperations+PredictorModelInput Microsoft.ML.EntryPoints.ModelOperations+PredictorModelOutput
Transforms.MeanVarianceNormalizer Normalizes the data based on the computed mean and variance of the data. Microsoft.ML.Data.Normalize MeanVar Microsoft.ML.Transforms.Normalizers.NormalizeTransform+MeanVarArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.MinMaxNormalizer Normalizes the data based on the observed minimum and maximum values of the data. Microsoft.ML.Data.Normalize MinMax Microsoft.ML.Transforms.Normalizers.NormalizeTransform+MinMaxArguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
@@ -115,7 +115,7 @@ Transforms.ModelCombiner Combines a sequence of TransformModels into a single mo
Transforms.NGramTranslator Produces a bag of counts of ngrams (sequences of consecutive values of length 1-n) in a given vector of keys. It does so by building a dictionary of ngrams and using the id in the dictionary as the index in the bag. Microsoft.ML.Transforms.Text.TextAnalytics NGramTransform Microsoft.ML.Transforms.Text.NgramExtractingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.NoOperation Does nothing. Microsoft.ML.Data.NopTransform Nop Microsoft.ML.Data.NopTransform+NopInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.OptionalColumnCreator If the source column does not exist after deserialization, create a column with the right type and default values. Microsoft.ML.Transforms.OptionalColumnTransform MakeOptional Microsoft.ML.Transforms.OptionalColumnTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
-Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.Projections.PcaTransformer Calculate Microsoft.ML.Transforms.Projections.PcaTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
+Transforms.PcaCalculator PCA is a dimensionality-reduction transform which computes the projection of a numeric vector onto a low-rank subspace. Microsoft.ML.Transforms.Projections.PrincipalComponentAnalysisTransformer Calculate Microsoft.ML.Transforms.Projections.PrincipalComponentAnalysisTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.PredictedLabelColumnOriginalValueConverter Transforms a predicted label column to its original values, unless it is of type bool. Microsoft.ML.EntryPoints.FeatureCombiner ConvertPredictedLabel Microsoft.ML.EntryPoints.FeatureCombiner+PredictedLabelInput Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.RandomNumberGenerator Adds a column with a generated number sequence. Microsoft.ML.Transforms.RandomNumberGenerator Generate Microsoft.ML.Transforms.GenerateNumberTransform+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
Transforms.RowRangeFilter Filters a dataview on a column of type Single, Double or Key (contiguous). Keeps the values that are in the specified min/max range. NaNs are always filtered out. If the input is a Key type, the min/max are considered percentages of the number of values. Microsoft.ML.EntryPoints.SelectRows FilterByRange Microsoft.ML.Transforms.RangeFilter+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index b9c43e2653..b08505e149 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -733,7 +733,6 @@ public void EntryPointCalibrate()
var scoredFf = ScoreModel.Score(Env, new ScoreModel.Input() { Data = splitOutput.TestData[2], PredictorModel = twiceCalibratedFfModel }).ScoredData;
}
-
[Fact]
public void EntryPointPipelineEnsemble()
{
@@ -746,8 +745,8 @@ public void EntryPointPipelineEnsemble()
{
var data = splitOutput.TrainData[i];
data = new RandomFourierFeaturizingEstimator(Env, new[] {
- new RandomFourierFeaturizingTransformer.ColumnInfo("Features1", 10, false, "Features"),
- new RandomFourierFeaturizingTransformer.ColumnInfo("Features2", 10, false, "Features"),
+ new RandomFourierFeaturizingEstimator.ColumnInfo("Features1", 10, false, "Features"),
+ new RandomFourierFeaturizingEstimator.ColumnInfo("Features2", 10, false, "Features"),
}).Fit(data).Transform(data);
data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data);
@@ -1198,8 +1197,8 @@ public void EntryPointMulticlassPipelineEnsemble()
{
var data = splitOutput.TrainData[i];
data = new RandomFourierFeaturizingEstimator(Env, new[] {
- new RandomFourierFeaturizingTransformer.ColumnInfo("Features1", 10, false, "Features"),
- new RandomFourierFeaturizingTransformer.ColumnInfo("Features2", 10, false, "Features"),
+ new RandomFourierFeaturizingEstimator.ColumnInfo("Features1", 10, false, "Features"),
+ new RandomFourierFeaturizingEstimator.ColumnInfo("Features2", 10, false, "Features"),
}).Fit(data).Transform(data);
data = new ColumnConcatenatingTransformer(Env, "Features", new[] { "Features1", "Features2" }).Transform(data);
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
index 0b95aff834..78120af00b 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
@@ -527,14 +527,14 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS
if (shuffle)
{
// Shuffle training data set
- preprocessedTrainData = new RowShufflingTransformer(mlContext, new RowShufflingTransformer.Arguments()
+ preprocessedTrainData = new RowShufflingTransformer(mlContext, new RowShufflingTransformer.Options()
{
ForceShuffle = shuffle,
ForceShuffleSeed = shuffleSeed
}, trainData);
// Shuffle test data set
- preprocessedTestData = new RowShufflingTransformer(mlContext, new RowShufflingTransformer.Arguments()
+ preprocessedTestData = new RowShufflingTransformer(mlContext, new RowShufflingTransformer.Options()
{
ForceShuffle = shuffle,
ForceShuffleSeed = shuffleSeed
diff --git a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
index f3351ce34d..eb37308afd 100644
--- a/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/NormalizerTests.cs
@@ -273,8 +273,8 @@ public void LpGcNormAndWhiteningWorkout()
separator: ';', hasHeader: true)
.Read(dataSource);
- var est = new LpNormalizingEstimator(ML, "lpnorm", "features")
- .Append(new GlobalContrastNormalizingEstimator(ML, "gcnorm", "features"))
+ var est = ML.Transforms.Projection.LpNormalize("lpnorm", "features")
+ .Append(ML.Transforms.Projection.GlobalContrastNormalize("gcnorm", "features"))
.Append(new VectorWhiteningEstimator(ML, "whitened", "features"));
TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);
@@ -369,8 +369,8 @@ public void LpNormWorkout()
separator: ';', hasHeader: true)
.Read(dataSource);
- var est = new LpNormalizingEstimator(ML, "lpNorm1", "features")
- .Append(new LpNormalizingEstimator(ML, "lpNorm2", "features", normKind: LpNormalizingEstimatorBase.NormalizerKind.L1Norm, substractMean: true));
+ var est = ML.Transforms.Projection.LpNormalize("lpNorm1", "features")
+ .Append(ML.Transforms.Projection.LpNormalize("lpNorm2", "features", normKind: LpNormalizingEstimatorBase.NormalizerKind.L1Norm, subMean: true));
TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);
var outputPath = GetOutputPath("NormalizerEstimator", "lpNorm.tsv");
@@ -402,7 +402,7 @@ public void TestLpNormOldSavingAndLoading()
c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
separator: ';', hasHeader: true)
.Read(dataSource).AsDynamic;
- var pipe = new LpNormalizingEstimator(ML, "whitened", "features");
+ var pipe = ML.Transforms.Projection.LpNormalize("whitened", "features");
var result = pipe.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);
@@ -428,8 +428,8 @@ public void GcnWorkout()
separator: ';', hasHeader: true)
.Read(dataSource);
- var est = new GlobalContrastNormalizingEstimator(ML, "gcnNorm1", "features")
- .Append(new GlobalContrastNormalizingEstimator(ML, "gcnNorm2", "features", substractMean: false, useStdDev: true, scale: 3));
+ var est = ML.Transforms.Projection.GlobalContrastNormalize("gcnNorm1", "features")
+ .Append(ML.Transforms.Projection.GlobalContrastNormalize("gcnNorm2", "features", substractMean: false, useStdDev: true, scale: 3));
TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);
var outputPath = GetOutputPath("NormalizerEstimator", "gcnNorm.tsv");
@@ -461,7 +461,7 @@ public void TestGcnNormOldSavingAndLoading()
c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
separator: ';', hasHeader: true)
.Read(dataSource).AsDynamic;
- var pipe = new GlobalContrastNormalizingEstimator(ML, "whitened", "features");
+ var pipe = ML.Transforms.Projection.GlobalContrastNormalize("whitened", "features");
var result = pipe.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);
diff --git a/test/Microsoft.ML.Tests/Transformers/PcaTests.cs b/test/Microsoft.ML.Tests/Transformers/PcaTests.cs
index d3214b889e..f349282caa 100644
--- a/test/Microsoft.ML.Tests/Transformers/PcaTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/PcaTests.cs
@@ -40,10 +40,10 @@ public void PcaWorkout()
separator: ';', hasHeader: true)
.Read(_dataSource);
- var est = new PrincipalComponentAnalysisEstimator(_env, "pca", "features", rank: 4, seed: 10);
+ var est = ML.Transforms.Projection.ProjectToPrincipalComponents("pca", "features", rank: 4, seed: 10);
TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);
- var estNonDefaultArgs = new PrincipalComponentAnalysisEstimator(_env, "pca", "features", rank: 3, weightColumn: "weight", overSampling: 2, center: false);
+ var estNonDefaultArgs = ML.Transforms.Projection.ProjectToPrincipalComponents("pca", "features", rank: 3, weightColumn: "weight", overSampling: 2, center: false);
TestEstimatorCore(estNonDefaultArgs, data.AsDynamic, invalidInput: invalidData.AsDynamic);
Done();
@@ -57,7 +57,7 @@ public void TestPcaEstimator()
separator: ';', hasHeader: true)
.Read(_dataSource);
- var est = new PrincipalComponentAnalysisEstimator(_env, "pca", "features", rank: 5, seed: 1);
+ var est = ML.Transforms.Projection.ProjectToPrincipalComponents("pca", "features", rank: 5, seed: 1);
var outputPath = GetOutputPath("PCA", "pca.tsv");
using (var ch = _env.Start("save"))
{
diff --git a/test/Microsoft.ML.Tests/Transformers/RffTests.cs b/test/Microsoft.ML.Tests/Transformers/RffTests.cs
index 68af36863b..d89d7b9676 100644
--- a/test/Microsoft.ML.Tests/Transformers/RffTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/RffTests.cs
@@ -55,9 +55,9 @@ public void RffWorkout()
var dataView = ML.Data.ReadFromEnumerable(data);
var generator = new GaussianFourierSampler.Arguments();
- var pipe = new RandomFourierFeaturizingEstimator(Env, new[]{
- new RandomFourierFeaturizingTransformer.ColumnInfo("RffA", 5, false, "A"),
- new RandomFourierFeaturizingTransformer.ColumnInfo("RffB", 10, true, "A", new LaplacianFourierSampler.Arguments())
+ var pipe = ML.Transforms.Projection.CreateRandomFourierFeatures(new[]{
+ new RandomFourierFeaturizingEstimator.ColumnInfo("RffA", 5, false, "A"),
+ new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianFourierSampler.Arguments())
});
TestEstimatorCore(pipe, dataView, invalidInput: invalidData, validForFitNotValidForTransformInput: validFitInvalidData);
@@ -111,9 +111,9 @@ public void TestOldSavingAndLoading()
};
var dataView = ML.Data.ReadFromEnumerable(data);
- var est = new RandomFourierFeaturizingEstimator(Env, new[]{
- new RandomFourierFeaturizingTransformer.ColumnInfo("RffA", 5, false, "A"),
- new RandomFourierFeaturizingTransformer.ColumnInfo("RffB", 10, true, "A", new LaplacianFourierSampler.Arguments())
+ var est = ML.Transforms.Projection.CreateRandomFourierFeatures(new[]{
+ new RandomFourierFeaturizingEstimator.ColumnInfo("RffA", 5, false, "A"),
+ new RandomFourierFeaturizingEstimator.ColumnInfo("RffB", 10, true, "A", new LaplacianFourierSampler.Arguments())
});
var result = est.Fit(dataView).Transform(dataView);
var resultRoles = new RoleMappedData(result);