Skip to content

Commit fa7776a

Browse files
committed
onehothash, hash, copy col, key to vector
1 parent 28ae548 commit fa7776a

File tree

16 files changed

+262
-247
lines changed

16 files changed

+262
-247
lines changed

src/Microsoft.ML.Data/EntryPoints/SchemaManipulation.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public static CommonOutputs.TransformOutput SelectColumns(IHostEnvironment env,
3838
}
3939

4040
[TlcModule.EntryPoint(Name = "Transforms.ColumnCopier", Desc = "Duplicates columns from the dataset", UserName = ColumnCopyingTransformer.UserName, ShortName = ColumnCopyingTransformer.ShortName)]
41-
public static CommonOutputs.TransformOutput CopyColumns(IHostEnvironment env, ColumnCopyingTransformer.Arguments input)
41+
public static CommonOutputs.TransformOutput CopyColumns(IHostEnvironment env, ColumnCopyingTransformer.Options input)
4242
{
4343
Contracts.CheckValue(env, nameof(env));
4444
var host = env.Register("CopyColumns");

src/Microsoft.ML.Data/TrainCatalog.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,11 @@ private void EnsureStratificationColumn(ref IDataView data, ref string stratific
152152
// Generate a new column with the hashed stratification column.
153153
while (data.Schema.TryGetColumnIndex(stratificationColumn, out tmp))
154154
stratificationColumn = string.Format("{0}_{1:000}", origStratCol, ++inc);
155-
HashingTransformer.ColumnInfo columnInfo;
155+
HashingEstimator.ColumnInfo columnInfo;
156156
if (seed.HasValue)
157-
columnInfo = new HashingTransformer.ColumnInfo(stratificationColumn, origStratCol, 30, seed.Value);
157+
columnInfo = new HashingEstimator.ColumnInfo(stratificationColumn, origStratCol, 30, seed.Value);
158158
else
159-
columnInfo = new HashingTransformer.ColumnInfo(stratificationColumn, origStratCol, 30);
159+
columnInfo = new HashingEstimator.ColumnInfo(stratificationColumn, origStratCol, 30);
160160
data = new HashingEstimator(Host, columnInfo).Fit(data).Transform(data);
161161
}
162162
}

src/Microsoft.ML.Data/Transforms/ColumnCopying.cs

+14-9
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
using Microsoft.ML.Transforms;
1919

2020
[assembly: LoadableClass(ColumnCopyingTransformer.Summary, typeof(IDataTransform), typeof(ColumnCopyingTransformer),
21-
typeof(ColumnCopyingTransformer.Arguments), typeof(SignatureDataTransform),
21+
typeof(ColumnCopyingTransformer.Options), typeof(SignatureDataTransform),
2222
ColumnCopyingTransformer.UserName, "CopyColumns", "CopyColumnsTransform", ColumnCopyingTransformer.ShortName,
2323
DocName = "transform/CopyColumnsTransformer.md")]
2424

@@ -35,16 +35,21 @@ namespace Microsoft.ML.Transforms
3535
{
3636
public sealed class ColumnCopyingEstimator : TrivialEstimator<ColumnCopyingTransformer>
3737
{
38-
public ColumnCopyingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName) :
38+
[BestFriend]
39+
internal ColumnCopyingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName) :
3940
this(env, (outputColumnName, inputColumnName))
4041
{
4142
}
4243

43-
public ColumnCopyingEstimator(IHostEnvironment env, params (string outputColumnName, string inputColumnName)[] columns)
44+
internal ColumnCopyingEstimator(IHostEnvironment env, params (string outputColumnName, string inputColumnName)[] columns)
4445
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ColumnCopyingEstimator)), new ColumnCopyingTransformer(env, columns))
4546
{
4647
}
4748

49+
/// <summary>
50+
/// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
51+
/// Used for schema propagation and verification in a pipeline.
52+
/// </summary>
4853
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
4954
{
5055
Host.CheckValue(inputSchema, nameof(inputSchema));
@@ -82,12 +87,12 @@ private static VersionInfo GetVersionInfo()
8287
loaderAssemblyName: typeof(ColumnCopyingTransformer).Assembly.FullName);
8388
}
8489

85-
public ColumnCopyingTransformer(IHostEnvironment env, params (string outputColumnName, string inputColumnName)[] columns)
90+
internal ColumnCopyingTransformer(IHostEnvironment env, params (string outputColumnName, string inputColumnName)[] columns)
8691
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ColumnCopyingTransformer)), columns)
8792
{
8893
}
8994

90-
public sealed class Column : OneToOneColumn
95+
internal sealed class Column : OneToOneColumn
9196
{
9297
internal static Column Parse(string str)
9398
{
@@ -106,20 +111,20 @@ internal bool TryUnparse(StringBuilder sb)
106111
}
107112
}
108113

109-
public sealed class Arguments : TransformInputBase
114+
internal sealed class Options : TransformInputBase
110115
{
111116
[Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)",
112117
Name = "Column", ShortName = "col", SortOrder = 1)]
113118
public Column[] Columns;
114119
}
115120

116121
// Factory method corresponding to SignatureDataTransform.
117-
internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
122+
internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
118123
{
119124
Contracts.CheckValue(env, nameof(env));
120-
env.CheckValue(args, nameof(args));
125+
env.CheckValue(options, nameof(options));
121126

122-
var transformer = new ColumnCopyingTransformer(env, args.Columns.Select(x => (x.Name, x.Source)).ToArray());
127+
var transformer = new ColumnCopyingTransformer(env, options.Columns.Select(x => (x.Name, x.Source)).ToArray());
123128
return transformer.MakeDataTransform(input);
124129
}
125130

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms
3636
/// </summary>
3737
/// <param name="catalog">The transform's catalog.</param>
3838
/// <param name="columns">Description of dataset columns and how to process them.</param>
39-
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingTransformer.ColumnInfo[] columns)
39+
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingEstimator.ColumnInfo[] columns)
4040
=> new HashingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
4141

4242
/// <summary>
@@ -81,7 +81,7 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co
8181
/// <param name="catalog">The conversion transform's catalog.</param>
8282
/// <param name="columns">The input columns to map to vectors.</param>
8383
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
84-
params KeyToVectorMappingTransformer.ColumnInfo[] columns)
84+
params KeyToVectorMappingEstimator.ColumnInfo[] columns)
8585
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
8686

8787
/// <summary>

0 commit comments

Comments
 (0)