Skip to content

Commit 27f48b6

Browse files
authored
Renaming ColumnInfo to ColumnOptions (#2709)
1 parent 482bb81 commit 27f48b6

File tree

89 files changed

+841
-841
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

89 files changed

+841
-841
lines changed

docs/code/MlNetCookBook.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -629,9 +629,9 @@ var trainData = mlContext.Data.ReadFromTextFile<IrisInputAllFeatures>(dataPath,
629629
// Apply all kinds of standard ML.NET normalization to the raw features.
630630
var pipeline =
631631
mlContext.Transforms.Normalize(
632-
new NormalizingEstimator.MinMaxColumn("MinMaxNormalized", "Features", fixZero: true),
633-
new NormalizingEstimator.MeanVarColumn("MeanVarNormalized", "Features", fixZero: true),
634-
new NormalizingEstimator.BinningColumn("BinNormalized", "Features", numBins: 256));
632+
new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
633+
new NormalizingEstimator.MeanVarColumnOptions("MeanVarNormalized", "Features", fixZero: true),
634+
new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", numBins: 256));
635635

636636
// Let's train our pipeline of normalizers, and then apply it to the same data.
637637
var normalizedData = pipeline.Fit(trainData).Transform(trainData);

docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public static void Example()
6666

6767
// Composing a different pipeline if we wanted to normalize more than one column at a time.
6868
// Using log scale as the normalization mode.
69-
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new SimpleColumnInfo[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
69+
var multiColPipeline = ml.Transforms.Normalize(NormalizingEstimator.NormalizerMode.LogMeanVariance, new ColumnOptions[] { ("LogInduced", "Induced"), ("LogSpontaneous", "Spontaneous") });
7070
// The transformed data.
7171
var multiColtransformer = multiColPipeline.Fit(trainData);
7272
var multiColtransformedData = multiColtransformer.Transform(trainData);

docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public static void Example()
7070
};
7171

7272
var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text")
73-
.Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new SimpleColumnInfo[] { ("VariableLenghtFeatures", "TokenizedWords") }))
73+
.Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new ColumnOptions[] { ("VariableLenghtFeatures", "TokenizedWords") }))
7474
.Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize"))
7575
.Append(mlContext.Transforms.ScoreTensorFlowModel(modelInfo, new[] { "Prediction/Softmax" }, new[] { "Features" }))
7676
.Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnInfo.cs renamed to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Projection/VectorWhitenWithColumnOptions.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
namespace Microsoft.ML.Samples.Dynamic
77
{
8-
public sealed class VectorWhitenWithColumnInfo
8+
public sealed class VectorWhitenWithColumnOptions
99
{
1010
/// This example requires installation of the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.HalLearners/">Microsoft.ML.HalLearners</a>.
1111
public static void Example()
@@ -39,7 +39,7 @@ public static void Example()
3939

4040

4141
// A pipeline to project the Features column into a white noise vector.
42-
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.Projections.VectorWhiteningEstimator.ColumnInfo(
42+
var whiteningPipeline = ml.Transforms.Projection.VectorWhiten(new Transforms.Projections.VectorWhiteningEstimator.ColumnOptions(
4343
nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), kind: Transforms.Projections.WhiteningKind.Pca, pcaNum: 4));
4444
// The transformed (projected) data.
4545
var transformedData = whiteningPipeline.Fit(trainData).Transform(trainData);

src/Microsoft.ML.Data/TrainCatalog.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -226,12 +226,12 @@ private void EnsureGroupPreservationColumn(ref IDataView data, ref string sampli
226226
// Generate a new column with the hashed samplingKeyColumn.
227227
while (data.Schema.TryGetColumnIndex(samplingKeyColumn, out tmp))
228228
samplingKeyColumn = string.Format("{0}_{1:000}", origStratCol, ++inc);
229-
HashingEstimator.ColumnInfo columnInfo;
229+
HashingEstimator.ColumnOptions columnOptions;
230230
if (seed.HasValue)
231-
columnInfo = new HashingEstimator.ColumnInfo(samplingKeyColumn, origStratCol, 30, seed.Value);
231+
columnOptions = new HashingEstimator.ColumnOptions(samplingKeyColumn, origStratCol, 30, seed.Value);
232232
else
233-
columnInfo = new HashingEstimator.ColumnInfo(samplingKeyColumn, origStratCol, 30);
234-
data = new HashingEstimator(Environment, columnInfo).Fit(data).Transform(data);
233+
columnOptions = new HashingEstimator.ColumnOptions(samplingKeyColumn, origStratCol, 30);
234+
data = new HashingEstimator(Environment, columnOptions).Fit(data).Transform(data);
235235
}
236236
}
237237
}

src/Microsoft.ML.Data/Transforms/ColumnConcatenatingTransformer.cs

+26-26
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ internal sealed class TaggedOptions
134134
}
135135

136136
[BestFriend]
137-
internal sealed class ColumnInfo
137+
internal sealed class ColumnOptions
138138
{
139139
public readonly string Name;
140140
private readonly (string name, string alias)[] _sources;
@@ -143,7 +143,7 @@ internal sealed class ColumnInfo
143143
/// <summary>
144144
/// This denotes a concatenation of all <paramref name="inputColumnNames"/> into a column called <paramref name="name"/>.
145145
/// </summary>
146-
public ColumnInfo(string name, params string[] inputColumnNames)
146+
public ColumnOptions(string name, params string[] inputColumnNames)
147147
: this(name, GetPairs(inputColumnNames))
148148
{
149149
}
@@ -159,7 +159,7 @@ public ColumnInfo(string name, params string[] inputColumnNames)
159159
/// For each input column, an 'alias' can be specified, to be used in constructing the resulting slot names.
160160
/// If the alias is not specified, it defaults to the column name.
161161
/// </summary>
162-
public ColumnInfo(string name, IEnumerable<(string name, string alias)> inputColumnNames)
162+
public ColumnOptions(string name, IEnumerable<(string name, string alias)> inputColumnNames)
163163
{
164164
Contracts.CheckNonEmpty(name, nameof(name));
165165
Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
@@ -195,7 +195,7 @@ public void Save(ModelSaveContext ctx)
195195
}
196196
}
197197

198-
internal ColumnInfo(ModelLoadContext ctx)
198+
internal ColumnOptions(ModelLoadContext ctx)
199199
{
200200
Contracts.AssertValue(ctx);
201201
// *** Binary format ***
@@ -218,7 +218,7 @@ internal ColumnInfo(ModelLoadContext ctx)
218218
}
219219
}
220220

221-
private readonly ColumnInfo[] _columns;
221+
private readonly ColumnOptions[] _columns;
222222

223223
/// <summary>
224224
/// The names of the output and input column pairs for the transformation.
@@ -232,14 +232,14 @@ internal ColumnInfo(ModelLoadContext ctx)
232232
/// The column types must match, and the output column type is always a vector.
233233
/// </summary>
234234
internal ColumnConcatenatingTransformer(IHostEnvironment env, string outputColumnName, params string[] inputColumnNames)
235-
: this(env, new ColumnInfo(outputColumnName, inputColumnNames))
235+
: this(env, new ColumnOptions(outputColumnName, inputColumnNames))
236236
{
237237
}
238238

239239
/// <summary>
240240
/// Concatenates multiple groups of columns; each group is denoted by one of <paramref name="columns"/>.
241241
/// </summary>
242-
internal ColumnConcatenatingTransformer(IHostEnvironment env, params ColumnInfo[] columns) :
242+
internal ColumnConcatenatingTransformer(IHostEnvironment env, params ColumnOptions[] columns) :
243243
base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ColumnConcatenatingTransformer)))
244244
{
245245
Contracts.CheckValue(columns, nameof(columns));
@@ -272,7 +272,7 @@ private protected override void SaveModel(ModelSaveContext ctx)
272272
// *** Binary format ***
273273
// int: number of columns
274274
// for each column:
275-
// columnInfo
275+
// columnOptions
276276

277277
Contracts.Assert(_columns.Length > 0);
278278
ctx.Writer.Write(_columns.Length);
@@ -293,18 +293,18 @@ private ColumnConcatenatingTransformer(IHostEnvironment env, ModelLoadContext ct
293293
// *** Binary format ***
294294
// int: number of columns
295295
// for each column:
296-
// columnInfo
296+
// columnOptions
297297
int n = ctx.Reader.ReadInt32();
298298
Contracts.CheckDecode(n > 0);
299-
_columns = new ColumnInfo[n];
299+
_columns = new ColumnOptions[n];
300300
for (int i = 0; i < n; i++)
301-
_columns[i] = new ColumnInfo(ctx);
301+
_columns[i] = new ColumnOptions(ctx);
302302
}
303303
else
304304
_columns = LoadLegacy(ctx);
305305
}
306306

307-
private ColumnInfo[] LoadLegacy(ModelLoadContext ctx)
307+
private ColumnOptions[] LoadLegacy(ModelLoadContext ctx)
308308
{
309309
// *** Legacy binary format ***
310310
// int: sizeof(Float).
@@ -359,9 +359,9 @@ private ColumnInfo[] LoadLegacy(ModelLoadContext ctx)
359359
}
360360
}
361361

362-
var result = new ColumnInfo[n];
362+
var result = new ColumnOptions[n];
363363
for (int i = 0; i < n; i++)
364-
result[i] = new ColumnInfo(names[i],
364+
result[i] = new ColumnOptions(names[i],
365365
inputs[i].Zip(aliases[i], (name, alias) => (name, alias)));
366366
return result;
367367
}
@@ -380,7 +380,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa
380380
env.CheckUserArg(Utils.Size(options.Columns[i].Source) > 0, nameof(options.Columns));
381381

382382
var cols = options.Columns
383-
.Select(c => new ColumnInfo(c.Name, c.Source))
383+
.Select(c => new ColumnOptions(c.Name, c.Source))
384384
.ToArray();
385385
var transformer = new ColumnConcatenatingTransformer(env, cols);
386386
return transformer.MakeDataTransform(input);
@@ -400,7 +400,7 @@ internal static IDataTransform Create(IHostEnvironment env, TaggedOptions option
400400
env.CheckUserArg(Utils.Size(options.Columns[i].Source) > 0, nameof(options.Columns));
401401

402402
var cols = options.Columns
403-
.Select(c => new ColumnInfo(c.Name, c.Source.Select(kvp => (kvp.Value, kvp.Key != "" ? kvp.Key : null))))
403+
.Select(c => new ColumnOptions(c.Name, c.Source.Select(kvp => (kvp.Value, kvp.Key != "" ? kvp.Key : null))))
404404
.ToArray();
405405
var transformer = new ColumnConcatenatingTransformer(env, cols);
406406
return transformer.MakeDataTransform(input);
@@ -526,7 +526,7 @@ private sealed class BoundColumn
526526
{
527527
public readonly int[] SrcIndices;
528528

529-
private readonly ColumnInfo _columnInfo;
529+
private readonly ColumnOptions _columnOptions;
530530
private readonly DataViewType[] _srcTypes;
531531

532532
public readonly VectorType OutputType;
@@ -542,10 +542,10 @@ private sealed class BoundColumn
542542

543543
private readonly DataViewSchema _inputSchema;
544544

545-
public BoundColumn(DataViewSchema inputSchema, ColumnInfo columnInfo, int[] sources, VectorType outputType,
545+
public BoundColumn(DataViewSchema inputSchema, ColumnOptions columnOptions, int[] sources, VectorType outputType,
546546
bool isNormalized, bool hasSlotNames, bool hasCategoricals, int slotCount, int catCount)
547547
{
548-
_columnInfo = columnInfo;
548+
_columnOptions = columnOptions;
549549
SrcIndices = sources;
550550
_srcTypes = sources.Select(c => inputSchema[c].Type).ToArray();
551551

@@ -570,7 +570,7 @@ public DataViewSchema.DetachedColumn MakeSchemaColumn()
570570
if (_isIdentity)
571571
{
572572
var inputCol = _inputSchema[SrcIndices[0]];
573-
return new DataViewSchema.DetachedColumn(_columnInfo.Name, inputCol.Type, inputCol.Annotations);
573+
return new DataViewSchema.DetachedColumn(_columnOptions.Name, inputCol.Type, inputCol.Annotations);
574574
}
575575

576576
var metadata = new DataViewSchema.Annotations.Builder();
@@ -581,7 +581,7 @@ public DataViewSchema.DetachedColumn MakeSchemaColumn()
581581
if (_hasCategoricals)
582582
metadata.Add(AnnotationUtils.Kinds.CategoricalSlotRanges, _categoricalRangeType, (ValueGetter<VBuffer<int>>)GetCategoricalSlotRanges);
583583

584-
return new DataViewSchema.DetachedColumn(_columnInfo.Name, OutputType, metadata.ToAnnotations());
584+
return new DataViewSchema.DetachedColumn(_columnOptions.Name, OutputType, metadata.ToAnnotations());
585585
}
586586

587587
private void GetIsNormalized(ref bool value) => value = _isNormalized;
@@ -630,9 +630,9 @@ private void GetSlotNames(ref VBuffer<ReadOnlyMemory<char>> dst)
630630
{
631631
int colSrc = SrcIndices[i];
632632
var typeSrc = _srcTypes[i];
633-
Contracts.Assert(_columnInfo.Sources[i].alias != "");
633+
Contracts.Assert(_columnOptions.Sources[i].alias != "");
634634
var colName = _inputSchema[colSrc].Name;
635-
var nameSrc = _columnInfo.Sources[i].alias ?? colName;
635+
var nameSrc = _columnOptions.Sources[i].alias ?? colName;
636636
if (!(typeSrc is VectorType vectorTypeSrc))
637637
{
638638
bldr.AddFeature(slot++, nameSrc.AsMemory());
@@ -650,7 +650,7 @@ private void GetSlotNames(ref VBuffer<ReadOnlyMemory<char>> dst)
650650
{
651651
inputMetadata.GetValue(AnnotationUtils.Kinds.SlotNames, ref names);
652652
sb.Clear();
653-
if (_columnInfo.Sources[i].alias != colName)
653+
if (_columnOptions.Sources[i].alias != colName)
654654
sb.Append(nameSrc).Append(".");
655655
int len = sb.Length;
656656
foreach (var kvp in names.Items())
@@ -801,15 +801,15 @@ private Delegate MakeGetter<T>(DataViewRow input)
801801
public KeyValuePair<string, JToken> SavePfaInfo(BoundPfaContext ctx)
802802
{
803803
Contracts.AssertValue(ctx);
804-
string outName = _columnInfo.Name;
804+
string outName = _columnOptions.Name;
805805
if (!OutputType.IsKnownSize) // Do not attempt variable length.
806806
return new KeyValuePair<string, JToken>(outName, null);
807807

808808
string[] srcTokens = new string[SrcIndices.Length];
809809
bool[] srcPrimitive = new bool[SrcIndices.Length];
810810
for (int i = 0; i < SrcIndices.Length; ++i)
811811
{
812-
var srcName = _columnInfo.Sources[i].name;
812+
var srcName = _columnOptions.Sources[i].name;
813813
if ((srcTokens[i] = ctx.TokenOrNullForName(srcName)) == null)
814814
return new KeyValuePair<string, JToken>(outName, null);
815815
srcPrimitive[i] = _srcTypes[i] is PrimitiveDataViewType;

0 commit comments

Comments
 (0)