Skip to content

Commit 8194b87

Browse files
committed
Rename termData to keyData per Ivan.
1 parent ecd6d40 commit 8194b87

File tree

4 files changed

+48
-47
lines changed

4 files changed

+48
-47
lines changed

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+6-5
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,16 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co
112112
=> new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumn, outputColumn, maxNumTerms, sort);
113113

114114
/// <summary>
115-
/// Converts value types into <see cref="KeyType"/>, optionally loading the keys to use from <paramref name="termData"/>.
115+
/// Converts value types into <see cref="KeyType"/>, optionally loading the keys to use from <paramref name="keyData"/>.
116116
/// </summary>
117117
/// <param name="catalog">The categorical transform's catalog.</param>
118118
/// <param name="columns">The data columns to map to keys.</param>
119-
/// <param name="termData">The data view containing the terms. If unspecified, they will be
120-
/// determined from the input data upon fitting.</param>
119+
/// <param name="keyData">The data view containing the terms. If specified, this should be a single column data
120+
/// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined
121+
/// from the input data upon fitting.</param>
121122
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
122-
ValueToKeyMappingTransformer.ColumnInfo[] columns, IDataView termData = null)
123-
=> new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, termData);
123+
ValueToKeyMappingTransformer.ColumnInfo[] columns, IDataView keyData = null)
124+
=> new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData);
124125

125126
/// <summary>
126127
/// Maps specified keys to specified values

src/Microsoft.ML.Data/Transforms/ValueToKeyMappingEstimator.cs

+8-8
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static class Defaults
1919

2020
private readonly IHost _host;
2121
private readonly ValueToKeyMappingTransformer.ColumnInfo[] _columns;
22-
private readonly IDataView _termData;
22+
private readonly IDataView _keyData;
2323

2424
/// <summary>
2525
/// Initializes a new instance of <see cref="ValueToKeyMappingEstimator"/>.
@@ -35,24 +35,24 @@ public ValueToKeyMappingEstimator(IHostEnvironment env, string inputColumn, stri
3535
{
3636
}
3737

38-
public ValueToKeyMappingEstimator(IHostEnvironment env, ValueToKeyMappingTransformer.ColumnInfo[] columns, IDataView termData = null)
38+
public ValueToKeyMappingEstimator(IHostEnvironment env, ValueToKeyMappingTransformer.ColumnInfo[] columns, IDataView keyData = null)
3939
{
4040
Contracts.CheckValue(env, nameof(env));
4141
_host = env.Register(nameof(ValueToKeyMappingEstimator));
4242
_host.CheckNonEmpty(columns, nameof(columns));
43-
_host.CheckValueOrNull(termData);
44-
if (termData != null && termData.Schema.Count != 1)
43+
_host.CheckValueOrNull(keyData);
44+
if (keyData != null && keyData.Schema.Count != 1)
4545
{
46-
throw _host.ExceptParam(nameof(termData), "If specified, this data view should contain only a single column " +
47-
$"containing the terms to map, but this had {termData.Schema.Count} columns.");
46+
throw _host.ExceptParam(nameof(keyData), "If specified, this data view should contain only a single column " +
47+
$"containing the terms to map, but this had {keyData.Schema.Count} columns.");
4848

4949
}
5050

5151
_columns = columns;
52-
_termData = termData;
52+
_keyData = keyData;
5353
}
5454

55-
public ValueToKeyMappingTransformer Fit(IDataView input) => new ValueToKeyMappingTransformer(_host, input, _columns, _termData, false);
55+
public ValueToKeyMappingTransformer Fit(IDataView input) => new ValueToKeyMappingTransformer(_host, input, _columns, _keyData, false);
5656

5757
public SchemaShape GetOutputSchema(SchemaShape inputSchema)
5858
{

src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs

+28-28
Original file line numberDiff line numberDiff line change
@@ -294,13 +294,13 @@ internal ValueToKeyMappingTransformer(IHostEnvironment env, IDataView input,
294294
{ }
295295

296296
internal ValueToKeyMappingTransformer(IHostEnvironment env, IDataView input,
297-
ColumnInfo[] columns, IDataView termData, bool autoConvert)
297+
ColumnInfo[] columns, IDataView keyData, bool autoConvert)
298298
: base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), GetColumnPairs(columns))
299299
{
300300
using (var ch = Host.Start("Training"))
301301
{
302302
var infos = CreateInfos(input.Schema);
303-
_unboundMaps = Train(Host, ch, infos, termData, columns, input, autoConvert);
303+
_unboundMaps = Train(Host, ch, infos, keyData, columns, input, autoConvert);
304304
_textMetadata = new bool[_unboundMaps.Length];
305305
for (int iinfo = 0; iinfo < columns.Length; ++iinfo)
306306
_textMetadata[iinfo] = columns[iinfo].TextKeyValues;
@@ -344,8 +344,8 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat
344344
item.TextKeyValues ?? args.TextKeyValues);
345345
cols[i].Terms = item.Terms ?? args.Terms;
346346
};
347-
var termData = GetTermDataViewOrNull(env, ch, args.DataFile, args.TermsColumn, args.Loader, out bool autoLoaded);
348-
return new ValueToKeyMappingTransformer(env, input, cols, termData, autoLoaded).MakeDataTransform(input);
347+
var keyData = GetKeyDataViewOrNull(env, ch, args.DataFile, args.TermsColumn, args.Loader, out bool autoLoaded);
348+
return new ValueToKeyMappingTransformer(env, input, cols, keyData, autoLoaded).MakeDataTransform(input);
349349
}
350350
}
351351

@@ -428,7 +428,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Sch
428428
/// the term map. This will not be true in the case that the loader was adequately specified automatically.</param>
429429
/// <returns>The single-column data containing the term data from the file.</returns>
430430
[BestFriend]
431-
internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel ch,
431+
internal static IDataView GetKeyDataViewOrNull(IHostEnvironment env, IChannel ch,
432432
string file, string termsColumn, IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory,
433433
out bool autoConvert)
434434
{
@@ -448,9 +448,9 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
448448
string src = termsColumn;
449449
IMultiStreamSource fileSource = new MultiFileSource(file);
450450

451-
IDataView termData;
451+
IDataView keyData;
452452
if (loaderFactory != null)
453-
termData = loaderFactory.CreateComponent(env, fileSource);
453+
keyData = loaderFactory.CreateComponent(env, fileSource);
454454
else
455455
{
456456
// Determine the default loader from the extension.
@@ -463,11 +463,11 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
463463
ch.CheckUserArg(!string.IsNullOrWhiteSpace(src), nameof(termsColumn),
464464
"Must be specified");
465465
if (isBinary)
466-
termData = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource);
466+
keyData = new BinaryLoader(env, new BinaryLoader.Arguments(), fileSource);
467467
else
468468
{
469469
ch.Assert(isTranspose);
470-
termData = new TransposeLoader(env, new TransposeLoader.Arguments(), fileSource);
470+
keyData = new TransposeLoader(env, new TransposeLoader.Arguments(), fileSource);
471471
}
472472
}
473473
else
@@ -478,7 +478,7 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
478478
"{0} should not be specified when default loader is " + nameof(TextLoader) + ". Ignoring {0}={1}",
479479
nameof(Arguments.TermsColumn), src);
480480
}
481-
termData = new TextLoader(env,
481+
keyData = new TextLoader(env,
482482
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
483483
dataSample: fileSource)
484484
.Read(fileSource);
@@ -488,40 +488,40 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
488488
}
489489
}
490490
ch.AssertNonEmpty(src);
491-
if (termData.Schema.GetColumnOrNull(src) == null)
491+
if (keyData.Schema.GetColumnOrNull(src) == null)
492492
throw ch.ExceptUserArg(nameof(termsColumn), "Unknown column '{0}'", src);
493493
// Now, remove everything but that one column.
494494
var selectTransformer = new ColumnSelectingTransformer(env, new string[] { src }, null);
495-
termData = selectTransformer.Transform(termData);
496-
ch.Assert(termData.Schema.Count == 1);
497-
return termData;
495+
keyData = selectTransformer.Transform(keyData);
496+
ch.Assert(keyData.Schema.Count == 1);
497+
return keyData;
498498
}
499499

500500
/// <summary>
501501
/// Utility method to create a <see cref="TermMap"/> from the values of a single-column data view.
502502
/// </summary>
503-
private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, IDataView termData, bool autoConvert, Builder bldr)
503+
private static TermMap CreateTermMapFromData(IHostEnvironment env, IChannel ch, IDataView keyData, bool autoConvert, Builder bldr)
504504
{
505505
Contracts.AssertValue(ch);
506506
ch.AssertValue(env);
507-
ch.AssertValue(termData);
507+
ch.AssertValue(keyData);
508508
ch.AssertValue(bldr);
509-
if (termData.Schema.Count != 1)
509+
if (keyData.Schema.Count != 1)
510510
{
511-
throw ch.ExceptParam(nameof(termData), $"Input data containing terms should contain exactly one column, but " +
512-
$"had {termData.Schema.Count} instead. Consider using {nameof(ColumnSelectingEstimator)} on that data first.");
511+
throw ch.ExceptParam(nameof(keyData), $"Input data containing terms should contain exactly one column, but " +
512+
$"had {keyData.Schema.Count} instead. Consider using {nameof(ColumnSelectingEstimator)} on that data first.");
513513
}
514514

515-
var typeSrc = termData.Schema[0].Type;
515+
var typeSrc = keyData.Schema[0].Type;
516516
if (!autoConvert && !typeSrc.Equals(bldr.ItemType))
517-
throw ch.ExceptUserArg(nameof(termData), "Input data's column must be of type '{0}' but was '{1}'", bldr.ItemType, typeSrc);
517+
throw ch.ExceptUserArg(nameof(keyData), "Input data's column must be of type '{0}' but was '{1}'", bldr.ItemType, typeSrc);
518518

519-
using (var cursor = termData.GetRowCursor(termData.Schema[0]))
519+
using (var cursor = keyData.GetRowCursor(keyData.Schema[0]))
520520
using (var pch = env.StartProgressChannel("Building dictionary from term data"))
521521
{
522522
var header = new ProgressHeader(new[] { "Total Terms" }, new[] { "examples" });
523523
var trainer = Trainer.Create(cursor, 0, autoConvert, int.MaxValue, bldr);
524-
double rowCount = termData.GetRowCount() ?? double.NaN;
524+
double rowCount = keyData.GetRowCount() ?? double.NaN;
525525
long rowCur = 0;
526526
pch.SetHeader(header,
527527
e =>
@@ -544,12 +544,12 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, IDat
544544
/// This builds the <see cref="TermMap"/> instances per column.
545545
/// </summary>
546546
private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] infos,
547-
IDataView termData, ColumnInfo[] columns, IDataView trainingData, bool autoConvert)
547+
IDataView keyData, ColumnInfo[] columns, IDataView trainingData, bool autoConvert)
548548
{
549549
Contracts.AssertValue(env);
550550
env.AssertValue(ch);
551551
ch.AssertValue(infos);
552-
ch.AssertValueOrNull(termData);
552+
ch.AssertValueOrNull(keyData);
553553
ch.AssertValue(columns);
554554
ch.AssertValue(trainingData);
555555

@@ -577,13 +577,13 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info
577577
bldr.ParseAddTermArg(termsArray, ch);
578578
termMap[iinfo] = bldr.Finish();
579579
}
580-
else if (termData != null)
580+
else if (keyData != null)
581581
{
582582
// First column using this file.
583583
if (termsFromFile == null)
584584
{
585585
var bldr = Builder.Create(infos[iinfo].TypeSrc, columns[iinfo].Sort);
586-
termsFromFile = CreateFileTermMap(env, ch, termData, autoConvert, bldr);
586+
termsFromFile = CreateTermMapFromData(env, ch, keyData, autoConvert, bldr);
587587
}
588588
if (!termsFromFile.ItemType.Equals(infos[iinfo].TypeSrc.GetItemType()))
589589
{
@@ -592,7 +592,7 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info
592592
// a complicated feature would be, and also because it's difficult to see how we
593593
// can logically reconcile "reinterpretation" for different types with the resulting
594594
// data view having an actual type.
595-
throw ch.ExceptParam(nameof(termData), "Terms from input data type '{0}' but mismatches column '{1}' item type '{2}'",
595+
throw ch.ExceptParam(nameof(keyData), "Terms from input data type '{0}' but mismatches column '{1}' item type '{2}'",
596596
termsFromFile.ItemType, infos[iinfo].Name, infos[iinfo].TypeSrc.GetItemType());
597597
}
598598
termMap[iinfo] = termsFromFile;

src/Microsoft.ML.Transforms/OneHotEncoding.cs

+6-6
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,14 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat
139139
col.SetTerms(column.Terms ?? args.Terms);
140140
columns.Add(col);
141141
}
142-
IDataView termData = null;
142+
IDataView keyData = null;
143143
if (!string.IsNullOrEmpty(args.DataFile))
144144
{
145145
using (var ch = h.Start("Load term data"))
146-
termData = ValueToKeyMappingTransformer.GetTermDataViewOrNull(env, ch, args.DataFile, args.TermsColumn, args.Loader, out bool autoLoaded);
147-
h.AssertValue(termData);
146+
keyData = ValueToKeyMappingTransformer.GetKeyDataViewOrNull(env, ch, args.DataFile, args.TermsColumn, args.Loader, out bool autoLoaded);
147+
h.AssertValue(keyData);
148148
}
149-
var transformed = new OneHotEncodingEstimator(env, columns.ToArray(), termData).Fit(input).Transform(input);
149+
var transformed = new OneHotEncodingEstimator(env, columns.ToArray(), keyData).Fit(input).Transform(input);
150150
return (IDataTransform)transformed;
151151
}
152152

@@ -228,11 +228,11 @@ public OneHotEncodingEstimator(IHostEnvironment env, string inputColumn,
228228
{
229229
}
230230

231-
public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns, IDataView termData = null)
231+
public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns, IDataView keyData = null)
232232
{
233233
Contracts.CheckValue(env, nameof(env));
234234
_host = env.Register(nameof(OneHotEncodingEstimator));
235-
_term = new ValueToKeyMappingEstimator(_host, columns, termData);
235+
_term = new ValueToKeyMappingEstimator(_host, columns, keyData);
236236
var binaryCols = new List<(string input, string output)>();
237237
var cols = new List<(string input, string output, bool bag)>();
238238
for (int i = 0; i < columns.Length; i++)

0 commit comments

Comments
 (0)