@@ -294,13 +294,13 @@ internal ValueToKeyMappingTransformer(IHostEnvironment env, IDataView input,
294
294
{ }
295
295
296
296
internal ValueToKeyMappingTransformer ( IHostEnvironment env , IDataView input ,
297
- ColumnInfo [ ] columns , IDataView termData , bool autoConvert )
297
+ ColumnInfo [ ] columns , IDataView keyData , bool autoConvert )
298
298
: base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( RegistrationName ) , GetColumnPairs ( columns ) )
299
299
{
300
300
using ( var ch = Host . Start ( "Training" ) )
301
301
{
302
302
var infos = CreateInfos ( input . Schema ) ;
303
- _unboundMaps = Train ( Host , ch , infos , termData , columns , input , autoConvert ) ;
303
+ _unboundMaps = Train ( Host , ch , infos , keyData , columns , input , autoConvert ) ;
304
304
_textMetadata = new bool [ _unboundMaps . Length ] ;
305
305
for ( int iinfo = 0 ; iinfo < columns . Length ; ++ iinfo )
306
306
_textMetadata [ iinfo ] = columns [ iinfo ] . TextKeyValues ;
@@ -344,8 +344,8 @@ internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDat
344
344
item . TextKeyValues ?? args . TextKeyValues ) ;
345
345
cols [ i ] . Terms = item . Terms ?? args . Terms ;
346
346
} ;
347
- var termData = GetTermDataViewOrNull ( env , ch , args . DataFile , args . TermsColumn , args . Loader , out bool autoLoaded ) ;
348
- return new ValueToKeyMappingTransformer ( env , input , cols , termData , autoLoaded ) . MakeDataTransform ( input ) ;
347
+ var keyData = GetKeyDataViewOrNull ( env , ch , args . DataFile , args . TermsColumn , args . Loader , out bool autoLoaded ) ;
348
+ return new ValueToKeyMappingTransformer ( env , input , cols , keyData , autoLoaded ) . MakeDataTransform ( input ) ;
349
349
}
350
350
}
351
351
@@ -428,7 +428,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Sch
428
428
/// the term map. This will not be true in the case that the loader was adequately specified automatically.</param>
429
429
/// <returns>The single-column data containing the term data from the file.</returns>
430
430
[ BestFriend ]
431
- internal static IDataView GetTermDataViewOrNull ( IHostEnvironment env , IChannel ch ,
431
+ internal static IDataView GetKeyDataViewOrNull ( IHostEnvironment env , IChannel ch ,
432
432
string file , string termsColumn , IComponentFactory < IMultiStreamSource , IDataLoader > loaderFactory ,
433
433
out bool autoConvert )
434
434
{
@@ -448,9 +448,9 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
448
448
string src = termsColumn ;
449
449
IMultiStreamSource fileSource = new MultiFileSource ( file ) ;
450
450
451
- IDataView termData ;
451
+ IDataView keyData ;
452
452
if ( loaderFactory != null )
453
- termData = loaderFactory . CreateComponent ( env , fileSource ) ;
453
+ keyData = loaderFactory . CreateComponent ( env , fileSource ) ;
454
454
else
455
455
{
456
456
// Determine the default loader from the extension.
@@ -463,11 +463,11 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
463
463
ch . CheckUserArg ( ! string . IsNullOrWhiteSpace ( src ) , nameof ( termsColumn ) ,
464
464
"Must be specified" ) ;
465
465
if ( isBinary )
466
- termData = new BinaryLoader ( env , new BinaryLoader . Arguments ( ) , fileSource ) ;
466
+ keyData = new BinaryLoader ( env , new BinaryLoader . Arguments ( ) , fileSource ) ;
467
467
else
468
468
{
469
469
ch . Assert ( isTranspose ) ;
470
- termData = new TransposeLoader ( env , new TransposeLoader . Arguments ( ) , fileSource ) ;
470
+ keyData = new TransposeLoader ( env , new TransposeLoader . Arguments ( ) , fileSource ) ;
471
471
}
472
472
}
473
473
else
@@ -478,7 +478,7 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
478
478
"{0} should not be specified when default loader is " + nameof ( TextLoader ) + ". Ignoring {0}={1}" ,
479
479
nameof ( Arguments . TermsColumn ) , src ) ;
480
480
}
481
- termData = new TextLoader ( env ,
481
+ keyData = new TextLoader ( env ,
482
482
columns : new [ ] { new TextLoader . Column ( "Term" , DataKind . TX , 0 ) } ,
483
483
dataSample : fileSource )
484
484
. Read ( fileSource ) ;
@@ -488,40 +488,40 @@ internal static IDataView GetTermDataViewOrNull(IHostEnvironment env, IChannel c
488
488
}
489
489
}
490
490
ch . AssertNonEmpty ( src ) ;
491
- if ( termData . Schema . GetColumnOrNull ( src ) == null )
491
+ if ( keyData . Schema . GetColumnOrNull ( src ) == null )
492
492
throw ch . ExceptUserArg ( nameof ( termsColumn ) , "Unknown column '{0}'" , src ) ;
493
493
// Now, remove everything but that one column.
494
494
var selectTransformer = new ColumnSelectingTransformer ( env , new string [ ] { src } , null ) ;
495
- termData = selectTransformer . Transform ( termData ) ;
496
- ch . Assert ( termData . Schema . Count == 1 ) ;
497
- return termData ;
495
+ keyData = selectTransformer . Transform ( keyData ) ;
496
+ ch . Assert ( keyData . Schema . Count == 1 ) ;
497
+ return keyData ;
498
498
}
499
499
500
500
/// <summary>
501
501
/// Utility method to create the <see cref="TermMap"/> from a single-column data view holding the terms.
502
502
/// </summary>
503
- private static TermMap CreateFileTermMap ( IHostEnvironment env , IChannel ch , IDataView termData , bool autoConvert , Builder bldr )
503
+ private static TermMap CreateTermMapFromData ( IHostEnvironment env , IChannel ch , IDataView keyData , bool autoConvert , Builder bldr )
504
504
{
505
505
Contracts . AssertValue ( ch ) ;
506
506
ch . AssertValue ( env ) ;
507
- ch . AssertValue ( termData ) ;
507
+ ch . AssertValue ( keyData ) ;
508
508
ch . AssertValue ( bldr ) ;
509
- if ( termData . Schema . Count != 1 )
509
+ if ( keyData . Schema . Count != 1 )
510
510
{
511
- throw ch . ExceptParam ( nameof ( termData ) , $ "Input data containing terms should contain exactly one column, but " +
512
- $ "had { termData . Schema . Count } instead. Consider using { nameof ( ColumnSelectingEstimator ) } on that data first.") ;
511
+ throw ch . ExceptParam ( nameof ( keyData ) , $ "Input data containing terms should contain exactly one column, but " +
512
+ $ "had { keyData . Schema . Count } instead. Consider using { nameof ( ColumnSelectingEstimator ) } on that data first.") ;
513
513
}
514
514
515
- var typeSrc = termData . Schema [ 0 ] . Type ;
515
+ var typeSrc = keyData . Schema [ 0 ] . Type ;
516
516
if ( ! autoConvert && ! typeSrc . Equals ( bldr . ItemType ) )
517
- throw ch . ExceptUserArg ( nameof ( termData ) , "Input data's column must be of type '{0}' but was '{1}'" , bldr . ItemType , typeSrc ) ;
517
+ throw ch . ExceptUserArg ( nameof ( keyData ) , "Input data's column must be of type '{0}' but was '{1}'" , bldr . ItemType , typeSrc ) ;
518
518
519
- using ( var cursor = termData . GetRowCursor ( termData . Schema [ 0 ] ) )
519
+ using ( var cursor = keyData . GetRowCursor ( keyData . Schema [ 0 ] ) )
520
520
using ( var pch = env . StartProgressChannel ( "Building dictionary from term data" ) )
521
521
{
522
522
var header = new ProgressHeader ( new [ ] { "Total Terms" } , new [ ] { "examples" } ) ;
523
523
var trainer = Trainer . Create ( cursor , 0 , autoConvert , int . MaxValue , bldr ) ;
524
- double rowCount = termData . GetRowCount ( ) ?? double . NaN ;
524
+ double rowCount = keyData . GetRowCount ( ) ?? double . NaN ;
525
525
long rowCur = 0 ;
526
526
pch . SetHeader ( header ,
527
527
e =>
@@ -544,12 +544,12 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, IDat
544
544
/// This builds the <see cref="TermMap"/> instances per column.
545
545
/// </summary>
546
546
private static TermMap [ ] Train ( IHostEnvironment env , IChannel ch , ColInfo [ ] infos ,
547
- IDataView termData , ColumnInfo [ ] columns , IDataView trainingData , bool autoConvert )
547
+ IDataView keyData , ColumnInfo [ ] columns , IDataView trainingData , bool autoConvert )
548
548
{
549
549
Contracts . AssertValue ( env ) ;
550
550
env . AssertValue ( ch ) ;
551
551
ch . AssertValue ( infos ) ;
552
- ch . AssertValueOrNull ( termData ) ;
552
+ ch . AssertValueOrNull ( keyData ) ;
553
553
ch . AssertValue ( columns ) ;
554
554
ch . AssertValue ( trainingData ) ;
555
555
@@ -577,13 +577,13 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info
577
577
bldr . ParseAddTermArg ( termsArray , ch ) ;
578
578
termMap [ iinfo ] = bldr . Finish ( ) ;
579
579
}
580
- else if ( termData != null )
580
+ else if ( keyData != null )
581
581
{
582
582
// First column using this file.
583
583
if ( termsFromFile == null )
584
584
{
585
585
var bldr = Builder . Create ( infos [ iinfo ] . TypeSrc , columns [ iinfo ] . Sort ) ;
586
- termsFromFile = CreateFileTermMap ( env , ch , termData , autoConvert , bldr ) ;
586
+ termsFromFile = CreateTermMapFromData ( env , ch , keyData , autoConvert , bldr ) ;
587
587
}
588
588
if ( ! termsFromFile . ItemType . Equals ( infos [ iinfo ] . TypeSrc . GetItemType ( ) ) )
589
589
{
@@ -592,7 +592,7 @@ private static TermMap[] Train(IHostEnvironment env, IChannel ch, ColInfo[] info
592
592
// a complicated feature would be, and also because it's difficult to see how we
593
593
// can logically reconcile "reinterpretation" for different types with the resulting
594
594
// data view having an actual type.
595
- throw ch . ExceptParam ( nameof ( termData ) , "Terms from input data type '{0}' but mismatches column '{1}' item type '{2}'" ,
595
+ throw ch . ExceptParam ( nameof ( keyData ) , "Terms from input data type '{0}' but mismatches column '{1}' item type '{2}'" ,
596
596
termsFromFile . ItemType , infos [ iinfo ] . Name , infos [ iinfo ] . TypeSrc . GetItemType ( ) ) ;
597
597
}
598
598
termMap [ iinfo ] = termsFromFile ;
0 commit comments