@@ -163,8 +163,8 @@ public IStopWordsRemoverOptions StopWordsRemoverOptions
163
163
[ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to keep numbers or remove them." , ShortName = "num" , SortOrder = 8 ) ]
164
164
public bool KeepNumbers = TextNormalizingEstimator . Defaults . KeepNumbers ;
165
165
166
- [ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to output the transformed text tokens as an additional column ." , ShortName = "tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
167
- public bool OutputTokens ;
166
+ [ Argument ( ArgumentType . AtMostOnce , HelpText = "Column containing the transformed text tokens." , ShortName = "tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
167
+ public string OutputTokensColumnName ;
168
168
169
169
[ Argument ( ArgumentType . Multiple , HelpText = "A dictionary of whitelisted terms." , ShortName = "dict" , NullName = "<None>" , SortOrder = 10 , Hide = true ) ]
170
170
internal TermLoaderArguments Dictionary ;
@@ -278,7 +278,7 @@ private sealed class TransformApplierParams
278
278
public readonly bool KeepDiacritics ;
279
279
public readonly bool KeepPunctuations ;
280
280
public readonly bool KeepNumbers ;
281
- public readonly bool OutputTextTokens ;
281
+ public readonly string OutputTextTokensColumnName ;
282
282
public readonly TermLoaderArguments Dictionary ;
283
283
284
284
public StopWordsRemovingEstimator . Language StopwordsLanguage
@@ -305,7 +305,7 @@ internal LpNormNormalizingEstimatorBase.NormFunction LpNorm
305
305
306
306
// These properties encode the logic needed to determine which transforms to apply.
307
307
#region NeededTransforms
308
- public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || NeedsRemoveStopwordsTransform || OutputTextTokens ; } }
308
+ public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || NeedsRemoveStopwordsTransform || ! string . IsNullOrEmpty ( OutputTextTokensColumnName ) ; } }
309
309
310
310
public bool NeedsRemoveStopwordsTransform { get { return StopWordsRemover != null ; } }
311
311
@@ -358,7 +358,7 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
358
358
KeepDiacritics = parent . OptionalSettings . KeepDiacritics ;
359
359
KeepPunctuations = parent . OptionalSettings . KeepPunctuations ;
360
360
KeepNumbers = parent . OptionalSettings . KeepNumbers ;
361
- OutputTextTokens = parent . OptionalSettings . OutputTokens ;
361
+ OutputTextTokensColumnName = parent . OptionalSettings . OutputTokensColumnName ;
362
362
Dictionary = parent . _dictionary ;
363
363
}
364
364
}
@@ -371,8 +371,6 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
371
371
372
372
internal const Language DefaultLanguage = Language . English ;
373
373
374
- private const string TransformedTextColFormat = "{0}_TransformedText" ;
375
-
376
374
internal TextFeaturizingEstimator ( IHostEnvironment env , string outputColumnName , string inputColumnName = null )
377
375
: this ( env , outputColumnName , new [ ] { inputColumnName ?? outputColumnName } )
378
376
{
@@ -492,10 +490,10 @@ public ITransformer Fit(IDataView input)
492
490
wordFeatureCol = dstCol ;
493
491
}
494
492
495
- if ( tparams . OutputTextTokens )
493
+ if ( ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) )
496
494
{
497
495
string [ ] srcCols = wordTokCols ?? textCols ;
498
- view = new ColumnConcatenatingTransformer ( h , string . Format ( TransformedTextColFormat , OutputColumn ) , srcCols ) . Transform ( view ) ;
496
+ view = new ColumnConcatenatingTransformer ( h , tparams . OutputTextTokensColumnName , srcCols ) . Transform ( view ) ;
499
497
}
500
498
501
499
if ( tparams . CharExtractorFactory != null )
@@ -564,7 +562,7 @@ public ITransformer Fit(IDataView input)
564
562
// Otherwise, simply use the slot names, omitting the original source column names
565
563
// entirely. For the Concat transform setting the Key == Value of the TaggedColumn
566
564
// KVP signals this intent.
567
- Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || tparams . OutputTextTokens ) ;
565
+ Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) ) ;
568
566
if ( charFeatureCol != null )
569
567
srcTaggedCols . Add ( new KeyValuePair < string , string > ( charFeatureCol , charFeatureCol ) ) ;
570
568
else if ( wordFeatureCol != null )
@@ -613,9 +611,10 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
613
611
614
612
result [ OutputColumn ] = new SchemaShape . Column ( OutputColumn , SchemaShape . Column . VectorKind . Vector , NumberDataViewType . Single , false ,
615
613
new SchemaShape ( metadata ) ) ;
616
- if ( OptionalSettings . OutputTokens )
614
+
615
+ if ( ! string . IsNullOrEmpty ( OptionalSettings . OutputTokensColumnName ) )
617
616
{
618
- string name = string . Format ( TransformedTextColFormat , OutputColumn ) ;
617
+ string name = OptionalSettings . OutputTokensColumnName ;
619
618
result [ name ] = new SchemaShape . Column ( name , SchemaShape . Column . VectorKind . VariableVector , TextDataViewType . Instance , false ) ;
620
619
}
621
620
0 commit comments