@@ -111,8 +111,8 @@ public sealed class Options : TransformInputBase
111
111
[ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to keep numbers or remove them." , ShortName = "num" , SortOrder = 8 ) ]
112
112
public bool KeepNumbers = TextNormalizingEstimator . Defaults . KeepNumbers ;
113
113
114
- [ Argument ( ArgumentType . AtMostOnce , HelpText = "Whether to output the transformed text tokens as an additional column ." , ShortName = "tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
115
- public bool OutputTokens ;
114
// Name of the column that receives the transformed text tokens. The visible call sites
// gate on string.IsNullOrEmpty, so a null or empty value means no tokens column is emitted.
// NOTE(review): the ShortName list has a space after its first comma ("OutputTokens, tokens");
// confirm the argument parser trims aliases, otherwise " tokens" will not match "tokens".
+ [ Argument ( ArgumentType . AtMostOnce , HelpText = "Column containing the transformed text tokens." , ShortName = "OutputTokens, tokens,showtext,showTransformedText" , SortOrder = 9 ) ]
115
+ public string OutputTokensColumnName ;
116
116
117
117
[ Argument ( ArgumentType . Multiple , HelpText = "A dictionary of whitelisted terms." , ShortName = "dict" , NullName = "<None>" , SortOrder = 10 , Hide = true ) ]
118
118
internal TermLoaderArguments Dictionary ;
@@ -225,7 +225,7 @@ private sealed class TransformApplierParams
225
225
public readonly bool KeepDiacritics ;
226
226
public readonly bool KeepPunctuations ;
227
227
public readonly bool KeepNumbers ;
228
- public readonly bool OutputTextTokens ;
228
// Output column name for the text tokens; assigned in the constructor from
// parent.OptionalSettings.OutputTokensColumnName. Null or empty turns the column off.
+ public readonly string OutputTextTokensColumnName ;
229
229
public readonly TermLoaderArguments Dictionary ;
230
230
231
231
public StopWordsRemovingEstimator . Language StopwordsLanguage
@@ -252,7 +252,7 @@ internal LpNormNormalizingEstimatorBase.NormFunction LpNorm
252
252
253
253
// These properties encode the logic needed to determine which transforms to apply.
254
254
#region NeededTransforms
255
- public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || UsePredefinedStopWordRemover || OutputTextTokens ; } }
255
// Word tokenization is needed when a word extractor factory is set, the predefined
// stop-word remover is enabled, or a tokens output column name has been supplied.
+ public bool NeedsWordTokenizationTransform { get { return WordExtractorFactory != null || UsePredefinedStopWordRemover || ! string . IsNullOrEmpty ( OutputTextTokensColumnName ) ; } }
256
256
257
257
public bool NeedsNormalizeTransform
258
258
{
@@ -303,7 +303,7 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
303
303
KeepDiacritics = parent . OptionalSettings . KeepDiacritics ;
304
304
KeepPunctuations = parent . OptionalSettings . KeepPunctuations ;
305
305
KeepNumbers = parent . OptionalSettings . KeepNumbers ;
306
- OutputTextTokens = parent . OptionalSettings . OutputTokens ;
306
+ OutputTextTokensColumnName = parent . OptionalSettings . OutputTokensColumnName ;
307
307
Dictionary = parent . _dictionary ;
308
308
}
309
309
}
@@ -316,8 +316,6 @@ public TransformApplierParams(TextFeaturizingEstimator parent)
316
316
317
317
internal const Language DefaultLanguage = Language . English ;
318
318
319
- private const string TransformedTextColFormat = "{0}_TransformedText" ;
320
-
321
319
internal TextFeaturizingEstimator ( IHostEnvironment env , string outputColumnName , string inputColumnName = null )
322
320
: this ( env , outputColumnName , new [ ] { inputColumnName ?? outputColumnName } )
323
321
{
@@ -434,10 +432,10 @@ public ITransformer Fit(IDataView input)
434
432
wordFeatureCol = dstCol ;
435
433
}
436
434
437
- if ( tparams . OutputTextTokens )
435
+ if ( ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) )
438
436
{
439
437
string [ ] srcCols = wordTokCols ?? textCols ;
440
- view = new ColumnConcatenatingTransformer ( h , string . Format ( TransformedTextColFormat , OutputColumn ) , srcCols ) . Transform ( view ) ;
438
+ view = new ColumnConcatenatingTransformer ( h , tparams . OutputTextTokensColumnName , srcCols ) . Transform ( view ) ;
441
439
}
442
440
443
441
if ( tparams . CharExtractorFactory != null )
@@ -506,7 +504,7 @@ public ITransformer Fit(IDataView input)
506
504
// Otherwise, simply use the slot names, omitting the original source column names
507
505
// entirely. For the Concat transform setting the Key == Value of the TaggedColumn
508
506
// KVP signals this intent.
509
- Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || tparams . OutputTextTokens ) ;
507
+ Contracts . Assert ( charFeatureCol != null || wordFeatureCol != null || ! string . IsNullOrEmpty ( tparams . OutputTextTokensColumnName ) ) ;
510
508
if ( charFeatureCol != null )
511
509
srcTaggedCols . Add ( new KeyValuePair < string , string > ( charFeatureCol , charFeatureCol ) ) ;
512
510
else if ( wordFeatureCol != null )
@@ -555,9 +553,10 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
555
553
556
554
result [ OutputColumn ] = new SchemaShape . Column ( OutputColumn , SchemaShape . Column . VectorKind . Vector , NumberDataViewType . Single , false ,
557
555
new SchemaShape ( metadata ) ) ;
558
- if ( OptionalSettings . OutputTokens )
556
+
557
+ if ( ! string . IsNullOrEmpty ( OptionalSettings . OutputTokensColumnName ) )
559
558
{
560
- string name = string . Format ( TransformedTextColFormat , OutputColumn ) ;
559
+ string name = OptionalSettings . OutputTokensColumnName ;
561
560
result [ name ] = new SchemaShape . Column ( name , SchemaShape . Column . VectorKind . VariableVector , TextDataViewType . Instance , false ) ;
562
561
}
563
562
0 commit comments