33
33
34
34
namespace Microsoft . ML . Transforms . Conversions
35
35
{
36
+ /// <summary>
37
+ /// The ValueMappingEstimator is a 1-1 mapping from a key to value. This particular class loads the mappings from an <see cref="IDataView"/>.
38
+ /// This gives the user the flexibility to load the mapping from a file instead of supplying IEnumerable collections as in <see cref="ValueMappingEstimator{TKey, TValue}"/>.
39
+ /// </summary>
40
+ public class ValueMappingEstimator : TrivialEstimator < ValueMappingTransformer >
41
+ {
42
+ private readonly ( string input , string output ) [ ] _columns ;
43
+
44
+ /// <summary>
45
+ /// Constructs the ValueMappingEstimator from a lookup <see cref="IDataView"/> whose key column is mapped to its value column.
46
+ /// </summary>
47
+ /// <param name="env">The environment to use.</param>
48
+ /// <param name="lookupMap">An instance of <see cref="IDataView"/> that contains the key and value columns.</param>
49
+ /// <param name="keyColumn">Name of the key column in <paramref name="lookupMap"/>.</param>
50
+ /// <param name="valueColumn">Name of the value column in <paramref name="lookupMap"/>.</param>
51
+ /// <param name="columns">The pairs of input column name (the column to apply the transformation to) and output column name (the resulting column).</param>
52
+ public ValueMappingEstimator ( IHostEnvironment env , IDataView lookupMap , string keyColumn , string valueColumn , params ( string input , string output ) [ ] columns )
53
+ : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingEstimator ) ) ,
54
+ new ValueMappingTransformer ( env , lookupMap , keyColumn , valueColumn , columns ) )
55
+ {
56
+ _columns = columns ;
57
+ }
58
+
59
+ /// <summary>
60
+ /// Computes the output schema for the given input schema: all input columns are kept, and each configured output column is added (or overwritten) using the type, vector kind and metadata of the map's value column.
61
+ /// </summary>
62
+ /// <param name="inputSchema">The input schema; every configured input column must be present in it, otherwise a schema-mismatch exception is thrown.</param>
63
+ /// <returns>The output schema containing the input columns plus the mapped output columns.</returns>
64
+ public override SchemaShape GetOutputSchema ( SchemaShape inputSchema )
65
+ {
66
+ Host . CheckValue ( inputSchema , nameof ( inputSchema ) ) ;
67
+
68
+ var resultDic = inputSchema . ToDictionary ( x => x . Name ) ;
69
+ var vectorKind = Transformer . ValueColumnType is VectorType ? SchemaShape . Column . VectorKind . Vector : SchemaShape . Column . VectorKind . Scalar ;
70
+ var isKey = Transformer . ValueColumnType is KeyType ;
71
+ var columnType = ( isKey ) ? NumberType . U4 :
72
+ Transformer . ValueColumnType ;
73
+ var metadataShape = SchemaShape . Create ( Transformer . ValueColumnMetadata . Schema ) ;
74
+ foreach ( var ( Input , Output ) in _columns )
75
+ {
76
+ if ( ! inputSchema . TryFindColumn ( Input , out var originalColumn ) )
77
+ throw Host . ExceptSchemaMismatch ( nameof ( inputSchema ) , "input" , Input ) ;
78
+
79
+ if ( ( originalColumn . Kind == SchemaShape . Column . VectorKind . VariableVector ||
80
+ originalColumn . Kind == SchemaShape . Column . VectorKind . Vector ) && Transformer . ValueColumnType is VectorType )
81
+ throw Host . ExceptNotSupp ( "Column '{0}' cannot be mapped to values when the column and the map values are both vector type." , Input ) ;
82
+ // Create the output (value) column; an existing column with the same name is replaced in the result.
83
+ var col = new SchemaShape . Column ( Output , vectorKind , columnType , isKey , metadataShape ) ;
84
+ resultDic [ Output ] = col ;
85
+ }
86
+ return new SchemaShape ( resultDic . Values ) ;
87
+ }
88
+ }
89
+
36
90
/// <summary>
37
91
/// The ValueMappingEstimator is a 1-1 mapping from a key to value. The key type and value type are specified
38
92
/// through TKey and TValue. TKey is always a scalar. TValue can be either a scalar or an array (array is only possible when input is scalar).
39
93
/// The mapping is specified, not trained by providing a list of keys and a list of values.
40
94
/// </summary>
41
95
/// <typeparam name="TKey">Specifies the key type.</typeparam>
42
96
/// <typeparam name="TValue">Specifies the value type.</typeparam>
43
- public sealed class ValueMappingEstimator < TKey , TValue > : TrivialEstimator < ValueMappingTransformer < TKey , TValue > >
97
+ public sealed class ValueMappingEstimator < TKey , TValue > : ValueMappingEstimator
44
98
{
45
99
private ( string input , string output ) [ ] _columns ;
46
100
@@ -52,8 +106,7 @@ public sealed class ValueMappingEstimator<TKey, TValue> : TrivialEstimator<Value
52
106
/// <param name="values">The list of values of TValue.</param>
53
107
/// <param name="columns">The list of columns to apply.</param>
54
108
public ValueMappingEstimator ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue > values , params ( string input , string output ) [ ] columns )
55
- : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingEstimator < TKey , TValue > ) ) ,
56
- new ValueMappingTransformer < TKey , TValue > ( env , keys , values , false , columns ) )
109
+ : base ( env , DataViewHelper . CreateDataView ( env , keys , values , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName , false ) , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName , columns )
57
110
{
58
111
_columns = columns ;
59
112
}
@@ -67,8 +120,7 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable<TKey> keys, IEnum
67
120
/// <param name="treatValuesAsKeyType">Specifies to treat the values as a <see cref="KeyType"/>.</param>
68
121
/// <param name="columns">The list of columns to apply.</param>
69
122
public ValueMappingEstimator ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue > values , bool treatValuesAsKeyType , params ( string input , string output ) [ ] columns )
70
- : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingEstimator < TKey , TValue > ) ) ,
71
- new ValueMappingTransformer < TKey , TValue > ( env , keys , values , treatValuesAsKeyType , columns ) )
123
+ : base ( env , DataViewHelper . CreateDataView ( env , keys , values , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName , treatValuesAsKeyType ) , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName , columns )
72
124
{
73
125
_columns = columns ;
74
126
}
@@ -81,41 +133,10 @@ public ValueMappingEstimator(IHostEnvironment env, IEnumerable<TKey> keys, IEnum
81
133
/// <param name="values">The list of values of TValue[].</param>
82
134
/// <param name="columns">The list of columns to apply.</param>
83
135
public ValueMappingEstimator ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue [ ] > values , params ( string input , string output ) [ ] columns )
84
- : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingEstimator < TKey , TValue > ) ) ,
85
- new ValueMappingTransformer < TKey , TValue > ( env , keys , values , columns ) )
136
+ : base ( env , DataViewHelper . CreateDataView ( env , keys , values , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName ) , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName , columns )
86
137
{
87
138
_columns = columns ;
88
139
}
89
-
90
- /// <summary>
91
- /// Retrieves the output schema given the input schema
92
- /// </summary>
93
- /// <param name="inputSchema">Input schema</param>
94
- /// <returns>Returns the generated output schema</returns>
95
- public override SchemaShape GetOutputSchema ( SchemaShape inputSchema )
96
- {
97
- Host . CheckValue ( inputSchema , nameof ( inputSchema ) ) ;
98
-
99
- var resultDic = inputSchema . ToDictionary ( x => x . Name ) ;
100
- var vectorKind = Transformer . ValueColumnType is VectorType ? SchemaShape . Column . VectorKind . Vector : SchemaShape . Column . VectorKind . Scalar ;
101
- var isKey = Transformer . ValueColumnType is KeyType ;
102
- var columnType = ( isKey ) ? ColumnTypeExtensions . PrimitiveTypeFromKind ( DataKind . U4 ) :
103
- Transformer . ValueColumnType ;
104
- var metadataShape = SchemaShape . Create ( Transformer . ValueColumnMetadata . Schema ) ;
105
- foreach ( var ( Input , Output ) in _columns )
106
- {
107
- if ( ! inputSchema . TryFindColumn ( Input , out var originalColumn ) )
108
- throw Host . ExceptSchemaMismatch ( nameof ( inputSchema ) , "input" , Input ) ;
109
-
110
- if ( ( originalColumn . Kind == SchemaShape . Column . VectorKind . VariableVector ||
111
- originalColumn . Kind == SchemaShape . Column . VectorKind . Vector ) && Transformer . ValueColumnType is VectorType )
112
- throw Host . ExceptNotSupp ( "Column '{0}' cannot be mapped to values when the column and the map values are both vector type." , Input ) ;
113
- // Create the Value column
114
- var col = new SchemaShape . Column ( Output , vectorKind , columnType , isKey , metadataShape ) ;
115
- resultDic [ Output ] = col ;
116
- }
117
- return new SchemaShape ( resultDic . Values ) ;
118
- }
119
140
}
120
141
121
142
/// <summary>
@@ -281,53 +302,6 @@ internal static IDataView CreateDataView<TKey, TValue>(IHostEnvironment env,
281
302
}
282
303
}
283
304
284
- /// <summary>
285
- /// The ValueMappingTransformer is a 1-1 mapping from a key to value. The key type and value type are specified
286
- /// through TKey and TValue. Arrays are supported for vector types which can be used as either a key or a value
287
- /// or both. The mapping is specified, not trained, by providing a list of keys and a list of values.
288
- /// </summary>
289
- /// <typeparam name="TKey">Specifies the key type</typeparam>
290
- /// <typeparam name="TValue">Specifies the value type</typeparam>
291
- public sealed class ValueMappingTransformer < TKey , TValue > : ValueMappingTransformer
292
- {
293
- /// <summary>
294
- /// Constructs a ValueMappingTransformer with a key type to value type.
295
- /// </summary>
296
- /// <param name="env">The environment to use.</param>
297
- /// <param name="keys">The list of keys that are TKey.</param>
298
- /// <param name="values">The list of values that are TValue.</param>
299
- /// <param name="treatValuesAsKeyTypes">Specifies to treat the values as a <see cref="KeyType"/>.</param>
300
- /// <param name="columns">The specified columns to apply</param>
301
- public ValueMappingTransformer ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue > values , bool treatValuesAsKeyTypes , ( string input , string output ) [ ] columns )
302
- : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingTransformer < TKey , TValue > ) ) ,
303
- ConvertToDataView ( env , keys , values , treatValuesAsKeyTypes ) , KeyColumnName , ValueColumnName , columns )
304
- { }
305
-
306
- /// <summary>
307
- /// Constructs a ValueMappingTransformer with a key type to value array type.
308
- /// </summary>
309
- /// <param name="env">The environment to use.</param>
310
- /// <param name="keys">The list of keys that are TKey.</param>
311
- /// <param name="values">The list of values that are TValue[].</param>
312
- /// <param name="columns">The specified columns to apply.</param>
313
- public ValueMappingTransformer ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue [ ] > values , ( string input , string output ) [ ] columns )
314
- : base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingTransformer < TKey , TValue > ) ) ,
315
- ConvertToDataView ( env , keys , values ) , KeyColumnName , ValueColumnName , columns )
316
- { }
317
-
318
- private static IDataView ConvertToDataView ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue > values , bool treatValuesAsKeyValue )
319
- => DataViewHelper . CreateDataView ( env ,
320
- keys ,
321
- values ,
322
- ValueMappingTransformer . KeyColumnName ,
323
- ValueMappingTransformer . ValueColumnName ,
324
- treatValuesAsKeyValue ) ;
325
-
326
- // Handler for vector value types
327
- private static IDataView ConvertToDataView ( IHostEnvironment env , IEnumerable < TKey > keys , IEnumerable < TValue [ ] > values )
328
- => DataViewHelper . CreateDataView ( env , keys , values , ValueMappingTransformer . KeyColumnName , ValueMappingTransformer . ValueColumnName ) ;
329
- }
330
-
331
305
public class ValueMappingTransformer : OneToOneTransformerBase
332
306
{
333
307
internal const string Summary = "Maps text values columns to new columns using a map dataset." ;
@@ -339,8 +313,8 @@ public class ValueMappingTransformer : OneToOneTransformerBase
339
313
340
314
// Stream names for the binary idv streams.
341
315
private const string DefaultMapName = "DefaultMap.idv" ;
342
- protected static string KeyColumnName = "Key" ;
343
- protected static string ValueColumnName = "Value" ;
316
+ internal static string KeyColumnName = "Key" ;
317
+ internal static string ValueColumnName = "Value" ;
344
318
private ValueMap _valueMap ;
345
319
private Schema . Metadata _valueMetadata ;
346
320
private byte [ ] _dataView ;
@@ -411,7 +385,7 @@ public sealed class Arguments
411
385
public bool ValuesAsKeyType = true ;
412
386
}
413
387
414
- protected ValueMappingTransformer ( IHostEnvironment env , IDataView lookupMap ,
388
+ internal ValueMappingTransformer ( IHostEnvironment env , IDataView lookupMap ,
415
389
string keyColumn , string valueColumn , ( string input , string output ) [ ] columns )
416
390
: base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( nameof ( ValueMappingTransformer ) ) , columns )
417
391
{
@@ -569,7 +543,8 @@ private static ValueMappingTransformer CreateTransformInvoke<TKey, TValue>(IHost
569
543
}
570
544
}
571
545
572
- return new ValueMappingTransformer < TKey , TValue > ( env , keys , values , treatValuesAsKeyTypes , columns ) ;
546
+ var lookupMap = DataViewHelper . CreateDataView ( env , keys , values , keyColumnName , valueColumnName , treatValuesAsKeyTypes ) ;
547
+ return new ValueMappingTransformer ( env , lookupMap , keyColumnName , valueColumnName , columns ) ;
573
548
}
574
549
575
550
private static IDataTransform Create ( IHostEnvironment env , Arguments args , IDataView input )
0 commit comments