1
1
using System ;
2
+ using System . Collections . Generic ;
3
+ using Microsoft . ML . Transforms ;
4
+
2
5
namespace Microsoft . ML . Samples . Dynamic
3
6
{
4
7
public static class CustomMapping
@@ -10,71 +13,88 @@ public static void Example()
10
13
var mlContext = new MLContext ( ) ;
11
14
12
15
// Get a small dataset as an IEnumerable and convert it to an IDataView.
13
- var data = SamplesUtils . DatasetUtils . GetInfertData ( ) ;
14
- var trainData = mlContext . Data . LoadFromEnumerable ( data ) ;
16
+ var rawData = GetData ( ) ;
17
+
18
+ // Printing the input data.
19
+ Console . WriteLine ( "Age\t Salary" ) ;
20
+ foreach ( var row in rawData )
21
+ Console . WriteLine ( $ "{ row . Age } \t { row . Salary } ") ;
22
+ // Expected output:
23
+ // Age Salary
24
+ // 26 40000
25
+ // 35 80000
26
+ // 34 10000
27
+ // 28 100000
15
28
16
- // Preview of the data.
17
- // Age RowNum Education ...
18
- // 26 0 0-5yrs ...
19
- // 42 1 0-5yrs ...
20
- // 39 2 12+yrs ...
21
- // 34 3 0-5yrs ...
22
- // 35 4 6-11yrs ...
29
+ var data = mlContext . Data . LoadFromEnumerable ( rawData ) ;
23
30
24
31
// We define the custom mapping between input and output rows that will be applied by the transformation.
25
- Action < SamplesUtils . DatasetUtils . SampleInfertData , OutputRow > mapping =
32
+ Action < InputData , CustomMappingOutput > mapping =
26
33
( input , output ) => output . IsUnderThirty = input . Age < 30 ;
27
34
28
- // Custom transformations can be used to transform data directly, or as part of a pipeline. Below we transform data directly.
29
- var estimator = mlContext . Transforms . CustomMapping ( mapping , null ) ;
30
- var transformedData = estimator . Fit ( trainData ) . Transform ( trainData ) ;
35
+ // Custom transformations can be used to transform data directly, or as part of a pipeline of estimators.
36
+ // Note: If contractName is null in the CustomMapping estimator, any pipeline of estimators containing it cannot be saved and loaded back.
37
+ // See the other sample for how to save and load the CustomMapping estimator.
38
+ var estimator = mlContext . Transforms . CustomMapping ( mapping , contractName : null ) ;
39
+ var transformedData = estimator . Fit ( data ) . Transform ( data ) ;
31
40
32
- // Preview 5 lines of the transformed data.
33
- transformedData = mlContext . Data . TakeRows ( transformedData , 5 ) ;
34
- var dataEnumerable = mlContext . Data . CreateEnumerable < SampleInfertDataTransformed > ( transformedData , reuseRowObject : true ) ;
35
- Console . WriteLine ( "IsUnderThirty\t Age\t RowNum\t Education\t ..." ) ;
41
+ // Printing the output data.
42
+ var dataEnumerable = mlContext . Data . CreateEnumerable < TransformedData > ( transformedData , reuseRowObject : true ) ;
43
+ Console . WriteLine ( "Age\t Salary\t IsUnderThirty" ) ;
36
44
foreach ( var row in dataEnumerable )
37
- Console . WriteLine ( $ "{ row . IsUnderThirty } \t { row . Age } \t { row . RowNum } \t { row . Education } \t ... ") ;
45
+ Console . WriteLine ( $ "{ row . Age } \t { row . Salary } \t { row . IsUnderThirty } ") ;
38
46
// Expected output:
39
- // IsUnderThirty Age RowNum Education ...
40
- // True 26 0 0-5yrs ...
41
- // False 42 1 0-5yrs ...
42
- // False 39 2 12+yrs ...
43
- // False 34 3 0-5yrs ...
44
- // False 35 4 6-11yrs ...
45
-
47
+ // Age Salary IsUnderThirty
48
+ // 26 40000 True
49
+ // 35 80000 False
50
+ // 34 10000 False
51
+ // 28 100000 True
52
+ }
46
53
47
- // Here instead we use it as part of a pipeline of estimators.
48
- var pipeline = mlContext . Transforms . CustomMapping ( mapping , null )
49
- . Append ( mlContext . Transforms . Concatenate ( outputColumnName : "Features" , inputColumnNames : new [ ] { "Parity" , "Induced" } ) )
50
- // It is useful to add a caching checkpoint before a trainer that does several passes over the data.
51
- . AppendCacheCheckpoint ( mlContext )
52
- // We use binary FastTree to predict the label column that was generated by the custom mapping at the first step of the pipeline.
53
- . Append ( mlContext . BinaryClassification . Trainers . FastTree ( labelColumnName : "IsUnderThirty" ) ) ;
54
+ // Defines only the column to be generated by the custom mapping transformation in addition to the columns already present.
55
+ public class CustomMappingOutput
56
+ {
57
+ public bool IsUnderThirty { get ; set ; }
58
+ }
54
59
55
- // We can train the pipeline and use it to transform data.
56
- transformedData = pipeline . Fit ( trainData ) . Transform ( trainData ) ;
60
+ // Defines the schema of the input data.
61
+ public class InputData
62
+ {
63
+ public float Age { get ; set ; }
64
+ public float Salary { get ; set ; }
57
65
}
58
66
59
- // This defines only the column to be generated by the transformation in addition to the columns already present .
60
- public class OutputRow
67
+ // Defines the schema of the transformed data, which includes the new column IsUnderThirty.
68
+ public class TransformedData
61
69
{
70
+ public float Age { get ; set ; }
71
+ public float Salary { get ; set ; }
62
72
public bool IsUnderThirty { get ; set ; }
73
+
63
74
}
64
75
65
- // Represents the transformed infertility dataset .
66
- public class SampleInfertDataTransformed
76
+ // Returns an enumerable of input rows.
77
+ public static IEnumerable < InputData > GetData ( )
67
78
{
68
- public bool IsUnderThirty { get ; set ; }
69
- public float Age { get ; set ; }
70
- public int RowNum { get ; set ; }
71
- public string Education { get ; set ; }
72
- public float Parity { get ; set ; }
73
- public float Induced { get ; set ; }
74
- public float Case { get ; set ; }
75
- public float Spontaneous { get ; set ; }
76
- public float Stratum { get ; set ; }
77
- public float PooledStratum { get ; set ; }
79
+ return new List < InputData >
80
+ {
81
+ new InputData {
82
+ Age = 26 ,
83
+ Salary = 40000 ,
84
+ } ,
85
+ new InputData {
86
+ Age = 35 ,
87
+ Salary = 80000 ,
88
+ } ,
89
+ new InputData {
90
+ Age = 34 ,
91
+ Salary = 10000 ,
92
+ } ,
93
+ new InputData {
94
+ Age = 28 ,
95
+ Salary = 100000 ,
96
+ } ,
97
+ } ;
78
98
}
79
99
}
80
100
}
0 commit comments