@@ -25,8 +25,9 @@ public CategoricalHashTests(ITestOutputHelper output) : base(output)
25
25
private class TestClass
26
26
{
27
27
public string A ;
28
- public string B ;
29
- public string C ;
28
+ [ VectorType ( 2 ) ]
29
+ public string [ ] B ;
30
+ public string [ ] C ;
30
31
}
31
32
32
33
private class TestMeta
@@ -45,17 +46,31 @@ private class TestMeta
45
46
[ Fact ]
46
47
public void CategoricalHashWorkout ( )
47
48
{
48
- var data = new [ ] { new TestClass ( ) { A = "1" , B = "2" , C = " 3", } , new TestClass ( ) { A = "4" , B = "5" , C = "6" } } ;
49
+ var data = new [ ] { new TestClass ( ) { A = "1" , B = new [ ] { "2" , "3" } , C = new [ ] { "2" , " 3", "4" } } , new TestClass ( ) { A = "4" , B = new [ ] { "4" , "5" } , C = new [ ] { "3" , "4" , "5" } } } ;
49
50
50
51
var dataView = ML . Data . LoadFromEnumerable ( data ) ;
51
52
var pipe = ML . Transforms . Categorical . OneHotHashEncoding ( new [ ] {
52
- new OneHotHashEncodingEstimator . ColumnOptions ( "CatA" , "A" , OneHotEncodingEstimator . OutputKind . Bag ) ,
53
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatA" , "A" , OneHotEncodingEstimator . OutputKind . Bag ) ,
53
54
new OneHotHashEncodingEstimator . ColumnOptions ( "CatB" , "A" , OneHotEncodingEstimator . OutputKind . Binary ) ,
54
55
new OneHotHashEncodingEstimator . ColumnOptions ( "CatC" , "A" , OneHotEncodingEstimator . OutputKind . Indicator ) ,
55
56
new OneHotHashEncodingEstimator . ColumnOptions ( "CatD" , "A" , OneHotEncodingEstimator . OutputKind . Key ) ,
57
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVA" , "B" , OneHotEncodingEstimator . OutputKind . Bag ) ,
58
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVB" , "B" , OneHotEncodingEstimator . OutputKind . Binary ) ,
59
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVC" , "B" , OneHotEncodingEstimator . OutputKind . Indicator ) ,
60
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVD" , "B" , OneHotEncodingEstimator . OutputKind . Key ) ,
61
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVVA" , "C" , OneHotEncodingEstimator . OutputKind . Bag ) ,
62
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVVB" , "C" , OneHotEncodingEstimator . OutputKind . Binary ) ,
63
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVVC" , "C" , OneHotEncodingEstimator . OutputKind . Indicator ) ,
64
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatVVD" , "C" , OneHotEncodingEstimator . OutputKind . Key ) ,
56
65
} ) ;
57
66
58
67
TestEstimatorCore ( pipe , dataView ) ;
68
+ var outputPath = GetOutputPath ( "CategoricalHash" , "oneHotHash.tsv" ) ;
69
+ var savedData = pipe . Fit ( dataView ) . Transform ( dataView ) ;
70
+
71
+ using ( var fs = File . Create ( outputPath ) )
72
+ ML . Data . SaveAsText ( savedData , fs , headerRow : true , keepHidden : true ) ;
73
+ CheckEquality ( "CategoricalHash" , "oneHotHash.tsv" ) ;
59
74
Done ( ) ;
60
75
}
61
76
@@ -68,7 +83,7 @@ public void CategoricalHashStatic()
68
83
VectorString : ctx . LoadText ( 1 , 4 ) ,
69
84
SingleVectorString : ctx . LoadText ( 1 , 1 ) ) ) ;
70
85
var data = reader . Load ( dataPath ) ;
71
- var wrongCollection = new [ ] { new TestClass ( ) { A = "1" , B = "2" , C = " 3", } , new TestClass ( ) { A = "4" , B = "5" , C = "6" } } ;
86
+ var wrongCollection = new [ ] { new TestClass ( ) { A = "1" , B = new [ ] { "2" , "3" } , C = new [ ] { "2" , " 3", "4" } } , new TestClass ( ) { A = "4" , B = new [ ] { "4" , "5" } , C = new [ ] { "3" , "4" , "5" } } } ;
72
87
73
88
var invalidData = ML . Data . LoadFromEnumerable ( wrongCollection ) ;
74
89
var est = data . MakeNewEstimator ( ) .
@@ -211,12 +226,12 @@ public void TestCommandLine()
211
226
[ Fact ]
212
227
public void TestOldSavingAndLoading ( )
213
228
{
214
- var data = new [ ] { new TestClass ( ) { A = "1" , B = "2" , C = " 3", } , new TestClass ( ) { A = "4" , B = "5" , C = "6" } } ;
229
+ var data = new [ ] { new TestClass ( ) { A = "1" , B = new [ ] { "2" , "3" } , C = new [ ] { "2" , " 3", "4" } } , new TestClass ( ) { A = "4" , B = new [ ] { "4" , "5" } , C = new [ ] { "3" , "4" , "5" } } } ;
215
230
var dataView = ML . Data . LoadFromEnumerable ( data ) ;
216
231
var pipe = ML . Transforms . Categorical . OneHotHashEncoding ( new [ ] {
217
232
new OneHotHashEncodingEstimator . ColumnOptions ( "CatHashA" , "A" ) ,
218
233
new OneHotHashEncodingEstimator . ColumnOptions ( "CatHashB" , "B" ) ,
219
- new OneHotHashEncodingEstimator . ColumnOptions ( "CatHashC" , "C" )
234
+ new OneHotHashEncodingEstimator . ColumnOptions ( "CatHashC" , "C" ) ,
220
235
} ) ;
221
236
var result = pipe . Fit ( dataView ) . Transform ( dataView ) ;
222
237
var resultRoles = new RoleMappedData ( result ) ;
0 commit comments