@@ -44,5 +44,38 @@ void FileBasedSavingOfData()
44
44
DeleteOutputPath ( "i.idv" ) ;
45
45
}
46
46
}
47
+
48
/// <summary>
/// File-based saving of data: Come up with transform pipeline. Transform training and
/// test data, and save the featurized data to some file, using the .idv format.
/// Train and evaluate multiple models over that pre-featurized data. (Useful for
/// sweeping scenarios, where you are training many times on the same data,
/// and don't necessarily want to transform it every single time.)
/// </summary>
/// <remarks>
/// This test exercises the training half of the scenario only: it featurizes the
/// training data, writes it to "i.idv", reads that file back through a
/// <c>BinaryLoader</c>, and trains a single model on the loaded data.
/// </remarks>
[Fact]
void New_FileBasedSavingOfData()
{
    var dataPath = GetDataPath(SentimentDataPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        try
        {
            // Pipeline: text loader followed by the text featurization transform.
            var pipeline = new MyTextLoader(env, MakeSentimentTextLoaderArgs())
                .Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

            // The same source is used both to fit the pipeline and to produce the featurized data.
            var trainData = pipeline.Fit(new MultiFileSource(dataPath)).Read(new MultiFileSource(dataPath));

            // Persist the featurized data so that training can run from the file.
            using (var file = env.CreateOutputFile("i.idv"))
            {
                trainData.SaveAsBinary(env, file.CreateWriteStream());
            }

            var trainer = new MySdca(env, new LinearClassificationTrainer.Arguments { NumThreads = 1 }, "Features", "Label");
            var loadedTrainData = new BinaryLoader(env, new BinaryLoader.Arguments(), new MultiFileSource("i.idv"));

            // Train on the data read back from the file rather than on the in-memory pipeline output.
            var model = trainer.Train(loadedTrainData);
        }
        finally
        {
            // Clean up the intermediate file even when an earlier step throws,
            // so a failed run does not leave "i.idv" behind.
            DeleteOutputPath("i.idv");
        }
    }
}
47
80
}
48
81
}
0 commit comments