@@ -7,59 +7,64 @@ public static class RandomTrainer
    {
        public static void Example()
        {
-           // Downloading the dataset from github.com/dotnet/machinelearning.
-           // This will create a sentiment.tsv file in the filesystem.
-           // You can open this file if you want to see the data.
-           string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
+           // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+           // as a catalog of available operations and as the source of randomness.
+           var mlContext = new MLContext(seed: 1);
+
+           // Download the dataset; it comes as separate train and test files.
+           var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
+           var trainFile = dataFiles[0];
+           var testFile = dataFiles[1];

            // A preview of the data.
            // Sentiment    SentimentText
            //      0       " :Erm, thank you. "
            //      1       ==You're cool==

-           // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
-           // as a catalog of available operations and as the source of randomness.
-           var mlContext = new MLContext(seed: 1);
-
-           // Step 1: Load the data as an IDataView.
-           // First, we define the loader: specify the data columns and where to find them in the text file.
-           var loader = mlContext.Data.CreateTextLoader(
+           // Step 1: Read the data as an IDataView.
+           // First, we define the reader: specify the data columns and where to find them in the text file.
+           var reader = mlContext.Data.CreateTextLoader(
                columns: new[]
                {
                    new TextLoader.Column("Sentiment", DataKind.Single, 0),
                    new TextLoader.Column("SentimentText", DataKind.String, 1)
                },
                hasHeader: true
            );
-
-           // Load the data
-           var data = loader.Load(dataFile);

-           // Split it between training and test data
-           var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
+           // Read the training data
+           var trainData = reader.Load(trainFile);
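+           // Illustrative sketch: IDataView is lazily evaluated, so to eyeball a few loaded rows while
+           // debugging, a small in-memory preview can be materialized (e.g. via the Preview extension):
+           // var preview = trainData.Preview(maxRows: 5);
+           // Each entry in preview.RowView then carries the "Sentiment" and "SentimentText" values.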

            // Step 2: Pipeline
            // Featurize the text column through the FeaturizeText API.
            // Then append a binary classifier, using the "Features" column produced by FeaturizeText as
            // its features column; the "Sentiment" column serves as the label when evaluating below.
            var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
-               .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
+               .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.BinaryClassification.Trainers.Random());

            // Step 3: Train the pipeline
-           var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
+           var trainedPipeline = pipeline.Fit(trainData);

            // Step 4: Evaluate on the test set
-           var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
+           var transformedData = trainedPipeline.Transform(reader.Load(testFile));
            var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");
-
-           // Step 5: Inspect the output
-           Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
+           SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);

            // We expect an output probability close to 0.5, as the Random trainer outputs a random prediction.
            // Regardless of the input features, the trainer will predict either a positive or a negative label with equal probability.
-           // Expected output (close to 0.5):
-           // Accuracy: 0.588235294117647
+           // Expected output (accuracy and AUC close to 0.5):
+
+           // Accuracy: 0.56
+           // AUC: 0.57
+           // F1 Score: 0.60
+           // Negative Precision: 0.57
+           // Negative Recall: 0.44
+           // Positive Precision: 0.55
+           // Positive Recall: 0.67
+           // LogLoss: 1.53
+           // LogLossReduction: -53.37
+           // Entropy: 1.00
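+
+           // Illustrative sketch, assuming hypothetical SentimentData/SentimentPrediction classes that
+           // mirror the loaded columns and the PredictedLabel/Probability outputs: a prediction engine
+           // over the trained pipeline should show per-row probabilities hovering around 0.5.
+           // var engine = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(trainedPipeline);
+           // var prediction = engine.Predict(new SentimentData { SentimentText = "Really friendly staff." });
+           // Console.WriteLine($"Predicted: {prediction.PredictedLabel}, probability: {prediction.Probability:F2}");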
        }
    }
}