@@ -10,38 +10,47 @@ public static class KMeans
10
10
{
11
11
public static void Example ( )
12
12
{
13
- // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
14
- // as a catalog of available operations and as the source of randomness.
15
- // Setting the seed to a fixed number in this example to make outputs deterministic.
13
+ // Create a new context for ML.NET operations. It can be used for
14
+ // exception tracking and logging, as a catalog of available operations
15
+ // and as the source of randomness. Setting the seed to a fixed number
16
+ // in this example to make outputs deterministic.
16
17
var mlContext = new MLContext ( seed : 0 ) ;
17
18
18
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API .
19
+ // Create a list of training data points.
19
20
var dataPoints = GenerateRandomDataPoints ( 1000 , 123 ) ;
20
21
21
- // Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
22
- var trainingData = mlContext . Data . LoadFromEnumerable ( dataPoints ) ;
22
+ // Convert the list of data points to an IDataView object, which is
23
+ // consumable by ML.NET API.
24
+ IDataView trainingData = mlContext . Data . LoadFromEnumerable ( dataPoints ) ;
23
25
24
26
// Define the trainer.
25
- var pipeline = mlContext . Clustering . Trainers . KMeans ( numberOfClusters : 2 ) ;
27
+ var pipeline = mlContext . Clustering . Trainers . KMeans (
28
+ numberOfClusters : 2 ) ;
26
29
27
30
// Train the model.
28
31
var model = pipeline . Fit ( trainingData ) ;
29
32
30
- // Create testing data. Use different random seed to make it different from training data.
31
- var testData = mlContext . Data . LoadFromEnumerable ( GenerateRandomDataPoints ( 500 , seed : 123 ) ) ;
33
+ // Create testing data. Use a different random seed to make it different
34
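+ // from the training data. NOTE(review): the seed passed below (123) is the same value used for the training data above; confirm.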
35
+ var testData = mlContext . Data . LoadFromEnumerable (
36
+ GenerateRandomDataPoints ( 500 , seed : 123 ) ) ;
32
37
33
38
// Run the model on test data set.
34
39
var transformedTestData = model . Transform ( testData ) ;
35
40
36
41
// Convert IDataView object to a list.
37
- var predictions = mlContext . Data . CreateEnumerable < Prediction > ( transformedTestData , reuseRowObject : false ) . ToList ( ) ;
42
+ var predictions = mlContext . Data . CreateEnumerable < Prediction > (
43
+ transformedTestData , reuseRowObject : false ) . ToList ( ) ;
38
44
39
- // Print 5 predictions. Note that the label is only used as a comparison wiht the predicted label.
40
- // It is not used during training.
45
+ // Print 5 predictions. Note that the label is only used as a comparison
46
+ // with the predicted label. It is not used during training.
41
47
foreach ( var p in predictions . Take ( 2 ) )
42
- Console . WriteLine ( $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
48
+ Console . WriteLine (
49
+ $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
50
+
43
51
foreach ( var p in predictions . TakeLast ( 3 ) )
44
- Console . WriteLine ( $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
52
+ Console . WriteLine (
53
+ $ "Label: { p . Label } , Prediction: { p . PredictedLabel } ") ;
45
54
46
55
// Expected output:
47
56
// Label: 1, Prediction: 1
@@ -51,28 +60,37 @@ public static void Example()
51
60
// Label: 2, Prediction: 2
52
61
53
62
// Evaluate the overall metrics
54
- var metrics = mlContext . Clustering . Evaluate ( transformedTestData , "Label" , "Score" , "Features" ) ;
63
+ var metrics = mlContext . Clustering . Evaluate (
64
+ transformedTestData , "Label" , "Score" , "Features" ) ;
65
+
55
66
PrintMetrics ( metrics ) ;
56
-
67
+
57
68
// Expected output:
58
69
// Normalized Mutual Information: 0.95
59
70
// Average Distance: 4.17
60
71
// Davies Bouldin Index: 2.87
61
72
62
- // Get cluster centroids and the number of clusters k from KMeansModelParameters.
73
+ // Get the cluster centroids and the number of clusters k from
74
+ // KMeansModelParameters.
63
75
VBuffer < float > [ ] centroids = default ;
64
76
65
77
var modelParams = model . Model ;
66
78
modelParams . GetClusterCentroids ( ref centroids , out int k ) ;
67
- Console . WriteLine ( $ "The first 3 coordinates of the first centroid are: ({ string . Join ( ", " , centroids [ 0 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) } )") ;
68
- Console . WriteLine ( $ "The first 3 coordinates of the second centroid are: ({ string . Join ( ", " , centroids [ 1 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) } )") ;
79
+ Console . WriteLine (
80
+ $ "The first 3 coordinates of the first centroid are: " +
81
+ string . Join ( ", " , centroids [ 0 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) ) ;
82
+
83
+ Console . WriteLine (
84
+ $ "The first 3 coordinates of the second centroid are: " +
85
+ string . Join ( ", " , centroids [ 1 ] . GetValues ( ) . ToArray ( ) . Take ( 3 ) ) ) ;
69
86
70
87
// Expected output similar to:
71
88
// The first 3 coordinates of the first centroid are: 0.6035213, 0.6017533, 0.5964218
72
89
// The first 3 coordinates of the second centroid are: 0.4031044, 0.4175443, 0.4082336
73
90
}
74
91
75
- private static IEnumerable < DataPoint > GenerateRandomDataPoints ( int count , int seed = 0 )
92
+ private static IEnumerable < DataPoint > GenerateRandomDataPoints ( int count ,
93
+ int seed = 0 )
76
94
{
77
95
var random = new Random ( seed ) ;
78
96
float randomFloat ( ) => ( float ) random . NextDouble ( ) ;
@@ -83,16 +101,21 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
83
101
{
84
102
Label = ( uint ) label ,
85
103
// Create random features with two clusters.
86
- // The first half has feature values centered around 0.6 the second half has values centered around 0.4.
87
- Features = Enumerable . Repeat ( label , 50 ) . Select ( index => label == 0 ? randomFloat ( ) + 0.1f : randomFloat ( ) - 0.1f ) . ToArray ( )
104
+ // The first half has feature values centered around 0.6, while
105
+ // the second half has values centered around 0.4.
106
+ Features = Enumerable . Repeat ( label , 50 )
107
+ . Select ( index => label == 0 ? randomFloat ( ) + 0.1f :
108
+ randomFloat ( ) - 0.1f ) . ToArray ( )
88
109
} ;
89
110
}
90
111
}
91
112
92
- // Example with label and 50 feature values. A data set is a collection of such examples.
113
+ // Example with label and 50 feature values. A data set is a collection of
114
+ // such examples.
93
115
private class DataPoint
94
116
{
95
- // The label is not used during training, just for comparison with the predicted label.
117
+ // The label is not used during training, just for comparison with the
118
+ // predicted label.
96
119
[ KeyType ( 2 ) ]
97
120
public uint Label { get ; set ; }
98
121
@@ -112,9 +135,14 @@ private class Prediction
112
135
// Pretty-print of ClusteringMetrics object.
113
136
private static void PrintMetrics ( ClusteringMetrics metrics )
114
137
{
115
- Console . WriteLine ( $ "Normalized Mutual Information: { metrics . NormalizedMutualInformation : F2} ") ;
116
- Console . WriteLine ( $ "Average Distance: { metrics . AverageDistance : F2} ") ;
117
- Console . WriteLine ( $ "Davies Bouldin Index: { metrics . DaviesBouldinIndex : F2} ") ;
138
+ Console . WriteLine ( $ "Normalized Mutual Information: " +
139
+ $ "{ metrics . NormalizedMutualInformation : F2} ") ;
140
+
141
+ Console . WriteLine ( $ "Average Distance: " +
142
+ $ "{ metrics . AverageDistance : F2} ") ;
143
+
144
+ Console . WriteLine ( $ "Davies Bouldin Index: " +
145
+ $ "{ metrics . DaviesBouldinIndex : F2} ") ;
118
146
}
119
147
}
120
148
}
0 commit comments