Skip to content

Commit 4e2dba7

Browse files
authored
Refactoring clustering catalog samples line width to 85 (#3595)
1 parent fabdabf commit 4e2dba7

File tree

4 files changed

+164
-80
lines changed

4 files changed

+164
-80
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/Clustering.ttinclude

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,23 @@ namespace Samples.Dynamic.Trainers.Clustering
1313
{<#=Comments#>
1414
public static void Example()
1515
{
16-
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
17-
// as a catalog of available operations and as the source of randomness.
18-
// Setting the seed to a fixed number in this example to make outputs deterministic.
16+
// Create a new context for ML.NET operations. It can be used for
17+
// exception tracking and logging, as a catalog of available operations
18+
// and as the source of randomness. Setting the seed to a fixed number
19+
// in this example to make outputs deterministic.
1920
var mlContext = new MLContext(seed: 0);
2021

21-
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
22+
// Create a list of training data points.
2223
var dataPoints = GenerateRandomDataPoints(1000, <#=DataSeed#>);
2324

24-
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
25-
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
25+
// Convert the list of data points to an IDataView object, which is
26+
// consumable by ML.NET API.
27+
IDataView trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
2628

2729
<# if (TrainerOptions == null) { #>
2830
// Define the trainer.
29-
var pipeline = mlContext.Clustering.Trainers.<#=Trainer#>(<#=InlineTrainerOptions#>);
31+
var pipeline = mlContext.Clustering.Trainers.<#=Trainer#>(
32+
<#=InlineTrainerOptions#>);
3033
<# } else { #>
3134
// Define trainer options.
3235
var options = new <#=TrainerOptions#>;
@@ -38,42 +41,57 @@ namespace Samples.Dynamic.Trainers.Clustering
3841
// Train the model.
3942
var model = pipeline.Fit(trainingData);
4043

41-
// Create testing data. Use different random seed to make it different from training data.
42-
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
44+
// Create testing data. Use a different random seed to make it different
45+
// from the training data.
46+
var testData = mlContext.Data.LoadFromEnumerable(
47+
GenerateRandomDataPoints(500, seed: 123));
4348

4449
// Run the model on test data set.
4550
var transformedTestData = model.Transform(testData);
4651

4752
// Convert IDataView object to a list.
48-
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
53+
var predictions = mlContext.Data.CreateEnumerable<Prediction>(
54+
transformedTestData, reuseRowObject: false).ToList();
4955

50-
// Print 5 predictions. Note that the label is only used as a comparison wiht the predicted label.
51-
// It is not used during training.
56+
// Print 5 predictions. Note that the label is only used as a comparison
57+
// with the predicted label. It is not used during training.
5258
foreach (var p in predictions.Take(2))
53-
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
59+
Console.WriteLine(
60+
$"Label: {p.Label}, Prediction: {p.PredictedLabel}");
61+
5462
foreach (var p in predictions.TakeLast(3))
55-
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
63+
Console.WriteLine(
64+
$"Label: {p.Label}, Prediction: {p.PredictedLabel}");
5665

5766
<#=ExpectedOutputPerInstance#>
5867

5968
// Evaluate the overall metrics
60-
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
69+
var metrics = mlContext.Clustering.Evaluate(
70+
transformedTestData, "Label", "Score", "Features");
71+
6172
PrintMetrics(metrics);
62-
73+
6374
<#=ExpectedOutput#>
6475

65-
// Get cluster centroids and the number of clusters k from KMeansModelParameters.
76+
// Get the cluster centroids and the number of clusters k from
77+
// KMeansModelParameters.
6678
VBuffer<float>[] centroids = default;
6779

6880
var modelParams = model.Model;
6981
modelParams.GetClusterCentroids(ref centroids, out int k);
70-
Console.WriteLine($"The first 3 coordinates of the first centroid are: ({string.Join(", ", centroids[0].GetValues().ToArray().Take(3))})");
71-
Console.WriteLine($"The first 3 coordinates of the second centroid are: ({string.Join(", ", centroids[1].GetValues().ToArray().Take(3))})");
82+
Console.WriteLine(
83+
$"The first 3 coordinates of the first centroid are: " +
84+
string.Join(", ", centroids[0].GetValues().ToArray().Take(3)));
85+
86+
Console.WriteLine(
87+
$"The first 3 coordinates of the second centroid are: " +
88+
string.Join(", ", centroids[1].GetValues().ToArray().Take(3)));
7289

7390
<#=ExpectedCentroidsOutput#>
7491
}
7592

76-
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
93+
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
94+
int seed = 0)
7795
{
7896
var random = new Random(seed);
7997
float randomFloat() => (float)random.NextDouble();
@@ -84,16 +102,21 @@ namespace Samples.Dynamic.Trainers.Clustering
84102
{
85103
Label = (uint)label,
86104
// Create random features with two clusters.
87-
// The first half has feature values centered around 0.6 the second half has values centered around 0.4.
88-
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
105+
// The first half has feature values centered around 0.6, while
106+
// the second half has values centered around 0.4.
107+
Features = Enumerable.Repeat(label, 50)
108+
.Select(index => label == 0 ? randomFloat() + 0.1f :
109+
randomFloat() - 0.1f).ToArray()
89110
};
90111
}
91112
}
92113

93-
// Example with label and 50 feature values. A data set is a collection of such examples.
114+
// Example with label and 50 feature values. A data set is a collection of
115+
// such examples.
94116
private class DataPoint
95117
{
96-
// The label is not used during training, just for comparison with the predicted label.
118+
// The label is not used during training, just for comparison with the
119+
// predicted label.
97120
[KeyType(2)]
98121
public uint Label { get; set; }
99122

@@ -113,9 +136,14 @@ namespace Samples.Dynamic.Trainers.Clustering
113136
// Pretty-print of ClusteringMetrics object.
114137
private static void PrintMetrics(ClusteringMetrics metrics)
115138
{
116-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
117-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
118-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
139+
Console.WriteLine($"Normalized Mutual Information: " +
140+
$"{metrics.NormalizedMutualInformation:F2}");
141+
142+
Console.WriteLine($"Average Distance: " +
143+
$"{metrics.AverageDistance:F2}");
144+
145+
Console.WriteLine($"Davies Bouldin Index: " +
146+
$"{metrics.DaviesBouldinIndex:F2}");
119147
}
120148
}
121149
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Clustering/KMeans.cs

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,38 +10,47 @@ public static class KMeans
1010
{
1111
public static void Example()
1212
{
13-
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
14-
// as a catalog of available operations and as the source of randomness.
15-
// Setting the seed to a fixed number in this example to make outputs deterministic.
13+
// Create a new context for ML.NET operations. It can be used for
14+
// exception tracking and logging, as a catalog of available operations
15+
// and as the source of randomness. Setting the seed to a fixed number
16+
// in this example to make outputs deterministic.
1617
var mlContext = new MLContext(seed: 0);
1718

18-
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
19+
// Create a list of training data points.
1920
var dataPoints = GenerateRandomDataPoints(1000, 123);
2021

21-
// Convert the list of data points to an IDataView object, which is consumable by ML.NET API.
22-
var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
22+
// Convert the list of data points to an IDataView object, which is
23+
// consumable by ML.NET API.
24+
IDataView trainingData = mlContext.Data.LoadFromEnumerable(dataPoints);
2325

2426
// Define the trainer.
25-
var pipeline = mlContext.Clustering.Trainers.KMeans(numberOfClusters: 2);
27+
var pipeline = mlContext.Clustering.Trainers.KMeans(
28+
numberOfClusters: 2);
2629

2730
// Train the model.
2831
var model = pipeline.Fit(trainingData);
2932

30-
// Create testing data. Use different random seed to make it different from training data.
31-
var testData = mlContext.Data.LoadFromEnumerable(GenerateRandomDataPoints(500, seed: 123));
33+
// Create testing data. NOTE: the seed below (123) matches the training
34+
// seed above, so both sets come from the same random sequence.
35+
var testData = mlContext.Data.LoadFromEnumerable(
36+
GenerateRandomDataPoints(500, seed: 123));
3237

3338
// Run the model on test data set.
3439
var transformedTestData = model.Transform(testData);
3540

3641
// Convert IDataView object to a list.
37-
var predictions = mlContext.Data.CreateEnumerable<Prediction>(transformedTestData, reuseRowObject: false).ToList();
42+
var predictions = mlContext.Data.CreateEnumerable<Prediction>(
43+
transformedTestData, reuseRowObject: false).ToList();
3844

39-
// Print 5 predictions. Note that the label is only used as a comparison wiht the predicted label.
40-
// It is not used during training.
45+
// Print 5 predictions. Note that the label is only used as a comparison
46+
// with the predicted label. It is not used during training.
4147
foreach (var p in predictions.Take(2))
42-
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
48+
Console.WriteLine(
49+
$"Label: {p.Label}, Prediction: {p.PredictedLabel}");
50+
4351
foreach (var p in predictions.TakeLast(3))
44-
Console.WriteLine($"Label: {p.Label}, Prediction: {p.PredictedLabel}");
52+
Console.WriteLine(
53+
$"Label: {p.Label}, Prediction: {p.PredictedLabel}");
4554

4655
// Expected output:
4756
// Label: 1, Prediction: 1
@@ -51,28 +60,37 @@ public static void Example()
5160
// Label: 2, Prediction: 2
5261

5362
// Evaluate the overall metrics
54-
var metrics = mlContext.Clustering.Evaluate(transformedTestData, "Label", "Score", "Features");
63+
var metrics = mlContext.Clustering.Evaluate(
64+
transformedTestData, "Label", "Score", "Features");
65+
5566
PrintMetrics(metrics);
56-
67+
5768
// Expected output:
5869
// Normalized Mutual Information: 0.95
5970
// Average Distance: 4.17
6071
// Davies Bouldin Index: 2.87
6172

62-
// Get cluster centroids and the number of clusters k from KMeansModelParameters.
73+
// Get the cluster centroids and the number of clusters k from
74+
// KMeansModelParameters.
6375
VBuffer<float>[] centroids = default;
6476

6577
var modelParams = model.Model;
6678
modelParams.GetClusterCentroids(ref centroids, out int k);
67-
Console.WriteLine($"The first 3 coordinates of the first centroid are: ({string.Join(", ", centroids[0].GetValues().ToArray().Take(3))})");
68-
Console.WriteLine($"The first 3 coordinates of the second centroid are: ({string.Join(", ", centroids[1].GetValues().ToArray().Take(3))})");
79+
Console.WriteLine(
80+
$"The first 3 coordinates of the first centroid are: " +
81+
string.Join(", ", centroids[0].GetValues().ToArray().Take(3)));
82+
83+
Console.WriteLine(
84+
$"The first 3 coordinates of the second centroid are: " +
85+
string.Join(", ", centroids[1].GetValues().ToArray().Take(3)));
6986

7087
// Expected output similar to:
7188
// The first 3 coordinates of the first centroid are: 0.6035213, 0.6017533, 0.5964218
7289
// The first 3 coordinates of the second centroid are: 0.4031044, 0.4175443, 0.4082336
7390
}
7491

75-
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int seed = 0)
92+
private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count,
93+
int seed = 0)
7694
{
7795
var random = new Random(seed);
7896
float randomFloat() => (float)random.NextDouble();
@@ -83,16 +101,21 @@ private static IEnumerable<DataPoint> GenerateRandomDataPoints(int count, int se
83101
{
84102
Label = (uint)label,
85103
// Create random features with two clusters.
86-
// The first half has feature values centered around 0.6 the second half has values centered around 0.4.
87-
Features = Enumerable.Repeat(label, 50).Select(index => label == 0 ? randomFloat() + 0.1f : randomFloat() - 0.1f).ToArray()
104+
// The first half has feature values centered around 0.6, while
105+
// the second half has values centered around 0.4.
106+
Features = Enumerable.Repeat(label, 50)
107+
.Select(index => label == 0 ? randomFloat() + 0.1f :
108+
randomFloat() - 0.1f).ToArray()
88109
};
89110
}
90111
}
91112

92-
// Example with label and 50 feature values. A data set is a collection of such examples.
113+
// Example with label and 50 feature values. A data set is a collection of
114+
// such examples.
93115
private class DataPoint
94116
{
95-
// The label is not used during training, just for comparison with the predicted label.
117+
// The label is not used during training, just for comparison with the
118+
// predicted label.
96119
[KeyType(2)]
97120
public uint Label { get; set; }
98121

@@ -112,9 +135,14 @@ private class Prediction
112135
// Pretty-print of ClusteringMetrics object.
113136
private static void PrintMetrics(ClusteringMetrics metrics)
114137
{
115-
Console.WriteLine($"Normalized Mutual Information: {metrics.NormalizedMutualInformation:F2}");
116-
Console.WriteLine($"Average Distance: {metrics.AverageDistance:F2}");
117-
Console.WriteLine($"Davies Bouldin Index: {metrics.DaviesBouldinIndex:F2}");
138+
Console.WriteLine($"Normalized Mutual Information: " +
139+
$"{metrics.NormalizedMutualInformation:F2}");
140+
141+
Console.WriteLine($"Average Distance: " +
142+
$"{metrics.AverageDistance:F2}");
143+
144+
Console.WriteLine($"Davies Bouldin Index: " +
145+
$"{metrics.DaviesBouldinIndex:F2}");
118146
}
119147
}
120148
}

0 commit comments

Comments
 (0)