@@ -36,18 +36,18 @@ public class KMeansPlusPlusTrainer : TrainerEstimatorBase<ClusteringPredictionTr
36
36
+ "number of clusters in order to minimize the within-cluster sum of squares. K-means++ improves upon K-means by using a better "
37
37
+ "method for choosing the initial cluster centers." ;
38
38
39
/// <summary>
/// Cluster initialization algorithm choices; the selected value is read from
/// <c>Options.InitializationAlgorithm</c> and stored in <c>_initAlgorithm</c>.
/// In this change the type is renamed from <c>InitAlgorithm</c> and the member with
/// value 2 is renamed from <c>KMeansParallel</c> to <c>KMeansYinyang</c>; the numeric
/// values themselves are unchanged.
/// </summary>
- public enum InitAlgorithm
39
+ public enum InitializationAlgorithm
40
40
{
41
41
// TrainCore routes this value to KMeansPlusPlusInit.Initialize.
KMeansPlusPlus = 0 ,
42
42
// TrainCore routes this value to KMeansRandomInit.Initialize.
Random = 1 ,
43
- KMeansParallel = 2
43
// Default value of Options.InitializationAlgorithm.
+ KMeansYinyang = 2
44
44
}
45
45
46
46
/// <summary>
/// Default values for the trainer's options. The cluster-count constant (renamed here
/// from <c>ClustersCount</c> to <c>NumberOfClusters</c>, value still 5) is the initializer
/// of <c>Options.NumberOfClusters</c>.
/// </summary>
[ BestFriend ]
47
47
internal static class Defaults
48
48
{
49
49
/// <value>The number of clusters.</value>
50
- public const int ClustersCount = 5 ;
50
+ public const int NumberOfClusters = 5 ;
51
51
}
52
52
53
53
public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
@@ -58,13 +58,13 @@ public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
58
58
[ Argument ( ArgumentType . AtMostOnce , HelpText = "The number of clusters" , SortOrder = 50 , Name = "K" ) ]
59
59
[ TGUI ( SuggestedSweeps = "5,10,20,40" ) ]
60
60
[ TlcModule . SweepableDiscreteParam ( "K" , new object [ ] { 5 , 10 , 20 , 40 } ) ]
61
- public int ClustersCount = Defaults . ClustersCount ;
61
+ public int NumberOfClusters = Defaults . NumberOfClusters ;
62
62
63
63
/// <summary>
64
64
/// Cluster initialization algorithm.
65
65
/// </summary>
66
66
[ Argument ( ArgumentType . AtMostOnce , HelpText = "Cluster initialization algorithm" , ShortName = "init" ) ]
67
- public InitAlgorithm InitAlgorithm = InitAlgorithm . KMeansParallel ;
67
+ public InitializationAlgorithm InitializationAlgorithm = InitializationAlgorithm . KMeansYinyang ;
68
68
69
69
/// <summary>
70
70
/// Tolerance parameter for trainer convergence. Low = slower, more accurate.
@@ -79,7 +79,7 @@ public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
79
79
/// </summary>
80
80
[ Argument ( ArgumentType . AtMostOnce , HelpText = "Maximum number of iterations." , ShortName = "maxiter" ) ]
81
81
[ TGUI ( Label = "Max Number of Iterations" ) ]
82
- public int MaxIterations = 1000 ;
82
+ public int NumberOfIterations = 1000 ;
83
83
84
84
/// <summary>
85
85
/// Memory budget (in MBs) to use for KMeans acceleration.
@@ -94,7 +94,7 @@ public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
94
94
/// </summary>
95
95
[ Argument ( ArgumentType . AtMostOnce , HelpText = "Degree of lock-free parallelism. Defaults to automatic. Determinism not guaranteed." , ShortName = "nt,t,threads" , SortOrder = 50 ) ]
96
96
[ TGUI ( Label = "Number of threads" ) ]
97
- public int ? NumThreads ;
97
+ public int ? NumberOfThreads ;
98
98
}
99
99
100
100
private readonly int _k ;
@@ -103,7 +103,7 @@ public sealed class Options : UnsupervisedTrainerInputBaseWithWeight
103
103
private readonly float _convergenceThreshold ; // convergence thresholds
104
104
105
105
private readonly long _accelMemBudgetMb ;
106
- private readonly InitAlgorithm _initAlgorithm ;
106
+ private readonly InitializationAlgorithm _initAlgorithm ;
107
107
private readonly int _numThreads ;
108
108
private readonly string _featureColumn ;
109
109
@@ -119,26 +119,26 @@ internal KMeansPlusPlusTrainer(IHostEnvironment env, Options options)
119
119
: base ( Contracts . CheckRef ( env , nameof ( env ) ) . Register ( LoadNameValue ) , TrainerUtils . MakeR4VecFeature ( options . FeatureColumnName ) , default , TrainerUtils . MakeR4ScalarWeightColumn ( options . ExampleWeightColumnName ) )
120
120
{
121
121
Host . CheckValue ( options , nameof ( options ) ) ;
122
- Host . CheckUserArg ( options . ClustersCount > 0 , nameof ( options . ClustersCount ) , "Must be positive" ) ;
122
+ Host . CheckUserArg ( options . NumberOfClusters > 0 , nameof ( options . NumberOfClusters ) , "Must be positive" ) ;
123
123
124
124
_featureColumn = options . FeatureColumnName ;
125
125
126
- _k = options . ClustersCount ;
126
+ _k = options . NumberOfClusters ;
127
127
128
- Host . CheckUserArg ( options . MaxIterations > 0 , nameof ( options . MaxIterations ) , "Must be positive" ) ;
129
- _maxIterations = options . MaxIterations ;
128
+ Host . CheckUserArg ( options . NumberOfIterations > 0 , nameof ( options . NumberOfIterations ) , "Must be positive" ) ;
129
+ _maxIterations = options . NumberOfIterations ;
130
130
131
131
Host . CheckUserArg ( options . OptimizationTolerance > 0 , nameof ( options . OptimizationTolerance ) , "Tolerance must be positive" ) ;
132
132
_convergenceThreshold = options . OptimizationTolerance ;
133
133
134
134
Host . CheckUserArg ( options . AccelerationMemoryBudgetMb > 0 , nameof ( options . AccelerationMemoryBudgetMb ) , "Must be positive" ) ;
135
135
_accelMemBudgetMb = options . AccelerationMemoryBudgetMb ;
136
136
137
- _initAlgorithm = options . InitAlgorithm ;
137
+ _initAlgorithm = options . InitializationAlgorithm ;
138
138
139
- Host . CheckUserArg ( ! options . NumThreads . HasValue || options . NumThreads > 0 , nameof ( options . NumThreads ) ,
139
+ Host . CheckUserArg ( ! options . NumberOfThreads . HasValue || options . NumberOfThreads > 0 , nameof ( options . NumberOfThreads ) ,
140
140
"Must be either null or a positive integer." ) ;
141
- _numThreads = ComputeNumThreads ( Host , options . NumThreads ) ;
141
+ _numThreads = ComputeNumThreads ( Host , options . NumberOfThreads ) ;
142
142
Info = new TrainerInfo ( ) ;
143
143
}
144
144
@@ -184,12 +184,12 @@ private KMeansModelParameters TrainCore(IChannel ch, RoleMappedData data, int di
184
184
// all produce a valid set of output centroids with various trade-offs in runtime (with perhaps
185
185
// random initialization creating a set that's not terribly useful.) They could also be extended to
186
186
// pay attention to their incoming set of centroids and incrementally train.
187
- if ( _initAlgorithm == InitAlgorithm . KMeansPlusPlus )
187
+ if ( _initAlgorithm == InitializationAlgorithm . KMeansPlusPlus )
188
188
{
189
189
KMeansPlusPlusInit . Initialize ( Host , _numThreads , ch , cursorFactory , _k , dimensionality ,
190
190
centroids , out missingFeatureCount , out totalTrainingInstances ) ;
191
191
}
192
- else if ( _initAlgorithm == InitAlgorithm . Random )
192
+ else if ( _initAlgorithm == InitializationAlgorithm . Random )
193
193
{
194
194
KMeansRandomInit . Initialize ( Host , _numThreads , ch , cursorFactory , _k ,
195
195
centroids , out missingFeatureCount , out totalTrainingInstances ) ;
@@ -743,8 +743,8 @@ public static void Initialize(IHost host, int numThreads, IChannel ch, FeatureFl
743
743
host . CheckValue ( ch , nameof ( ch ) ) ;
744
744
ch . CheckValue ( cursorFactory , nameof ( cursorFactory ) ) ;
745
745
ch . CheckValue ( centroids , nameof ( centroids ) ) ;
746
- ch . CheckUserArg ( numThreads > 0 , nameof ( KMeansPlusPlusTrainer . Options . NumThreads ) , "Must be positive" ) ;
747
- ch . CheckUserArg ( k > 0 , nameof ( KMeansPlusPlusTrainer . Options . ClustersCount ) , "Must be positive" ) ;
746
+ ch . CheckUserArg ( numThreads > 0 , nameof ( KMeansPlusPlusTrainer . Options . NumberOfThreads ) , "Must be positive" ) ;
747
+ ch . CheckUserArg ( k > 0 , nameof ( KMeansPlusPlusTrainer . Options . NumberOfClusters ) , "Must be positive" ) ;
748
748
ch . CheckParam ( dimensionality > 0 , nameof ( dimensionality ) , "Must be positive" ) ;
749
749
ch . CheckUserArg ( accelMemBudgetMb >= 0 , nameof ( KMeansPlusPlusTrainer . Options . AccelerationMemoryBudgetMb ) , "Must be non-negative" ) ;
750
750
0 commit comments