@@ -980,20 +980,62 @@ internal LatentDirichletAllocationEstimator(IHostEnvironment env, params ColumnI
980
980
_columns = columns . ToImmutableArray ( ) ;
981
981
}
982
982
983
+ /// <summary>
984
+ /// Describes how the transformer handles one column pair.
985
+ /// </summary>
983
986
public sealed class ColumnInfo
984
987
{
988
+ /// <summary>
989
+ /// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
990
+ /// </summary>
985
991
public readonly string Name ;
992
+ /// <summary>
993
+ /// Name of the column to transform. If set to <see langword="null"/>, the value of <see cref="Name"/> will be used as source.
994
+ /// </summary>
986
995
public readonly string InputColumnName ;
996
+ /// <summary>
997
+ /// The number of topics.
998
+ /// </summary>
987
999
public readonly int NumTopic ;
1000
+ /// <summary>
1001
+ /// Dirichlet prior on document-topic vectors.
1002
+ /// </summary>
988
1003
public readonly float AlphaSum ;
1004
+ /// <summary>
1005
+ /// Dirichlet prior on vocab-topic vectors.
1006
+ /// </summary>
989
1007
public readonly float Beta ;
1008
+ /// <summary>
1009
+ /// Number of Metropolis-Hastings steps.
1010
+ /// </summary>
990
1011
public readonly int MHStep ;
1012
+ /// <summary>
1013
+ /// Number of iterations.
1014
+ /// </summary>
991
1015
public readonly int NumIter ;
1016
+ /// <summary>
1017
+ /// The iteration interval at which the log likelihood over the local dataset is computed.
1018
+ /// </summary>
992
1019
public readonly int LikelihoodInterval ;
1020
+ /// <summary>
1021
+ /// The number of training threads.
1022
+ /// </summary>
993
1023
public readonly int NumThread ;
1024
+ /// <summary>
1025
+ /// The threshold of maximum count of tokens per doc.
1026
+ /// </summary>
994
1027
public readonly int NumMaxDocToken ;
1028
+ /// <summary>
1029
+ /// The number of words to summarize the topic.
1030
+ /// </summary>
995
1031
public readonly int NumSummaryTermPerTopic ;
1032
+ /// <summary>
1033
+ /// The number of burn-in iterations.
1034
+ /// </summary>
996
1035
public readonly int NumBurninIter ;
1036
+ /// <summary>
1037
+ /// Whether to reset the random number generator for each document.
1038
+ /// </summary>
997
1039
public readonly bool ResetRandomGenerator ;
998
1040
999
1041
/// <summary>
@@ -1150,7 +1192,8 @@ internal void Save(ModelSaveContext ctx)
1150
1192
}
1151
1193
1152
1194
/// <summary>
1153
- /// Returns the schema that would be produced by the transformation.
1195
+ /// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
1196
+ /// Used for schema propagation and verification in a pipeline.
1154
1197
/// </summary>
1155
1198
public SchemaShape GetOutputSchema ( SchemaShape inputSchema )
1156
1199
{
@@ -1169,6 +1212,9 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
1169
1212
return new SchemaShape ( result . Values ) ;
1170
1213
}
1171
1214
1215
+ /// <summary>
1216
+ /// Trains and returns a <see cref="LatentDirichletAllocationTransformer"/>.
1217
+ /// </summary>
1172
1218
public LatentDirichletAllocationTransformer Fit ( IDataView input )
1173
1219
{
1174
1220
return LatentDirichletAllocationTransformer . TrainLdaTransformer ( _host , input , _columns . ToArray ( ) ) ;
0 commit comments