Skip to content

Commit 4d5bf08

Browse files
authored
Update xml documentation for ProduceHashedNgrams (#3419)
1 parent 1e166fb commit 4d5bf08

File tree

2 files changed

+42
-23
lines changed

2 files changed

+42
-23
lines changed

src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs

+22-9
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,7 @@
2929

3030
namespace Microsoft.ML.Transforms.Text
3131
{
32-
/// <summary>
33-
/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given text.
34-
/// It does so by hashing each ngram and using the hash value as the index in the bag.
35-
/// </summary>
32+
/// <summary><see cref="ITransformer"/> resulting from fitting a <see cref="NgramHashingEstimator"/>.</summary>
3633
public sealed class NgramHashingTransformer : RowToRowTransformerBase
3734
{
3835
internal sealed class Column : ManyToOneColumn
@@ -861,12 +858,28 @@ public VBuffer<ReadOnlyMemory<char>>[] SlotNamesMetadata(out VectorDataViewType[
861858
}
862859

863860
/// <summary>
864-
/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given text.
865-
/// It does so by hashing each ngram and using the hash value as the index in the bag.
866-
///
867-
/// <see cref="NgramHashingEstimator"/> is different from <see cref="WordHashBagEstimator"/> in a way that <see cref="NgramHashingEstimator"/>
868-
/// takes tokenized text as input while <see cref="WordHashBagEstimator"/> tokenizes text internally.
861+
/// <see cref="IEstimator{TTransformer}"/> for the <see cref="NgramHashingTransformer"/>.
869862
/// </summary>
863+
/// <remarks>
864+
/// <format type="text/markdown"><![CDATA[
865+
/// ### Estimator Characteristics
866+
/// | | |
867+
/// | -- | -- |
868+
/// | Does this estimator need to look at the data to train its parameters? | Yes |
869+
/// | Input column data type | Vector of [Key](<xref:Microsoft.ML.Data.KeyDataViewType>) |
870+
/// | Output column data type | Vector of known size of <xref:System.Single> |
871+
///
872+
/// The resulting <xref:Microsoft.ML.Transforms.Text.NgramHashingTransformer> creates a new column, named as specified in the output column name parameters, and
873+
/// produces a vector of counts of n-grams (sequences of consecutive words of length 1-n) from the given data.
874+
/// It does so by hashing each n-gram and using the hash value as the index in the bag.
875+
///
876+
/// <xref:Microsoft.ML.Transforms.Text.NgramHashingEstimator> is different from <xref:Microsoft.ML.Transforms.Text.WordHashBagEstimator>
877+
/// in that the former takes tokenized text as input while the latter tokenizes text internally.
878+
/// See the See Also section for links to examples of the usage.
879+
/// ]]>
880+
/// </format>
881+
/// </remarks>
882+
/// <seealso cref="TextCatalog.ProduceHashedNgrams(TransformsCatalog.TextTransforms, string, string, int, int, int, bool, uint, bool, int, bool)" />
870883
public sealed class NgramHashingEstimator : IEstimator<NgramHashingTransformer>
871884
{
872885
/// <summary>

src/Microsoft.ML.Transforms/Text/TextCatalog.cs

+20-14
Original file line numberDiff line numberDiff line change
@@ -403,15 +403,18 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog.
403403
maximumNumberOfInverts: maximumNumberOfInverts);
404404

405405
/// <summary>
406-
/// Produces a bag of counts of hashed ngrams in <paramref name="inputColumnName"/>
407-
/// and outputs ngram vector as <paramref name="outputColumnName"/>
408-
///
406+
/// Create a <see cref="NgramHashingEstimator"/>, which copies the data from the column specified in <paramref name="inputColumnName"/>
407+
/// to a new column: <paramref name="outputColumnName"/> and produces a vector of counts of hashed n-grams.
408+
/// </summary>
409+
/// <remarks>
409410
/// <see cref="NgramHashingEstimator"/> is different from <see cref="WordHashBagEstimator"/> in a way that <see cref="NgramHashingEstimator"/>
410411
/// takes tokenized text as input while <see cref="WordHashBagEstimator"/> tokenizes text internally.
411-
/// </summary>
412-
/// <param name="catalog">The text-related transform's catalog.</param>
413-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
414-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
412+
/// </remarks>
413+
/// <param name="catalog">The transform's catalog.</param>
414+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
415+
/// This column's data type will be vector of known size of <see cref="System.Single"/>.</param>
416+
/// <param name="inputColumnName">Name of the column to copy the data from.
417+
/// This estimator operates over vector of key type.</param>
415418
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
416419
/// <param name="ngramLength">Ngram length.</param>
417420
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
@@ -439,15 +442,18 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T
439442
useAllLengths: useAllLengths, numberOfBits: numberOfBits, seed: seed, useOrderedHashing: useOrderedHashing, maximumNumberOfInverts: maximumNumberOfInverts, rehashUnigrams) });
440443

441444
/// <summary>
442-
/// Produces a bag of counts of hashed ngrams in <paramref name="inputColumnNames"/>
443-
/// and outputs ngram vector as <paramref name="outputColumnName"/>
444-
///
445+
/// Create a <see cref="NgramHashingEstimator"/>, which takes the data from the multiple columns specified in <paramref name="inputColumnNames"/>
446+
/// to a new column: <paramref name="outputColumnName"/> and produces a vector of counts of hashed n-grams.
447+
/// </summary>
448+
/// <remarks>
445449
/// <see cref="NgramHashingEstimator"/> is different from <see cref="WordHashBagEstimator"/> in a way that <see cref="NgramHashingEstimator"/>
446450
/// takes tokenized text as input while <see cref="WordHashBagEstimator"/> tokenizes text internally.
447-
/// </summary>
448-
/// <param name="catalog">The text-related transform's catalog.</param>
449-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.</param>
450-
/// <param name="inputColumnNames">Names of the columns to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
451+
/// </remarks>
452+
/// <param name="catalog">The transform's catalog.</param>
453+
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnNames"/>.
454+
/// This column's data type will be vector of known size of <see cref="System.Single"/>.</param>
455+
/// <param name="inputColumnNames">Names of the columns to take the data from.
456+
/// This estimator operates over vector of key type.</param>
451457
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
452458
/// <param name="ngramLength">Ngram length.</param>
453459
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>

0 commit comments

Comments
 (0)