-
Notifications
You must be signed in to change notification settings - Fork 1.9k
XML documentation for five text related transforms #3418
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,11 +57,15 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text | |
outputColumnName, inputColumnNames, options); | ||
|
||
/// <summary> | ||
/// Tokenize incoming text in <paramref name="inputColumnName"/> and output the tokens as <paramref name="outputColumnName"/>. | ||
/// Create a <see cref="TokenizingByCharactersEstimator"/>, which tokenizes characters by splitting text into sequences of characters | ||
/// using a sliding window. | ||
/// </summary> | ||
/// <param name="catalog">The text-related transform's catalog.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> | ||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. | ||
/// This column's data type will be a variable-sized vector of keys.</param> | ||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the | ||
/// <paramref name="outputColumnName"/> will be used as source. | ||
/// This estimator operates over text data type.</param> | ||
/// <param name="useMarkerCharacters">Whether to prepend a marker character, <c>0x02</c>, to the beginning, | ||
/// and append another marker character, <c>0x03</c>, to the end of the output vector of characters.</param> | ||
/// <example> | ||
|
@@ -85,7 +89,6 @@ public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this | |
/// <param name="useMarkerCharacters">Whether to prepend a marker character, <c>0x02</c>, to the beginning, | ||
/// and append another marker character, <c>0x03</c>, to the end of the output vector of characters.</param> | ||
/// <param name="columns">Pairs of columns to run the tokenization on.</param> | ||
|
||
[BestFriend] | ||
internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog, | ||
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters, | ||
|
@@ -97,12 +100,15 @@ internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(thi | |
} | ||
|
||
/// <summary> | ||
/// Normalizes incoming text in <paramref name="inputColumnName"/> by changing case, removing diacritical marks, punctuation marks and/or numbers | ||
/// and outputs new text as <paramref name="outputColumnName"/>. | ||
/// Creates a <see cref="TextNormalizingEstimator"/>, which normalizes incoming text in <paramref name="inputColumnName"/> by optionally | ||
/// changing case and removing diacritical marks, punctuation marks, and numbers, and outputs the new text as <paramref name="outputColumnName"/>. | ||
/// </summary> | ||
/// <param name="catalog">The text-related transform's catalog.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> | ||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. | ||
/// This column's data type will remain scalar of text or a vector of text depending on the input column data type.</param> | ||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, | ||
/// the value of the <paramref name="outputColumnName"/> will be used as source. | ||
/// This estimator operates on text or vector of text data types.</param> | ||
/// <param name="caseMode">Casing text using the rules of the invariant culture.</param> | ||
/// <param name="keepDiacritics">Whether to keep diacritical marks or remove them.</param> | ||
/// <param name="keepPunctuations">Whether to keep punctuation marks or remove them.</param> | ||
|
@@ -124,10 +130,16 @@ public static TextNormalizingEstimator NormalizeText(this TransformsCatalog.Text | |
=> new TextNormalizingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), | ||
outputColumnName, inputColumnName, caseMode, keepDiacritics, keepPunctuations, keepNumbers); | ||
|
||
/// <include file='doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' /> | ||
/// <summary> | ||
/// Create a <see cref="WordEmbeddingEstimator"/>, which is a text featurizer that converts a vector | ||
/// of text into a numerical vector using pre-trained embeddings models. | ||
/// </summary> | ||
/// <param name="catalog">The text-related transform's catalog.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> | ||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. | ||
/// This column's data type will be a vector of <see cref="System.Single"/>.</param> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
known-sized #Resolved |
||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, | ||
/// the value of the <paramref name="outputColumnName"/> will be used as source. | ||
/// This estimator operates over a known-sized vector of text data type.</param> | ||
/// <param name="modelKind">The embeddings <see cref="WordEmbeddingEstimator.PretrainedModelKind"/> to use. </param> | ||
/// <example> | ||
/// <format type="text/markdown"> | ||
|
@@ -142,11 +154,17 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T | |
WordEmbeddingEstimator.PretrainedModelKind modelKind = WordEmbeddingEstimator.PretrainedModelKind.SentimentSpecificWordEmbedding) | ||
=> new WordEmbeddingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), outputColumnName, inputColumnName, modelKind); | ||
|
||
/// <include file='doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' /> | ||
/// <summary> | ||
/// Create a <see cref="WordEmbeddingEstimator"/>, which is a text featurizer that converts vectors | ||
/// of text into numerical vectors using pre-trained embeddings models. | ||
/// </summary> | ||
/// <param name="catalog">The text-related transform's catalog.</param> | ||
/// <param name="customModelFile">The path of the pre-trained embeedings model to use. </param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param> | ||
/// <param name="inputColumnName">Name of the column to transform.</param> | ||
/// <param name="customModelFile">The path of the pre-trained embeddings model to use.</param> | ||
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>. | ||
/// This column's data type will be a vector of <see cref="System.Single"/>.</param> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
known-sized #Resolved |
||
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, | ||
/// the value of the <paramref name="outputColumnName"/> will be used as source. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Let's not use 'source'. Just drop 'as source'; it reads fine without it. #Pending
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I can modify it here, but it's everywhere; that's the pattern that a lot of our transforms follow. In reply to: 277032960 [](ancestors = 277032960) |
||
/// This estimator operates over a known-sized vector of text data type.</param> | ||
/// <example> | ||
/// <format type="text/markdown"> | ||
/// <![CDATA[ | ||
|
@@ -161,10 +179,13 @@ public static WordEmbeddingEstimator ApplyWordEmbedding(this TransformsCatalog.T | |
=> new WordEmbeddingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), | ||
outputColumnName, customModelFile, inputColumnName ?? outputColumnName); | ||
|
||
/// <include file='doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' /> | ||
/// <summary> | ||
/// Create a <see cref="WordEmbeddingEstimator"/>, which is a text featurizer that converts vectors | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
singular or plural? if it's plural, please say 'multiple vectors' or 'one or more vectors' #Resolved |
||
/// of text into numerical vectors using pre-trained embeddings models. | ||
/// </summary> | ||
/// <param name="catalog">The text-related transform's catalog.</param> | ||
/// <param name="modelKind">The embeddings <see cref="WordEmbeddingEstimator.PretrainedModelKind"/> to use. </param> | ||
/// <param name="columns">The array columns, and per-column configurations to extract embeedings from.</param> | ||
/// <param name="columns">The array columns, and per-column configurations to extract embeddings from.</param> | ||
/// <example> | ||
/// <format type="text/markdown"> | ||
/// <