Skip to content

Commit 883784a

Browse files
committed
Rename ProduceCharacterTokens to ProduceCharactersAsKeys
1 parent 8e5c515 commit 883784a

File tree

7 files changed: +8 -8 lines changed

7 files changed: +8 -8 lines changed

docs/code/MlNetCookBook.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ var pipeline =
775775
ngramLength: 2, useAllLengths: false))
776776

777777
// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
778-
.Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message"))
778+
.Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message"))
779779
.Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars",
780780
ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf))
781781

docs/samples/Microsoft.ML.Samples/Dynamic/NgramExtraction.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public static void NgramTransform()
2626
// A pipeline to tokenize text as characters and then combine them together into ngrams
2727
// The pipeline uses the default settings to featurize.
2828

29-
var charsPipeline = ml.Transforms.Text.ProduceCharacterTokens("Chars", "SentimentText", useMarkerCharacters: false);
29+
var charsPipeline = ml.Transforms.Text.ProduceCharactersAsKeys("Chars", "SentimentText", useMarkerCharacters: false);
3030
var ngramOnePipeline = ml.Transforms.Text.ProduceNgrams("CharsUnigrams", "Chars", ngramLength: 1);
3131
var ngramTwpPipeline = ml.Transforms.Text.ProduceNgrams("CharsTwograms", "Chars");
3232
var oneCharsPipeline = charsPipeline.Append(ngramOnePipeline);

src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
109109
/// </summary>
110110
/// <param name="input">The column to apply to.</param>
111111
/// <param name="useMarkerCharacters">Whether to use marker characters to separate words.</param>
112-
public static VarVector<Key<ushort, string>> ProduceCharacterTokens(this Scalar<string> input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters);
112+
public static VarVector<Key<ushort, string>> ProduceCharactersAsKeys(this Scalar<string> input, bool useMarkerCharacters = true) => new OutPipelineColumn(input, useMarkerCharacters);
113113
}
114114

115115
/// <summary>

src/Microsoft.ML.Transforms/Text/TextCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text
5757
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
5858
/// <param name="useMarkerCharacters">Whether to prepend a marker character, <see langword="0x02"/>, to the beginning,
5959
/// and append another marker character, <see langword="0x03"/>, to the end of the output vector of characters.</param>
60-
public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog,
60+
public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
6161
string outputColumnName,
6262
string inputColumnName = null,
6363
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters)
@@ -72,7 +72,7 @@ public static TokenizingByCharactersEstimator ProduceCharacterTokens(this Transf
7272
/// and append another marker character, <see langword="0x03"/>, to the end of the output vector of characters.</param>
7373
/// <param name="columns">Pairs of columns to run the tokenization on.</param>
7474

75-
public static TokenizingByCharactersEstimator ProduceCharacterTokens(this TransformsCatalog.TextTransforms catalog,
75+
public static TokenizingByCharactersEstimator ProduceCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
7676
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters,
7777
params ColumnOptions[] columns)
7878
=> new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns));

test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -520,7 +520,7 @@ public void Tokenize()
520520
.Append(r => (
521521
r.label,
522522
tokens: r.text.ProduceWordTokens(),
523-
chars: r.text.ProduceCharacterTokens()));
523+
chars: r.text.ProduceCharactersAsKeys()));
524524

525525
var tdata = est.Fit(data).Transform(data);
526526
var schema = tdata.AsDynamic.Schema;

test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ private void TextFeaturizationOn(string dataPath)
467467
BagOfBigrams: r.Message.NormalizeText().ProduceHashedWordBags(ngramLength: 2, useAllLengths: false),
468468

469469
// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
470-
BagOfTrichar: r.Message.ProduceCharacterTokens().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),
470+
BagOfTrichar: r.Message.ProduceCharactersAsKeys().ProduceNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),
471471

472472
// NLP pipeline 4: word embeddings.
473473
// PretrainedModelKind.Sswe is used here for performance of the test. In a real

test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ private void TextFeaturizationOn(string dataPath)
305305
ngramLength: 2, useAllLengths: false))
306306

307307
// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
308-
.Append(mlContext.Transforms.Text.ProduceCharacterTokens("MessageChars", "Message"))
308+
.Append(mlContext.Transforms.Text.ProduceCharactersAsKeys("MessageChars", "Message"))
309309
.Append(new NgramExtractingEstimator(mlContext, "BagOfTrichar", "MessageChars",
310310
ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf))
311311

0 commit comments

Comments
 (0)