Skip to content

Commit e58f879

Browse files
committed
Handle static part
1 parent a284b0f commit e58f879

File tree

3 files changed

+16
-16
lines changed

3 files changed

+16
-16
lines changed

src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs

+13-13
Original file line numberDiff line numberDiff line change
@@ -512,26 +512,26 @@ private sealed class OutPipelineColumn : Vector<float>
512512
{
513513
public readonly VarVector<Key<uint, string>> Input;
514514

515-
public OutPipelineColumn(VarVector<Key<uint, string>> input, int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
516-
: base(new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input)
515+
public OutPipelineColumn(VarVector<Key<uint, string>> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
516+
: base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input)
517517
{
518518
Input = input;
519519
}
520520
}
521521

522522
private sealed class Reconciler : EstimatorReconciler, IEquatable<Reconciler>
523523
{
524-
private readonly int _hashBits;
524+
private readonly int _numberOfBits;
525525
private readonly int _ngramLength;
526526
private readonly int _skipLength;
527527
private readonly bool _allLengths;
528528
private readonly uint _seed;
529529
private readonly bool _ordered;
530530
private readonly int _invertHash;
531531

532-
public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
532+
public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
533533
{
534-
_hashBits = hashBits;
534+
_numberOfBits = numberOfBits;
535535
_ngramLength = ngramLength;
536536
_skipLength = skipLength;
537537
_allLengths = allLengths;
@@ -542,7 +542,7 @@ public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths
542542

543543
public bool Equals(Reconciler other)
544544
{
545-
return _hashBits == other._hashBits &&
545+
return _numberOfBits == other._numberOfBits &&
546546
_ngramLength == other._ngramLength &&
547547
_skipLength == other._skipLength &&
548548
_allLengths == other._allLengths &&
@@ -561,7 +561,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
561561
var columns = new List<NgramHashingEstimator.ColumnOptions>();
562562
foreach (var outCol in toOutput)
563563
columns.Add(new NgramHashingEstimator.ColumnOptions(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] },
564-
_ngramLength, _skipLength, _allLengths, _hashBits, _seed, _ordered, _invertHash));
564+
_ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _ordered, _invertHash));
565565

566566
return new NgramHashingEstimator(env, columns.ToArray());
567567
}
@@ -571,11 +571,11 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
571571
/// Produces a bag of counts of ngrams (sequences of consecutive words of length 1-n) in a given tokenized text.
572572
/// It does so by hashing each ngram and using the hash value as the index in the bag.
573573
///
574-
/// <see cref="ToNgramsHash"/> is different from <see cref="WordHashBagEstimatorStaticExtensions.ToBagofHashedWords"/>
575-
/// in a way that <see cref="ToNgramsHash"/> takes tokenized text as input while <see cref="WordHashBagEstimatorStaticExtensions.ToBagofHashedWords"/> tokenizes text internally.
574+
/// <see cref="ApplyNgramHashing"/> is different from <see cref="WordHashBagEstimatorStaticExtensions.ToBagofHashedWords"/>
575+
/// in a way that <see cref="ApplyNgramHashing"/> takes tokenized text as input while <see cref="WordHashBagEstimatorStaticExtensions.ToBagofHashedWords"/> tokenizes text internally.
576576
/// </summary>
577577
/// <param name="input">The column to apply to.</param>
578-
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
578+
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
579579
/// <param name="ngramLength">Ngram length.</param>
580580
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
581581
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
@@ -585,13 +585,13 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
585585
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
586586
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
587587
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
588-
public static Vector<float> ToNgramsHash(this VarVector<Key<uint, string>> input,
589-
int hashBits = 16,
588+
public static Vector<float> ApplyNgramHashing(this VarVector<Key<uint, string>> input,
589+
int numberOfBits = 16,
590590
int ngramLength = 2,
591591
int skipLength = 0,
592592
bool allLengths = true,
593593
uint seed = 314489979,
594594
bool ordered = true,
595-
int invertHash = 0) => new OutPipelineColumn(input, hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash);
595+
int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash);
596596
}
597597
}

src/Microsoft.ML.Transforms/Text/TextCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ public static WordHashBagEstimator ProduceHashedWordBags(this TransformsCatalog.
495495
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
496496
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
497497
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
498-
public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog,
498+
public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.TextTransforms catalog,
499499
string outputColumnName,
500500
string inputColumnName = null,
501501
int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits,
@@ -517,7 +517,7 @@ public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.T
517517
/// </summary>
518518
/// <param name="catalog">The text-related transform's catalog.</param>
519519
/// <param name="columns">Pairs of columns to compute n-grams. Note that gram indices are generated by hashing.</param>
520-
public static NgramHashingEstimator ProduceHashedNgrams(this TransformsCatalog.TextTransforms catalog,
520+
public static NgramHashingEstimator ApplyNgramHashing(this TransformsCatalog.TextTransforms catalog,
521521
NgramHashingEstimator.ColumnOptions[] columns)
522522
=> new NgramHashingEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), columns);
523523

test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ public void Ngrams()
605605
.Append(r => (
606606
r.label,
607607
ngrams: r.text.TokenizeText().ToKey().ToNgrams(),
608-
ngramshash: r.text.TokenizeText().ToKey().ToNgramsHash()));
608+
ngramshash: r.text.TokenizeText().ToKey().ApplyNgramHashing()));
609609

610610
var tdata = est.Fit(data).Transform(data);
611611
var schema = tdata.AsDynamic.Schema;

0 commit comments

Comments
 (0)