Skip to content

Commit b2fb038

Browse files
committed
ordered ---> useOrderedHashing
1 parent 521bd5c commit b2fb038

File tree

4 files changed

+51
-52
lines changed

4 files changed

+51
-52
lines changed

src/Microsoft.ML.StaticPipe/TextStaticExtensions.cs

+20-20
Original file line numberDiff line numberDiff line change
@@ -336,9 +336,9 @@ public OutPipelineColumn(Scalar<string> input,
336336
int skipLength,
337337
bool allLengths,
338338
uint seed,
339-
bool ordered,
339+
bool useOrderedHashing,
340340
int invertHash)
341-
: base(new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input)
341+
: base(new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash), input)
342342
{
343343
Input = input;
344344
}
@@ -351,17 +351,17 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable<Reconciler>
351351
private readonly int _skipLength;
352352
private readonly bool _allLengths;
353353
private readonly uint _seed;
354-
private readonly bool _ordered;
354+
private readonly bool _useOrderedHashing;
355355
private readonly int _invertHash;
356356

357-
public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
357+
public Reconciler(int hashBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int invertHash)
358358
{
359359
_hashBits = hashBits;
360360
_ngramLength = ngramLength;
361361
_skipLength = skipLength;
362362
_allLengths = allLengths;
363363
_seed = seed;
364-
_ordered = ordered;
364+
_useOrderedHashing = useOrderedHashing;
365365
_invertHash = invertHash;
366366
}
367367

@@ -372,7 +372,7 @@ public bool Equals(Reconciler other)
372372
_skipLength == other._skipLength &&
373373
_allLengths == other._allLengths &&
374374
_seed == other._seed &&
375-
_ordered == other._ordered &&
375+
_useOrderedHashing == other._useOrderedHashing &&
376376
_invertHash == other._invertHash;
377377
}
378378

@@ -388,7 +388,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
388388
foreach (var outCol in toOutput)
389389
pairs.Add((outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] }));
390390

391-
return new WordHashBagEstimator(env, pairs.ToArray(), _hashBits, _ngramLength, _skipLength, _allLengths, _seed, _ordered, _invertHash);
391+
return new WordHashBagEstimator(env, pairs.ToArray(), _hashBits, _ngramLength, _skipLength, _allLengths, _seed, _useOrderedHashing, _invertHash);
392392
}
393393
}
394394

@@ -402,7 +402,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
402402
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
403403
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
404404
/// <param name="seed">Hashing seed.</param>
405-
/// <param name="ordered">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
405+
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
406406
/// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
407407
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
408408
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
@@ -413,8 +413,8 @@ public static Vector<float> ProduceHashedWordBags(this Scalar<string> input,
413413
int skipLength = 0,
414414
bool allLengths = true,
415415
uint seed = 314489979,
416-
bool ordered = true,
417-
int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash);
416+
bool useOrderedHashing = true,
417+
int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash);
418418
}
419419

420420
/// <summary>
@@ -512,8 +512,8 @@ private sealed class OutPipelineColumn : Vector<float>
512512
{
513513
public readonly VarVector<Key<uint, string>> Input;
514514

515-
public OutPipelineColumn(VarVector<Key<uint, string>> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
516-
: base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash), input)
515+
public OutPipelineColumn(VarVector<Key<uint, string>> input, int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int invertHash)
516+
: base(new Reconciler(numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash), input)
517517
{
518518
Input = input;
519519
}
@@ -526,17 +526,17 @@ private sealed class Reconciler : EstimatorReconciler, IEquatable<Reconciler>
526526
private readonly int _skipLength;
527527
private readonly bool _allLengths;
528528
private readonly uint _seed;
529-
private readonly bool _ordered;
529+
private readonly bool _useOrderedHashing;
530530
private readonly int _invertHash;
531531

532-
public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool ordered, int invertHash)
532+
public Reconciler(int numberOfBits, int ngramLength, int skipLength, bool allLengths, uint seed, bool useOrderedHashing, int invertHash)
533533
{
534534
_numberOfBits = numberOfBits;
535535
_ngramLength = ngramLength;
536536
_skipLength = skipLength;
537537
_allLengths = allLengths;
538538
_seed = seed;
539-
_ordered = ordered;
539+
_useOrderedHashing = useOrderedHashing;
540540
_invertHash = invertHash;
541541
}
542542

@@ -547,7 +547,7 @@ public bool Equals(Reconciler other)
547547
_skipLength == other._skipLength &&
548548
_allLengths == other._allLengths &&
549549
_seed == other._seed &&
550-
_ordered == other._ordered &&
550+
_useOrderedHashing == other._useOrderedHashing &&
551551
_invertHash == other._invertHash;
552552
}
553553

@@ -561,7 +561,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
561561
var columns = new List<NgramHashingEstimator.ColumnOptions>();
562562
foreach (var outCol in toOutput)
563563
columns.Add(new NgramHashingEstimator.ColumnOptions(outputNames[outCol], new[] { inputNames[((OutPipelineColumn)outCol).Input] },
564-
_ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _ordered, _invertHash));
564+
_ngramLength, _skipLength, _allLengths, _numberOfBits, _seed, _useOrderedHashing, _invertHash));
565565

566566
return new NgramHashingEstimator(env, columns.ToArray());
567567
}
@@ -580,7 +580,7 @@ public override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
580580
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
581581
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
582582
/// <param name="seed">Hashing seed.</param>
583-
/// <param name="ordered">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
583+
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
584584
/// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
585585
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
586586
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
@@ -591,7 +591,7 @@ public static Vector<float> ProduceHashedNgrams(this VarVector<Key<uint, string>
591591
int skipLength = 0,
592592
bool allLengths = true,
593593
uint seed = 314489979,
594-
bool ordered = true,
595-
int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash);
594+
bool useOrderedHashing = true,
595+
int invertHash = 0) => new OutPipelineColumn(input, numberOfBits, ngramLength, skipLength, allLengths, seed, useOrderedHashing, invertHash);
596596
}
597597
}

src/Microsoft.ML.Transforms/Text/NgramHashingTransformer.cs

+13-13
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ internal sealed class Options
145145
[Argument(ArgumentType.AtMostOnce,
146146
HelpText = "Whether the position of each source column should be included in the hash (when there are multiple source columns).",
147147
ShortName = "ord", SortOrder = 6)]
148-
public bool Ordered = NgramHashingEstimator.Defaults.Ordered;
148+
public bool Ordered = NgramHashingEstimator.Defaults.UseOrderedHashing;
149149

150150
[Argument(ArgumentType.AtMostOnce, HelpText = "Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.",
151151
ShortName = "ih")]
@@ -417,7 +417,7 @@ private NgramIdFinder GetNgramIdFinder(int iinfo)
417417
uint mask = (1U << _parent._columns[iinfo].NumberOfBits) - 1;
418418
int ngramLength = _parent._columns[iinfo].NgramLength;
419419
bool rehash = _parent._columns[iinfo].RehashUnigrams;
420-
bool ordered = _parent._columns[iinfo].Ordered;
420+
bool ordered = _parent._columns[iinfo].UseOrderedHashing;
421421
bool all = _parent._columns[iinfo].AllLengths;
422422
uint seed = _parent._columns[iinfo].Seed;
423423

@@ -891,7 +891,7 @@ public sealed class ColumnOptions
891891
/// <summary>Hashing seed.</summary>
892892
public readonly uint Seed;
893893
/// <summary>Whether the position of each term should be included in the hash.</summary>
894-
public readonly bool Ordered;
894+
public readonly bool UseOrderedHashing;
895895
/// <summary>
896896
/// During hashing we construct mappings between original values and the produced hash values.
897897
/// Text representation of original values are stored in the slot names of the metadata for the new column.
@@ -916,7 +916,7 @@ public sealed class ColumnOptions
916916
/// <param name="allLengths">Whether to store all ngram lengths up to <paramref name="ngramLength"/>, or only <paramref name="ngramLength"/>.</param>
917917
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
918918
/// <param name="seed">Hashing seed.</param>
919-
/// <param name="ordered">Whether the position of each term should be included in the hash.</param>
919+
/// <param name="useOrderedHashing">Whether the position of each term should be included in the hash.</param>
920920
/// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
921921
/// Text representation of original values are stored in the slot names of the metadata for the new column.
922922
/// Hashing, as such, can map many initial values to one.
@@ -930,7 +930,7 @@ public ColumnOptions(string name,
930930
bool allLengths = NgramHashingEstimator.Defaults.AllLengths,
931931
int numberOfBits = NgramHashingEstimator.Defaults.NumberOfBits,
932932
uint seed = NgramHashingEstimator.Defaults.Seed,
933-
bool ordered = NgramHashingEstimator.Defaults.Ordered,
933+
bool useOrderedHashing = NgramHashingEstimator.Defaults.UseOrderedHashing,
934934
int invertHash = NgramHashingEstimator.Defaults.InvertHash,
935935
bool rehashUnigrams = NgramHashingEstimator.Defaults.RehashUnigrams)
936936
{
@@ -962,7 +962,7 @@ public ColumnOptions(string name,
962962
AllLengths = allLengths;
963963
NumberOfBits = numberOfBits;
964964
Seed = seed;
965-
Ordered = ordered;
965+
UseOrderedHashing = useOrderedHashing;
966966
InvertHash = invertHash;
967967
RehashUnigrams = rehashUnigrams;
968968
}
@@ -996,7 +996,7 @@ internal ColumnOptions(ModelLoadContext ctx)
996996
Contracts.CheckDecode(1 <= NumberOfBits && NumberOfBits <= 30);
997997
Seed = ctx.Reader.ReadUInt32();
998998
RehashUnigrams = ctx.Reader.ReadBoolByte();
999-
Ordered = ctx.Reader.ReadBoolByte();
999+
UseOrderedHashing = ctx.Reader.ReadBoolByte();
10001000
AllLengths = ctx.Reader.ReadBoolByte();
10011001
}
10021002

@@ -1026,7 +1026,7 @@ internal ColumnOptions(ModelLoadContext ctx, string name, string[] inputColumnNa
10261026
Contracts.CheckDecode(1 <= NumberOfBits && NumberOfBits <= 30);
10271027
Seed = ctx.Reader.ReadUInt32();
10281028
RehashUnigrams = ctx.Reader.ReadBoolByte();
1029-
Ordered = ctx.Reader.ReadBoolByte();
1029+
UseOrderedHashing = ctx.Reader.ReadBoolByte();
10301030
AllLengths = ctx.Reader.ReadBoolByte();
10311031
}
10321032

@@ -1060,7 +1060,7 @@ internal void Save(ModelSaveContext ctx)
10601060
ctx.Writer.Write(NumberOfBits);
10611061
ctx.Writer.Write(Seed);
10621062
ctx.Writer.WriteBoolByte(RehashUnigrams);
1063-
ctx.Writer.WriteBoolByte(Ordered);
1063+
ctx.Writer.WriteBoolByte(UseOrderedHashing);
10641064
ctx.Writer.WriteBoolByte(AllLengths);
10651065
}
10661066
}
@@ -1073,7 +1073,7 @@ internal static class Defaults
10731073
internal const int NumberOfBits = 16;
10741074
internal const uint Seed = 314489979;
10751075
internal const bool RehashUnigrams = false;
1076-
internal const bool Ordered = true;
1076+
internal const bool UseOrderedHashing = true;
10771077
internal const int InvertHash = 0;
10781078
}
10791079

@@ -1095,7 +1095,7 @@ internal static class Defaults
10951095
/// <param name="skipLength">Maximum number of tokens to skip when constructing an ngram.</param>
10961096
/// <param name="allLengths">Whether to include all ngram lengths up to <paramref name="ngramLength"/> or only <paramref name="ngramLength"/>.</param>
10971097
/// <param name="seed">Hashing seed.</param>
1098-
/// <param name="ordered">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
1098+
/// <param name="useOrderedHashing">Whether the position of each source column should be included in the hash (when there are multiple source columns).</param>
10991099
/// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
11001100
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
11011101
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
@@ -1108,9 +1108,9 @@ internal NgramHashingEstimator(IHostEnvironment env,
11081108
int skipLength = 0,
11091109
bool allLengths = true,
11101110
uint seed = 314489979,
1111-
bool ordered = true,
1111+
bool useOrderedHashing = true,
11121112
int invertHash = 0)
1113-
: this(env, new ColumnOptions(outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, numberOfBits, seed, ordered, invertHash))
1113+
: this(env, new ColumnOptions(outputColumnName, new[] { inputColumnName ?? outputColumnName }, ngramLength, skipLength, allLengths, numberOfBits, seed, useOrderedHashing, invertHash))
11141114
{
11151115
}
11161116

0 commit comments

Comments
 (0)