Skip to content

Commit a285f8d

Browse files
authored
Convert TextNormalizer to estimator (#1276)
1 parent 14dadfe commit a285f8d

File tree

20 files changed

+761
-603
lines changed

20 files changed

+761
-603
lines changed

src/Microsoft.ML.Data/Transforms/HashTransform.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,10 +1289,10 @@ public override void Process()
12891289
/// </summary>
12901290
public sealed class HashEstimator : IEstimator<HashTransformer>
12911291
{
1292-
public const int NumBitsMin = 1;
1293-
public const int NumBitsLim = 32;
1292+
internal const int NumBitsMin = 1;
1293+
internal const int NumBitsLim = 32;
12941294

1295-
public static class Defaults
1295+
internal static class Defaults
12961296
{
12971297
public const int HashBits = NumBitsLim - 1;
12981298
public const uint Seed = 314489979;

src/Microsoft.ML.Data/Transforms/KeyToVectorTransform.cs

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -208,17 +208,14 @@ private static IDataTransform Create(IHostEnvironment env, Arguments args, IData
208208

209209
env.CheckValue(args.Column, nameof(args.Column));
210210
var cols = new ColumnInfo[args.Column.Length];
211-
using (var ch = env.Start("ValidateArgs"))
211+
for (int i = 0; i < cols.Length; i++)
212212
{
213-
for (int i = 0; i < cols.Length; i++)
214-
{
215-
var item = args.Column[i];
213+
var item = args.Column[i];
216214

217-
cols[i] = new ColumnInfo(item.Source ?? item.Name,
218-
item.Name,
219-
item.Bag ?? args.Bag);
220-
};
221-
}
215+
cols[i] = new ColumnInfo(item.Source ?? item.Name,
216+
item.Name,
217+
item.Bag ?? args.Bag);
218+
};
222219
return new KeyToVectorTransform(env, cols).MakeDataTransform(input);
223220
}
224221

@@ -727,7 +724,7 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
727724
// Note that one input feature got expanded to a one-hot vector.
728725
opType = "ReduceSum";
729726
var reduceNode = ctx.CreateNode(opType, encodedVariableName, dstVariableName, ctx.GetNodeName(opType), "");
730-
reduceNode.AddAttribute("axes", new long[] { shape.Count - 1});
727+
reduceNode.AddAttribute("axes", new long[] { shape.Count - 1 });
731728
reduceNode.AddAttribute("keepdims", 0);
732729
}
733730
return true;
@@ -737,7 +734,7 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
737734

738735
public sealed class KeyToVectorEstimator : TrivialEstimator<KeyToVectorTransform>
739736
{
740-
public static class Defaults
737+
internal static class Defaults
741738
{
742739
public const bool Bag = false;
743740
}

src/Microsoft.ML.Legacy/CSharpApi.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16789,7 +16789,7 @@ public enum TextTransformLanguage
1678916789
Japanese = 7
1679016790
}
1679116791

16792-
public enum TextNormalizerTransformCaseNormalizationMode
16792+
public enum TextNormalizerEstimatorCaseNormalizationMode
1679316793
{
1679416794
Lower = 0,
1679516795
Upper = 1,
@@ -16877,7 +16877,7 @@ public void AddColumn(string name, params string[] source)
1687716877
/// <summary>
1687816878
/// Casing text using the rules of the invariant culture.
1687916879
/// </summary>
16880-
public TextNormalizerTransformCaseNormalizationMode TextCase { get; set; } = TextNormalizerTransformCaseNormalizationMode.Lower;
16880+
public TextNormalizerEstimatorCaseNormalizationMode TextCase { get; set; } = TextNormalizerEstimatorCaseNormalizationMode.Lower;
1688116881

1688216882
/// <summary>
1688316883
/// Whether to keep diacritical marks or remove them.

src/Microsoft.ML.Transforms/RffTransform.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ private void TransformFeatures(ref VBuffer<float> src, ref VBuffer<float> dst, T
642642
/// </summary>
643643
public sealed class RffEstimator : IEstimator<RffTransform>
644644
{
645-
public static class Defaults
645+
internal static class Defaults
646646
{
647647
public const int NewDim = 1000;
648648
public const bool UseSin = false;

0 commit comments

Comments
 (0)