Skip to content

Commit 8e19654

Browse files
committed
review comments and some more cleanup
1 parent bd98b16 commit 8e19654

File tree

7 files changed

+55
-17
lines changed

7 files changed

+55
-17
lines changed

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ public static class ConversionsExtensionsCatalog
2222
/// </summary>
2323
/// <param name="catalog">The transform's catalog.</param>
2424
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
25-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param> /// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
25+
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
26+
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
2627
/// <param name="invertHash">During hashing we constuct mappings between original values and the produced hash values.
2728
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one.
2829
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.

src/Microsoft.ML.Data/Transforms/FeatureContributionCalculationTransformer.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,9 @@ internal sealed class Options : TransformInputBase
9999
internal const string FriendlyName = "Feature Contribution Calculation";
100100
internal const string LoaderSignature = "FeatureContribution";
101101

102-
public readonly int Top;
103-
public readonly int Bottom;
104-
public readonly bool Normalize;
102+
internal readonly int Top;
103+
internal readonly int Bottom;
104+
internal readonly bool Normalize;
105105

106106
private readonly IFeatureContributionMapper _predictor;
107107

@@ -281,7 +281,7 @@ public sealed class FeatureContributionCalculatingEstimator : TrivialEstimator<F
281281
private readonly string _featureColumn;
282282
private readonly ICalculateFeatureContribution _predictor;
283283

284-
public static class Defaults
284+
internal static class Defaults
285285
{
286286
public const int NumPositiveContributions = 10;
287287
public const int NumNegativeContributions = 10;

src/Microsoft.ML.Data/Transforms/TypeConverting.cs

+34-4
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ public static CommonOutputs.TransformOutput Convert(IHostEnvironment env, TypeCo
5353
}
5454

5555
/// <summary>
56-
/// ConvertTransform allow to change underlying column type as long as we know how to convert types.
56+
/// <see cref="TypeConvertingTransformer"/> converts underlying column types.
57+
/// The source and destination column types need to be compatible.
5758
/// </summary>
5859
public sealed class TypeConvertingTransformer : OneToOneTransformerBase
5960
{
@@ -510,11 +511,12 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, string srcVariableName,
510511
}
511512

512513
/// <summary>
513-
/// Convert estimator allow you take column and change it type as long as we know how to do conversion between types.
514+
/// <see cref="TypeConvertingEstimator"/> converts underlying column types.
515+
/// The source and destination column types need to be compatible.
514516
/// </summary>
515517
public sealed class TypeConvertingEstimator : TrivialEstimator<TypeConvertingTransformer>
516518
{
517-
public sealed class Defaults
519+
internal sealed class Defaults
518520
{
519521
public const DataKind DefaultOutputKind = DataKind.R4;
520522
}
@@ -524,9 +526,21 @@ public sealed class Defaults
524526
/// </summary>
525527
public sealed class ColumnInfo
526528
{
529+
/// <summary>
530+
/// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
531+
/// </summary>
527532
public readonly string Name;
533+
/// <summary>
534+
/// Name of column to transform. If set to <see langword="null"/>, the value of the <see cref="Name"/> will be used as source.
535+
/// </summary>
528536
public readonly string InputColumnName;
537+
/// <summary>
538+
/// The expected kind of the converted column.
539+
/// </summary>
529540
public readonly DataKind OutputKind;
541+
/// <summary>
542+
/// New key count, if we work with key type.
543+
/// </summary>
530544
public readonly KeyCount OutputKeyCount;
531545

532546
/// <summary>
@@ -535,14 +549,30 @@ public sealed class ColumnInfo
535549
/// <param name="name">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
536550
/// <param name="outputKind">The expected kind of the converted column.</param>
537551
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="name"/> will be used as source.</param>
538-
/// <param name="outputKeyCount">New key range, if we work with key type.</param>
552+
/// <param name="outputKeyCount">New key count, if we work with key type.</param>
539553
public ColumnInfo(string name, DataKind outputKind, string inputColumnName, KeyCount outputKeyCount = null)
540554
{
541555
Name = name;
542556
InputColumnName = inputColumnName ?? name;
543557
OutputKind = outputKind;
544558
OutputKeyCount = outputKeyCount;
545559
}
560+
561+
/// <summary>
562+
/// Describes how the transformer handles one column pair.
563+
/// </summary>
564+
/// <param name="name">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
565+
/// <param name="type">The expected kind of the converted column.</param>
566+
/// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="name"/> will be used as source.</param>
567+
/// <param name="outputKeyCount">New key count, if we work with key type.</param>
568+
public ColumnInfo(string name, Type type, string inputColumnName, KeyCount outputKeyCount = null)
569+
{
570+
Name = name;
571+
InputColumnName = inputColumnName ?? name;
572+
if (!type.TryGetDataKind(out OutputKind))
573+
throw Contracts.ExceptUserArg(nameof(type), $"Unsupported type {type}.");
574+
OutputKeyCount = outputKeyCount;
575+
}
546576
}
547577

548578
/// <summary>

src/Microsoft.ML.Transforms/CountFeatureSelection.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ internal sealed class Options : TransformInputBase
4646

4747
internal static string RegistrationName = "CountFeatureSelectionTransform";
4848

49+
/// <summary>
50+
/// Describes how the transformer handles one column pair.
51+
/// </summary>
4952
public sealed class ColumnInfo
5053
{
5154
public readonly string Name;
@@ -131,7 +134,7 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
131134
}
132135

133136
/// <summary>
134-
/// Train and return a transformer.
137+
/// Trains and returns a <see cref="ITransformer"/>.
135138
/// </summary>
136139
public ITransformer Fit(IDataView input)
137140
{

src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs

+3
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ private static VersionInfo GetVersionInfo()
7575

7676
private const string RegistrationName = "DropNAs";
7777

78+
/// <summary>
79+
/// The names of the input columns of the transformation and the corresponding names for the output columns.
80+
/// </summary>
7881
public IReadOnlyList<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly();
7982

8083
/// <summary>

src/Microsoft.ML.Transforms/MutualInformationFeatureSelection.cs

+3-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ public sealed class MutualInformationFeatureSelectingEstimator : IEstimator<ITra
3131
internal const string ShortName = "MIFeatureSelection";
3232
internal static string RegistrationName = "MutualInformationFeatureSelectionTransform";
3333

34-
public static class Defaults
34+
[BestFriend]
35+
internal static class Defaults
3536
{
3637
public const string LabelColumn = DefaultColumnNames.Label;
3738
public const int SlotsInOutput = 1000;
@@ -116,7 +117,7 @@ internal MutualInformationFeatureSelectingEstimator(IHostEnvironment env, string
116117
}
117118

118119
/// <summary>
119-
/// Train and return a transformer.
120+
/// Trains and returns a <see cref="ITransformer"/>.
120121
/// </summary>
121122
public ITransformer Fit(IDataView input)
122123
{

test/Microsoft.ML.Tests/Transformers/ConvertTests.cs

+5-5
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public void TestConvertWorkout()
7878
var data = new[] { new TestClass() { A = 1, B = new int[2] { 1,4 } },
7979
new TestClass() { A = 2, B = new int[2] { 3,4 } }};
8080
var dataView = ML.Data.ReadFromEnumerable(data);
81-
var pipe = new TypeConvertingEstimator(Env, columns: new[] {new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.R4, "A"),
81+
var pipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.R4, "A"),
8282
new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.R4, "B")});
8383

8484
TestEstimatorCore(pipe, dataView);
@@ -117,7 +117,7 @@ public void TestConvertWorkout()
117117
};
118118

119119
var allTypesDataView = ML.Data.ReadFromEnumerable(allTypesData);
120-
var allTypesPipe = new TypeConvertingEstimator(Env, columns: new[] {
120+
var allTypesPipe = ML.Transforms.Conversion.ConvertType(columns: new[] {
121121
new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.R4, "AA"),
122122
new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.R4, "AB"),
123123
new TypeConvertingEstimator.ColumnInfo("ConvC", DataKind.R4, "AC"),
@@ -192,8 +192,8 @@ public void TestOldSavingAndLoading()
192192
var data = new[] { new TestClass() { A = 1, B = new int[2] { 1,4 } },
193193
new TestClass() { A = 2, B = new int[2] { 3,4 } }};
194194
var dataView = ML.Data.ReadFromEnumerable(data);
195-
var pipe = new TypeConvertingEstimator(Env, columns: new[] {new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.R8, "A"),
196-
new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.R8, "B")});
195+
var pipe = ML.Transforms.Conversion.ConvertType(columns: new[] {new TypeConvertingEstimator.ColumnInfo("ConvA", typeof(double), "A"),
196+
new TypeConvertingEstimator.ColumnInfo("ConvB", typeof(double), "B")});
197197

198198
var result = pipe.Fit(dataView).Transform(dataView);
199199
var resultRoles = new RoleMappedData(result);
@@ -213,7 +213,7 @@ public void TestMetadata()
213213
var pipe = new OneHotEncodingEstimator(Env, new[] {
214214
new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Ind),
215215
new OneHotEncodingEstimator.ColumnInfo("CatB", "B", OneHotEncodingTransformer.OutputKind.Key)
216-
}).Append(new TypeConvertingEstimator(Env, new[] {
216+
}).Append(ML.Transforms.Conversion.ConvertType(new[] {
217217
new TypeConvertingEstimator.ColumnInfo("ConvA", DataKind.R8, "CatA"),
218218
new TypeConvertingEstimator.ColumnInfo("ConvB", DataKind.U2, "CatB")
219219
}));

0 commit comments

Comments
 (0)