Skip to content

Commit 236de94

Browse files
committed
further cleaning and fix build with bestfriends assemblies
1 parent 31afefd commit 236de94

File tree

7 files changed

+102
-31
lines changed

7 files changed

+102
-31
lines changed

src/Microsoft.ML.Data/Properties/AssemblyInfo.cs

+4
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TensorFlow" + PublicKey.Value)]
4040
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TimeSeries" + PublicKey.Value)]
4141
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.Transforms" + PublicKey.Value)]
42+
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.AlexNet" + PublicKey.Value)]
43+
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet101" + PublicKey.Value)]
44+
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet18" + PublicKey.Value)]
45+
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.DnnImageFeaturizer.ResNet50" + PublicKey.Value)]
4246

4347
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.StaticPipe" + PublicKey.Value)]
4448

src/Microsoft.ML.Data/Transforms/ColumnCopying.cs

+7
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333

3434
namespace Microsoft.ML.Transforms
3535
{
36+
/// <summary>
37+
/// <see cref="ColumnCopyingEstimator"/> copies the input column to another column named as specified in the parameters of the transformation.
38+
/// </summary>
3639
public sealed class ColumnCopyingEstimator : TrivialEstimator<ColumnCopyingTransformer>
3740
{
3841
[BestFriend]
@@ -41,6 +44,7 @@ internal ColumnCopyingEstimator(IHostEnvironment env, string outputColumnName, s
4144
{
4245
}
4346

47+
[BestFriend]
4448
internal ColumnCopyingEstimator(IHostEnvironment env, params (string outputColumnName, string inputColumnName)[] columns)
4549
: base(Contracts.CheckRef(env, nameof(env)).Register(nameof(ColumnCopyingEstimator)), new ColumnCopyingTransformer(env, columns))
4650
{
@@ -74,6 +78,9 @@ public sealed class ColumnCopyingTransformer : OneToOneTransformerBase
7478
internal const string UserName = "Copy Columns Transform";
7579
internal const string ShortName = "Copy";
7680

81+
/// <summary>
82+
/// Names of input and output column pairs on which the transformation is applied.
83+
/// </summary>
7784
public IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly();
7885

7986
private static VersionInfo GetVersionInfo()

src/Microsoft.ML.Data/Transforms/Hashing.cs

+39-14
Original file line numberDiff line numberDiff line change
@@ -1106,20 +1106,53 @@ public override void Process()
11061106
}
11071107

11081108
/// <summary>
1109-
/// Estimator for <see cref="HashingTransformer"/>
1109+
/// Estimator for <see cref="HashingTransformer"/> which can hash either single valued columns or vector columns. For vector columns,
1110+
/// it hashes each slot separately. It can hash either text values or key values.
11101111
/// </summary>
11111112
public sealed class HashingEstimator : IEstimator<HashingTransformer>
11121113
{
11131114
internal const int NumBitsMin = 1;
11141115
internal const int NumBitsLim = 32;
11151116

1117+
internal static class Defaults
1118+
{
1119+
public const int HashBits = NumBitsLim - 1;
1120+
public const uint Seed = 314489979;
1121+
public const bool Ordered = false;
1122+
public const int InvertHash = 0;
1123+
}
1124+
1125+
/// <summary>
1126+
/// Describes how the transformer handles one column pair.
1127+
/// </summary>
11161128
public sealed class ColumnInfo
11171129
{
1130+
/// <summary>
1131+
/// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
1132+
/// </summary>
11181133
public readonly string Name;
1134+
/// <summary>
1135+
/// Name of column to transform. If set to <see langword="null"/>, the value of the <see cref="Name"/> will be used as source.
1136+
/// </summary>
11191137
public readonly string InputColumnName;
1138+
/// <summary>
1139+
/// Number of bits to hash into. Must be between 1 and 31, inclusive.
1140+
/// </summary>
11201141
public readonly int HashBits;
1142+
/// <summary>
1143+
/// Hashing seed.
1144+
/// </summary>
11211145
public readonly uint Seed;
1146+
/// <summary>
1147+
/// Whether the position of each term should be included in the hash.
1148+
/// </summary>
11221149
public readonly bool Ordered;
1150+
/// <summary>
1151+
/// During hashing we construct mappings between original values and the produced hash values.
1152+
/// Text representations of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
1153+
/// <see cref="InvertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
1154+
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.
1155+
/// </summary>
11231156
public readonly int InvertHash;
11241157

11251158
/// <summary>
@@ -1136,10 +1169,10 @@ public sealed class ColumnInfo
11361169
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
11371170
public ColumnInfo(string name,
11381171
string inputColumnName = null,
1139-
int hashBits = HashingEstimator.Defaults.HashBits,
1140-
uint seed = HashingEstimator.Defaults.Seed,
1141-
bool ordered = HashingEstimator.Defaults.Ordered,
1142-
int invertHash = HashingEstimator.Defaults.InvertHash)
1172+
int hashBits = Defaults.HashBits,
1173+
uint seed = Defaults.Seed,
1174+
bool ordered = Defaults.Ordered,
1175+
int invertHash = Defaults.InvertHash)
11431176
{
11441177
if (invertHash < -1)
11451178
throw Contracts.ExceptParam(nameof(invertHash), "Value too small, must be -1 or larger");
@@ -1183,14 +1216,6 @@ internal void Save(ModelSaveContext ctx)
11831216
}
11841217
}
11851218

1186-
public static class Defaults
1187-
{
1188-
public const int HashBits = NumBitsLim - 1;
1189-
public const uint Seed = 314489979;
1190-
public const bool Ordered = false;
1191-
public const int InvertHash = 0;
1192-
}
1193-
11941219
private readonly IHost _host;
11951220
private readonly ColumnInfo[] _columns;
11961221

@@ -1234,7 +1259,7 @@ internal HashingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
12341259
}
12351260

12361261
/// <summary>
1237-
/// Train and return a transformer.
1262+
/// Trains and returns a <see cref="HashingTransformer"/>.
12381263
/// </summary>
12391264
public HashingTransformer Fit(IDataView input) => new HashingTransformer(_host, input, _columns);
12401265

src/Microsoft.ML.Data/Transforms/KeyToVector.cs

+15
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@
3232

3333
namespace Microsoft.ML.Transforms.Conversions
3434
{
35+
/// <summary>
36+
/// Converts the key types back to their original vectors.
37+
/// </summary>
3538
public sealed class KeyToVectorMappingTransformer : OneToOneTransformerBase
3639
{
3740
internal abstract class ColumnBase : OneToOneColumn
@@ -720,6 +723,9 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
720723
}
721724
}
722725

726+
/// <summary>
727+
/// Estimator for <see cref="KeyToVectorMappingTransformer"/>. Converts the key types back to their original vectors.
728+
/// </summary>
723729
public sealed class KeyToVectorMappingEstimator : TrivialEstimator<KeyToVectorMappingTransformer>
724730
{
725731
public static class Defaults
@@ -732,8 +738,17 @@ public static class Defaults
732738
/// </summary>
733739
public sealed class ColumnInfo
734740
{
741+
/// <summary>
742+
/// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
743+
/// </summary>
735744
public readonly string Name;
745+
/// <summary>
746+
/// Name of column to transform. If set to <see langword="null"/>, the value of the <see cref="Name"/> will be used as source.
747+
/// </summary>
736748
public readonly string InputColumnName;
749+
/// <summary>
750+
/// Whether to combine multiple indicator vectors into a single bag vector instead of concatenating them. This is only relevant when the input column is a vector.
751+
/// </summary>
737752
public readonly bool Bag;
738753

739754
/// <summary>

src/Microsoft.ML.Transforms/OneHotEncoding.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -316,16 +316,16 @@ public static CommonOutputs.TransformOutput CatTransformDict(IHostEnvironment en
316316
}
317317

318318
[TlcModule.EntryPoint(Name = "Transforms.CategoricalHashOneHotVectorizer",
319-
Desc = OneHotHashEncoding.Summary,
320-
UserName = OneHotHashEncoding.UserName)]
321-
public static CommonOutputs.TransformOutput CatTransformHash(IHostEnvironment env, OneHotHashEncoding.Options input)
319+
Desc = OneHotHashEncodingTransformer.Summary,
320+
UserName = OneHotHashEncodingTransformer.UserName)]
321+
public static CommonOutputs.TransformOutput CatTransformHash(IHostEnvironment env, OneHotHashEncodingTransformer.Options input)
322322
{
323323
Contracts.CheckValue(env, nameof(env));
324324
var host = env.Register("CatTransformDict");
325325
host.CheckValue(input, nameof(input));
326326
EntryPointUtils.CheckInputArgs(host, input);
327327

328-
var xf = OneHotHashEncoding.Create(host, input, input.Data);
328+
var xf = OneHotHashEncodingTransformer.Create(host, input, input.Data);
329329
return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, xf, input.Data), OutputData = xf };
330330
}
331331

src/Microsoft.ML.Transforms/OneHotHashEncoding.cs

+31-11
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@
1616
using Microsoft.ML.Transforms.Categorical;
1717
using Microsoft.ML.Transforms.Conversions;
1818

19-
[assembly: LoadableClass(OneHotHashEncoding.Summary, typeof(IDataTransform), typeof(OneHotHashEncoding), typeof(OneHotHashEncoding.Options), typeof(SignatureDataTransform),
20-
OneHotHashEncoding.UserName, "CategoricalHashTransform", "CatHashTransform", "CategoricalHash", "CatHash")]
19+
[assembly: LoadableClass(OneHotHashEncodingTransformer.Summary, typeof(IDataTransform), typeof(OneHotHashEncodingTransformer), typeof(OneHotHashEncodingTransformer.Options), typeof(SignatureDataTransform),
20+
OneHotHashEncodingTransformer.UserName, "CategoricalHashTransform", "CatHashTransform", "CategoricalHash", "CatHash")]
2121

2222
namespace Microsoft.ML.Transforms.Categorical
2323
{
24-
public sealed class OneHotHashEncoding : ITransformer, ICanSaveModel
24+
/// <summary>
25+
/// Produces a column of indicator vectors. The mapping between a value and a corresponding index is done through hashing.
26+
/// </summary>
27+
public sealed class OneHotHashEncodingTransformer : ITransformer, ICanSaveModel
2528
{
2629
internal sealed class Column : OneToOneColumn
2730
{
@@ -118,7 +121,7 @@ internal sealed class Options : TransformInputBase
118121
internal const string UserName = "Categorical Hash Transform";
119122

120123
/// <summary>
121-
/// A helper method to create <see cref="OneHotHashEncoding"/>.
124+
/// A helper method to create <see cref="OneHotHashEncodingTransformer"/>.
122125
/// </summary>
123126
/// <param name="env">Host Environment.</param>
124127
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
@@ -167,31 +170,45 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa
167170

168171
private readonly TransformerChain<ITransformer> _transformer;
169172

170-
internal OneHotHashEncoding(HashingEstimator hash, IEstimator<ITransformer> keyToVector, IDataView input)
173+
internal OneHotHashEncodingTransformer(HashingEstimator hash, IEstimator<ITransformer> keyToVector, IDataView input)
171174
{
172175
if (keyToVector != null)
173176
_transformer = hash.Append(keyToVector).Fit(input);
174177
else
175178
_transformer = new TransformerChain<ITransformer>(hash.Fit(input));
176179
}
177-
180+
/// <summary>
181+
/// Schema propagation for transformers. Returns the output schema of the data, if
182+
/// the input schema is like the one provided.
183+
/// </summary>
178184
public Schema GetOutputSchema(Schema inputSchema) => _transformer.GetOutputSchema(inputSchema);
179185

186+
/// <summary>
187+
/// Take the data in, make transformations, output the data. Note that <see cref="IDataView"/>
188+
/// are lazy, so no actual transformations happen here, just schema validation.
189+
/// </summary>
180190
public IDataView Transform(IDataView input) => _transformer.Transform(input);
181191

182192
public void Save(ModelSaveContext ctx) => _transformer.Save(ctx);
183193

194+
/// <summary>
195+
/// Whether a call to <see cref="GetRowToRowMapper"/> should succeed, on an appropriate schema.
196+
/// </summary>
184197
public bool IsRowToRowMapper => _transformer.IsRowToRowMapper;
185198

199+
/// <summary>
200+
/// Constructs a row-to-row mapper based on an input schema.
201+
/// </summary>
186202
public IRowToRowMapper GetRowToRowMapper(Schema inputSchema) => _transformer.GetRowToRowMapper(inputSchema);
187203
}
188204

189205
/// <summary>
190-
/// Estimator which takes set of columns and produce for each column indicator array. Use hashing to determine indicator position.
206+
/// Estimator that produces a column of indicator vectors. The mapping between a value and a corresponding index is done through hashing.
191207
/// </summary>
192-
public sealed class OneHotHashEncodingEstimator : IEstimator<OneHotHashEncoding>
208+
public sealed class OneHotHashEncodingEstimator : IEstimator<OneHotHashEncodingTransformer>
193209
{
194-
public static class Defaults
210+
[BestFriend]
211+
internal static class Defaults
195212
{
196213
public const int HashBits = 16;
197214
public const uint Seed = 314489979;
@@ -200,6 +217,9 @@ public static class Defaults
200217
public const OneHotEncodingTransformer.OutputKind OutputKind = OneHotEncodingTransformer.OutputKind.Bag;
201218
}
202219

220+
/// <summary>
221+
/// Describes how the transformer handles one column pair.
222+
/// </summary>
203223
public sealed class ColumnInfo
204224
{
205225
public readonly HashingEstimator.ColumnInfo HashInfo;
@@ -321,8 +341,8 @@ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
321341
}
322342

323343
/// <summary>
324-
/// Train and return a transformer.
344+
/// Trains and returns a <see cref="OneHotHashEncodingTransformer"/>.
325345
/// </summary>
326-
public OneHotHashEncoding Fit(IDataView input) => new OneHotHashEncoding(_hash, _toSomething, input);
346+
public OneHotHashEncodingTransformer Fit(IDataView input) => new OneHotHashEncodingTransformer(_hash, _toSomething, input);
327347
}
328348
}

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ private void TestHashTransformHelper<T>(T[] data, uint[] results, NumberType typ
11041104
builder.AddColumn("F1", type, data);
11051105
var srcView = builder.GetDataView();
11061106

1107-
var hashTransform = new HashingTransformer(Env, new HashingTransformer.ColumnInfo("F1", "F1", 5, 42)).Transform(srcView);
1107+
var hashTransform = new HashingTransformer(Env, new HashingEstimator.ColumnInfo("F1", "F1", 5, 42)).Transform(srcView);
11081108
using (var cursor = hashTransform.GetRowCursorForAllColumns())
11091109
{
11101110
var resultGetter = cursor.GetGetter<uint>(1);
@@ -1135,7 +1135,7 @@ private void TestHashTransformVectorHelper<T>(VBuffer<T> data, uint[][] results,
11351135
private void TestHashTransformVectorHelper(ArrayDataViewBuilder builder, uint[][] results)
11361136
{
11371137
var srcView = builder.GetDataView();
1138-
var hashTransform = new HashingTransformer(Env, new HashingTransformer.ColumnInfo("F1V", "F1V", 5, 42)).Transform(srcView);
1138+
var hashTransform = new HashingTransformer(Env, new HashingEstimator.ColumnInfo("F1V", "F1V", 5, 42)).Transform(srcView);
11391139
using (var cursor = hashTransform.GetRowCursorForAllColumns())
11401140
{
11411141
var resultGetter = cursor.GetGetter<VBuffer<uint>>(1);

0 commit comments

Comments
 (0)