Skip to content

Commit 1dd66f0

Browse files
authored
Towards #3204 - Conversion's catalog (#3394)
* towards adapting the Conversions catalog documentation to the new template.
1 parent 9efe749 commit 1dd66f0

9 files changed

+286
-173
lines changed

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+120-64
Large diffs are not rendered by default.

src/Microsoft.ML.Data/Transforms/Hashing.cs

+20-9
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,7 @@
2929
namespace Microsoft.ML.Transforms
3030
{
3131
/// <summary>
32-
/// This transformer can hash either single valued columns or vector columns. For vector columns,
33-
/// it hashes each slot separately.
34-
/// It can hash either text values or key values.
32+
/// <see cref="ITransformer"/> resulting from fitting a <see cref="HashingEstimator"/>.
3533
/// </summary>
3634
public sealed class HashingTransformer : OneToOneTransformerBase
3735
{
@@ -1104,9 +1102,22 @@ public override void Process()
11041102
}
11051103

11061104
/// <summary>
1107-
/// Estimator for <see cref="HashingTransformer"/> which can hash either single valued columns or vector columns. For vector columns,
1108-
/// it hashes each slot separately. It can hash either text values or key values.
1105+
/// Estimator for <see cref="HashingTransformer"/>, which hashes either single valued columns or vector columns. For vector columns,
1106+
/// it hashes each slot separately.
11091107
/// </summary>
1108+
/// <remarks>
1109+
/// <format type="text/markdown"><![CDATA[
1110+
///
1111+
/// ### Estimator Characteristics
1112+
/// | | |
1113+
/// | -- | -- |
1114+
/// | Does this estimator need to look at the data to train its parameters? | Yes, if the mapping of the hashes to the values is required. |
1115+
/// | Input column data type | Vector or scalars of numeric, boolean, [text](xref:Microsoft.ML.Data.TextDataViewType), [DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
1116+
/// | Output column data type | Vector or scalar [System.Int32](xref:System.Int32).|
1117+
///
1118+
/// ]]></format>
1119+
/// </remarks>
1120+
/// <seealso cref="ConversionsExtensionsCatalog.Hash(TransformsCatalog.ConversionTransforms, string, string, int, int)"/>
11101121
public sealed class HashingEstimator : IEstimator<HashingTransformer>
11111122
{
11121123
internal const int NumBitsMin = 1;
@@ -1140,7 +1151,7 @@ internal sealed class ColumnOptions
11401151
public readonly bool UseOrderedHashing;
11411152
/// <summary>
11421153
/// During hashing we construct mappings between original values and the produced hash values.
1143-
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one.
1154+
/// Text representations of original values are stored in the slot names of the annotations for the new column. Hashing, as such, can map many initial values to one.
11441155
/// <see cref="MaximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
11451156
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.
11461157
/// </summary>
@@ -1154,8 +1165,8 @@ internal sealed class ColumnOptions
11541165
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
11551166
/// <param name="seed">Hashing seed.</param>
11561167
/// <param name="useOrderedHashing">Whether the position of each term should be included in the hash. Only applies to inputs of vector type.</param>
1157-
/// <param name="maximumNumberOfInverts">During hashing we constuct mappings between original values and the produced hash values.
1158-
/// Text representation of original values are stored in the slot names of the metadata for the new column.Hashing, as such, can map many initial values to one.
1168+
/// <param name="maximumNumberOfInverts">During hashing we construct mappings between original values and the produced hash values.
1169+
/// Text representations of original values are stored in the slot names of the annotations for the new column. Hashing, as such, can map many initial values to one.
11591170
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
11601171
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
11611172
public ColumnOptions(string name,
@@ -1227,7 +1238,7 @@ internal static bool IsColumnTypeValid(DataViewType type)
12271238
/// <param name="inputColumnName">Name of the column to transform.
12281239
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
12291240
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
1230-
/// <param name="maximumNumberOfInverts">During hashing we constuct mappings between original values and the produced hash values.
1241+
/// <param name="maximumNumberOfInverts">During hashing we construct mappings between original values and the produced hash values.
12311242
/// Text representations of original values are stored in the slot names of the annotations for the new column. Hashing, as such, can map many initial values to one.
12321243
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
12331244
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>

src/Microsoft.ML.Data/Transforms/KeyToValue.cs

+19-4
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@
3131
namespace Microsoft.ML.Transforms
3232
{
3333
/// <summary>
34-
/// KeyToValueTransform utilizes KeyValues metadata to map key indices to the corresponding values in the KeyValues metadata.
35-
/// Notes:
36-
/// * Output columns utilize the KeyValues metadata.
37-
/// * Maps zero values of the key type to the NA of the output type.
34+
/// <see cref="ITransformer"/> resulting from fitting a <see cref="KeyToValueMappingEstimator"/>.
3835
/// </summary>
3936
public sealed class KeyToValueMappingTransformer : OneToOneTransformerBase
4037
{
@@ -502,6 +499,24 @@ public override JToken SavePfa(BoundPfaContext ctx, JToken srcToken)
502499
}
503500
}
504501

502+
/// <summary>
503+
/// Utilizes KeyValues <see cref="AnnotationInfo"/> of the input column, to map keys to the corresponding values.
504+
/// Zero values of the <see cref="KeyDataViewType"/> are mapped to the <see langword="default"/> value of the output type.
505+
/// </summary>
506+
/// <remarks>
507+
/// <format type="text/markdown"><![CDATA[
508+
///
509+
/// ### Estimator Characteristics
510+
/// | | |
511+
/// | -- | -- |
512+
/// | Does this estimator need to look at the data to train its parameters? | No |
513+
/// | Input column data type | [key](xref:Microsoft.ML.Data.KeyDataViewType) |
514+
/// | Output column data type | Type of the original data, prior to converting to [key](xref:Microsoft.ML.Data.KeyDataViewType). |
515+
///
516+
/// ]]></format>
517+
/// </remarks>
518+
/// <seealso cref="ConversionsExtensionsCatalog.MapKeyToValue(TransformsCatalog.ConversionTransforms, InputOutputColumnPair[])"/>
519+
/// <seealso cref="ConversionsExtensionsCatalog.MapKeyToValue(TransformsCatalog.ConversionTransforms, string, string)"/>
505520
public sealed class KeyToValueMappingEstimator : TrivialEstimator<KeyToValueMappingTransformer>
506521
{
507522
internal KeyToValueMappingEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName = null)

src/Microsoft.ML.Data/Transforms/KeyToVector.cs

+17-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
namespace Microsoft.ML.Transforms
3232
{
3333
/// <summary>
34-
/// Converts the key types back to their original vectors.
34+
/// <see cref="ITransformer"/> resulting from fitting a <see cref="KeyToVectorMappingEstimator"/>.
3535
/// </summary>
3636
public sealed class KeyToVectorMappingTransformer : OneToOneTransformerBase
3737
{
@@ -725,6 +725,22 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, ColInfo info, string src
725725
/// <summary>
726726
/// Estimator for <see cref="KeyToVectorMappingTransformer"/>. Converts the key types back to their original vectors.
727727
/// </summary>
728+
/// <summary>
729+
/// Utilizes KeyValues <see cref="AnnotationInfo"/> of the input column, to map keys to a vector representing the original value.
730+
/// Zero values of the <see cref="KeyDataViewType"/> are mapped to the <see langword="default"/> value of the output type.
731+
/// </summary>
732+
/// <remarks>
733+
/// <format type="text/markdown"><![CDATA[
734+
///
735+
/// ### Estimator Characteristics
736+
/// | | |
737+
/// | -- | -- |
738+
/// | Does this estimator need to look at the data to train its parameters? | No |
739+
/// | Input column data type | [key](xref:Microsoft.ML.Data.KeyDataViewType) |
740+
/// | Output column data type | A vector of [System.Single](xref:System.Single). |
741+
///
742+
/// ]]></format>
743+
/// </remarks>
728744
public sealed class KeyToVectorMappingEstimator : TrivialEstimator<KeyToVectorMappingTransformer>
729745
{
730746
internal static class Defaults

src/Microsoft.ML.Data/Transforms/TypeConverting.cs

+18-4
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ public static CommonOutputs.TransformOutput Convert(IHostEnvironment env, TypeCo
5151
}
5252

5353
/// <summary>
54-
/// <see cref="TypeConvertingTransformer"/> converts underlying column types.
55-
/// The source and destination column types need to be compatible.
54+
/// <see cref="ITransformer"/> resulting from fitting a <see cref="TypeConvertingEstimator"/>.
5655
/// </summary>
5756
public sealed class TypeConvertingTransformer : OneToOneTransformerBase
5857
{
@@ -513,9 +512,24 @@ private bool SaveAsOnnxCore(OnnxContext ctx, int iinfo, string srcVariableName,
513512
}
514513

515514
/// <summary>
516-
/// <see cref="TypeConvertingEstimator"/> converts underlying column types.
517-
/// The source and destination column types need to be compatible.
515+
/// Estimator for <see cref="TypeConvertingTransformer"/>. Converts the underlying input column type to a new type.
516+
/// The input and output column types need to be compatible.
517+
/// <see cref="PrimitiveDataViewType"/>
518518
/// </summary>
519+
/// <remarks>
520+
/// <format type="text/markdown"><![CDATA[
521+
///
522+
/// ### Estimator Characteristics
523+
/// | | |
524+
/// | -- | -- |
525+
/// | Does this estimator need to look at the data to train its parameters? | No |
526+
/// | Input column data type | Vector or primitive numeric, boolean, [text](xref:Microsoft.ML.Data.TextDataViewType), [System.DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
527+
/// | Output column data type | Vector or primitive numeric, boolean, [text](xref:Microsoft.ML.Data.TextDataViewType), [System.DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
528+
///
529+
/// ]]></format>
530+
/// </remarks>
531+
/// <seealso cref="ConversionsExtensionsCatalog.ConvertType(TransformsCatalog.ConversionTransforms, InputOutputColumnPair[], DataKind)"/>
532+
/// <seealso cref="ConversionsExtensionsCatalog.ConvertType(TransformsCatalog.ConversionTransforms, string, string, DataKind)"/>
519533
public sealed class TypeConvertingEstimator : TrivialEstimator<TypeConvertingTransformer>
520534
{
521535
internal sealed class Defaults

src/Microsoft.ML.Data/Transforms/ValueMapping.cs

+60-3
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,34 @@
3131

3232
namespace Microsoft.ML.Transforms
3333
{
34-
/// <include file='doc.xml' path='doc/members/member[@name="ValueMappingEstimator"]/*' />
34+
35+
/// <summary>
36+
/// Estimator for <see cref="ValueMappingTransformer"/> creating a key-value map using the pairs of values in the input data
37+
/// <see cref="PrimitiveDataViewType"/>
38+
/// </summary>
39+
/// <remarks>
40+
/// <format type="text/markdown"><![CDATA[
41+
///
42+
/// ### Estimator Characteristics
43+
/// | | |
44+
/// | -- | -- |
45+
/// | Does this estimator need to look at the data to train its parameters? | No |
46+
/// | Input column data type | Vector or primitive numeric, boolean, text, [System.DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
47+
/// | Output column data type | Vector or primitive numeric, boolean, text, [System.DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
48+
///
49+
/// Given two sets of values, one serving as the key, and the other as the value of a Dictionary, the ValueMappingEstimator builds up this dictionary so that when given a specific key it will return a
50+
/// specific value. The ValueMappingEstimator supports keys and values of different [System.Type](xref:System.Type) to support different data types.
51+
/// Examples for using a ValueMappingEstimator are:
52+
/// * Converting a string value to a string value. This can be useful for grouping (e.g. 'cat', 'dog', 'horse' map to 'mammals').
53+
/// * Converting a string value to an integer value (e.g. converting a text description such as quality to a numeric value, where 'good' maps to 1 and 'poor' maps to 0).
54+
/// * Converting an integer value to a string value and having the string value represented as a [key](xref:Microsoft.ML.Data.KeyDataViewType)
55+
/// (e.g. converting zip codes to a state string value, which will generate a unique integer value that can be used as a label).
56+
///
57+
/// Values can be repeated to allow for multiple keys to map to the same value, however keys can not be repeated. The mapping between keys and values
58+
/// can be specified either through lists, where the key list and value list must be the same size, or through an [IDataView](xref:Microsoft.ML.IDataView).
59+
/// ]]></format>
60+
/// </remarks>
61+
/// <seealso cref="ConversionsExtensionsCatalog.MapValue(TransformsCatalog.ConversionTransforms, string, IDataView, DataViewSchema.Column, DataViewSchema.Column, string)"/>
3562
public class ValueMappingEstimator : TrivialEstimator<ValueMappingTransformer>
3663
{
3764
private readonly (string outputColumnName, string inputColumnName)[] _columns;
@@ -101,9 +128,37 @@ public sealed override SchemaShape GetOutputSchema(SchemaShape inputSchema)
101128
}
102129
}
103130

104-
/// <include file='doc.xml' path='doc/members/member[@name="ValueMappingEstimator"]/*' />
131+
/// <summary>
132+
/// Estimator for <see cref="ValueMappingTransformer"/> creating a key-value map using the pairs of values in the input data
133+
/// <see cref="PrimitiveDataViewType"/>
134+
/// </summary>
135+
/// <remarks>
136+
/// <format type="text/markdown"><![CDATA[
137+
///
138+
/// ### Estimator Characteristics
139+
/// | | |
140+
/// | -- | -- |
141+
/// | Does this estimator need to look at the data to train its parameters? | No |
142+
/// | Input column data type | Vector or primitive numeric, boolean, text, [System.DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
143+
/// | Output column data type | Vector or primitive numeric, boolean, text, [System.DateTime](xref:System.DateTime) and [key](xref:Microsoft.ML.Data.KeyDataViewType) data types.|
144+
///
145+
/// Given two sets of values, one serving as the key, and the other as the value of a Dictionary, the ValueMappingEstimator builds up this dictionary so that when given a specific key it will return a
146+
/// specific value. The ValueMappingEstimator supports keys and values of different [System.Type](xref:System.Type) to support different data types.
147+
/// Examples for using a ValueMappingEstimator are:
148+
/// * Converting a string value to a string value. This can be useful for grouping (e.g. 'cat', 'dog', 'horse' map to 'mammals').
149+
/// * Converting a string value to an integer value (e.g. converting a text description such as quality to a numeric value, where 'good' maps to 1 and 'poor' maps to 0).
150+
/// * Converting an integer value to a string value and having the string value represented as a [key](xref:Microsoft.ML.Data.KeyDataViewType)
151+
/// (e.g. converting zip codes to a state string value, which will generate a unique integer value that can be used as a label).
152+
///
153+
/// Values can be repeated to allow for multiple keys to map to the same value, however keys can not be repeated. The mapping between keys and values
154+
/// can be specified either through lists, where the key list and value list must be the same size, or through an [IDataView](xref:Microsoft.ML.IDataView).
155+
/// ]]></format>
156+
/// </remarks>
105157
/// <typeparam name="TKey">Specifies the key type.</typeparam>
106158
/// <typeparam name="TValue">Specifies the value type.</typeparam>
159+
/// <seealso cref="ConversionsExtensionsCatalog.MapValue{TInputType, TOutputType}(TransformsCatalog.ConversionTransforms, IEnumerable{KeyValuePair{TInputType, TOutputType}}, InputOutputColumnPair[])"/>
160+
/// <seealso cref="ConversionsExtensionsCatalog.MapValue{TInputType, TOutputType}(TransformsCatalog.ConversionTransforms, IEnumerable{KeyValuePair{TInputType, TOutputType[]}}, InputOutputColumnPair[])"/>
161+
/// <seealso cref="ConversionsExtensionsCatalog.MapValue{TInputType, TOutputType}(TransformsCatalog.ConversionTransforms, string, IEnumerable{KeyValuePair{TInputType, TOutputType}}, string, bool)"/>
107162
public sealed class ValueMappingEstimator<TKey, TValue> : ValueMappingEstimator
108163
{
109164
/// <summary>
@@ -283,7 +338,9 @@ internal static IDataView CreateDataView<TKey, TValue>(IHostEnvironment env,
283338
}
284339
}
285340

286-
/// <include file='doc.xml' path='doc/members/member[@name="ValueMappingEstimator"]/*' />
341+
/// <summary>
342+
/// <see cref="ITransformer"/> resulting from fitting a <see cref="ValueMappingEstimator"/>.
343+
/// </summary>
287344
public class ValueMappingTransformer : OneToOneTransformerBase
288345
{
289346
internal const string Summary = "Maps text values columns to new columns using a map dataset.";

0 commit comments

Comments
 (0)