Skip to content

Commit 5f9be36

Browse files
authored
Multicolumn mapping for some estimators (#3066)
1 parent 9926f98 commit 5f9be36

19 files changed

+319
-121
lines changed

docs/code/MlNetCookBook.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ var pipeline =
424424
// Use the multi-class SDCA model to predict the label using features.
425425
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated())
426426
// Apply the inverse conversion from 'PredictedLabel' column back to string value.
427-
.Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Data")));
427+
.Append(mlContext.Transforms.Conversion.MapKeyToValue("Data", "PredictedLabel"));
428428

429429
// Train the model.
430430
var model = pipeline.Fit(trainData);

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+89-24
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Collections.Generic;
66
using System.Linq;
77
using Microsoft.ML.Data;
8+
using Microsoft.ML.Runtime;
89
using Microsoft.ML.Transforms;
910

1011
namespace Microsoft.ML
@@ -65,6 +66,22 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers
6566
DataKind outputKind = ConvertDefaults.DefaultOutputKind)
6667
=> new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), new[] { new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName) });
6768

69+
/// <summary>
70+
/// Changes the column type of each input column, producing one output column per pair in <paramref name="columns"/>.
71+
/// </summary>
72+
/// <param name="catalog">The conversion transform's catalog.</param>
73+
/// <param name="columns">The input and output column name pairs on which to apply the transformation. Validated with <c>CheckValue</c> before use.</param>
74+
/// <param name="outputKind">The expected kind of the output columns. The same kind is applied to every pair in <paramref name="columns"/>.</param>
75+
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog,
76+
InputOutputColumnPair[] columns,
77+
DataKind outputKind = ConvertDefaults.DefaultOutputKind)
78+
{
79+
var env = CatalogUtils.GetEnvironment(catalog);
80+
env.CheckValue(columns, nameof(columns)); // Validate the array before it is enumerated below.
81+
var columnOptions = columns.Select(x => new TypeConvertingEstimator.ColumnOptions(x.OutputColumnName, outputKind, x.InputColumnName)).ToArray(); // One ColumnOptions per pair, all sharing outputKind.
82+
return new TypeConvertingEstimator(env, columnOptions);
83+
}
84+
6885
/// <summary>
6986
/// Changes column type of the input column.
7087
/// </summary>
@@ -90,20 +107,16 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co
90107
=> new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName);
91108

92109
/// <summary>
93-
/// Convert the key types (name of the column specified in the first item of the tuple) back to their original values
94-
/// (named as specified in the second item of the tuple).
110+
/// Converts the key types back to their original values.
95111
/// </summary>
96-
/// <param name="catalog">The conversion transform's catalog</param>
97-
/// <param name="columns">The pairs of input and output columns.</param>
98-
/// <example>
99-
/// <format type="text/markdown">
100-
/// <![CDATA[
101-
/// [!code-csharp[KeyToValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
102-
/// ]]></format>
103-
/// </example>
104-
[BestFriend]
105-
internal static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns)
106-
=> new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
112+
/// <param name="catalog">The conversion transform's catalog.</param>
113+
/// <param name="columns">The input and output column name pairs on which to apply the transformation. Validated with <c>CheckValue</c> before use.</param>
114+
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns)
115+
{
116+
var env = CatalogUtils.GetEnvironment(catalog);
117+
env.CheckValue(columns, nameof(columns)); // Validate the array before it is enumerated below.
118+
return new KeyToValueMappingEstimator(env, columns.Select(x => (x.OutputColumnName, x.InputColumnName)).ToArray()); // Each pair becomes an (output, input) name tuple.
119+
}
107120

108121
/// <summary>
109122
/// Maps key types or key values into a floating point vector.
@@ -127,6 +140,23 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.
127140
string outputColumnName, string inputColumnName = null, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
128141
=> new KeyToVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputCountVector);
129142

143+
/// <summary>
144+
/// Maps columns of key types or key values into columns of floating point vectors, one output column per pair in <paramref name="columns"/>.
145+
/// </summary>
146+
/// <param name="catalog">The conversion transform's catalog.</param>
147+
/// <param name="columns">The input and output column name pairs on which to apply the transformation. Validated with <c>CheckValue</c> before use.</param>
148+
/// <param name="outputCountVector">Whether to combine multiple indicator vectors into a single vector of counts instead of concatenating them.
149+
/// This is only relevant when the input column is a vector of keys. The same setting is applied to every pair in <paramref name="columns"/>.</param>
150+
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
151+
InputOutputColumnPair[] columns, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
152+
{
153+
var env = CatalogUtils.GetEnvironment(catalog);
154+
env.CheckValue(columns, nameof(columns)); // Validate the array before it is enumerated below.
155+
var columnOptions = columns.Select(x => new KeyToVectorMappingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, outputCountVector)).ToArray(); // One ColumnOptions per pair, all sharing outputCountVector.
156+
return new KeyToVectorMappingEstimator(env, columnOptions);
157+
158+
}
159+
130160
/// <summary>
131161
/// Converts value types into <see cref="KeyDataViewType"/>.
132162
/// </summary>
@@ -157,6 +187,31 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co
157187
=> new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog),
158188
new[] { new ValueToKeyMappingEstimator.ColumnOptions(outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText) }, keyData);
159189

190+
/// <summary>
191+
/// Converts value types into <see cref="KeyDataViewType"/> for each pair in <paramref name="columns"/>.
192+
/// </summary>
193+
/// <param name="catalog">The conversion transform's catalog.</param>
194+
/// <param name="columns">The input and output column name pairs on which to apply the transformation. Validated with <c>CheckValue</c> before use.</param>
195+
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when auto-training.</param>
196+
/// <param name="keyOrdinality">How items should be ordered when vectorized. If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/> is chosen, they will be in the order encountered.
197+
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a').</param>
198+
/// <param name="addKeyValueAnnotationsAsText">Whether key value annotations should be text, regardless of the actual input type.</param>
199+
/// <param name="keyData">The data view containing the terms. If specified, this should be a single column data
200+
/// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined
201+
/// from the input data upon fitting.</param>
202+
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
203+
InputOutputColumnPair[] columns,
204+
int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
205+
ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
206+
bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
207+
IDataView keyData = null)
208+
{
209+
var env = CatalogUtils.GetEnvironment(catalog);
210+
env.CheckValue(columns, nameof(columns)); // Validate the array before it is enumerated below.
211+
var columnOptions = columns.Select(x => new ValueToKeyMappingEstimator.ColumnOptions(x.OutputColumnName, x.InputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText)).ToArray(); // The same key settings are applied to every pair.
212+
return new ValueToKeyMappingEstimator(env, columnOptions, keyData);
213+
}
214+
160215
/// <summary>
161216
/// Converts value types into <see cref="KeyDataViewType"/>, optionally loading the keys to use from <paramref name="keyData"/>.
162217
/// </summary>
@@ -232,11 +287,13 @@ public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType
232287
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
233288
this TransformsCatalog.ConversionTransforms catalog,
234289
IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
235-
params ColumnOptions[] columns)
290+
params InputOutputColumnPair[] columns)
236291
{
292+
var env = CatalogUtils.GetEnvironment(catalog);
293+
env.CheckValue(columns, nameof(columns));
237294
var keys = keyValuePairs.Select(pair => pair.Key);
238295
var values = keyValuePairs.Select(pair => pair.Value);
239-
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns));
296+
return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, InputOutputColumnPair.ConvertToValueTuples(columns));
240297
}
241298

242299
/// <summary>
@@ -260,12 +317,14 @@ internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputTy
260317
this TransformsCatalog.ConversionTransforms catalog,
261318
IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
262319
bool treatValuesAsKeyType,
263-
params ColumnOptions[] columns)
320+
params InputOutputColumnPair[] columns)
264321
{
322+
var env = CatalogUtils.GetEnvironment(catalog);
323+
env.CheckValue(columns, nameof(columns));
265324
var keys = keyValuePairs.Select(pair => pair.Key);
266325
var values = keyValuePairs.Select(pair => pair.Value);
267-
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType,
268-
ColumnOptions.ConvertToValueTuples(columns));
326+
return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, treatValuesAsKeyType,
327+
InputOutputColumnPair.ConvertToValueTuples(columns));
269328
}
270329

271330
/// <summary>
@@ -321,12 +380,14 @@ public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType
321380
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
322381
this TransformsCatalog.ConversionTransforms catalog,
323382
IEnumerable<KeyValuePair<TInputType, TOutputType[]>> keyValuePairs,
324-
params ColumnOptions[] columns)
383+
params InputOutputColumnPair[] columns)
325384
{
385+
var env = CatalogUtils.GetEnvironment(catalog);
386+
env.CheckValue(columns, nameof(columns));
326387
var keys = keyValuePairs.Select(pair => pair.Key);
327388
var values = keyValuePairs.Select(pair => pair.Value);
328-
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values,
329-
ColumnOptions.ConvertToValueTuples(columns));
389+
return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values,
390+
InputOutputColumnPair.ConvertToValueTuples(columns));
330391
}
331392

332393
/// <summary>
@@ -377,8 +438,12 @@ public static ValueMappingEstimator MapValue(
377438
[BestFriend]
378439
internal static ValueMappingEstimator MapValue(
379440
this TransformsCatalog.ConversionTransforms catalog,
380-
IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params ColumnOptions[] columns)
381-
=> new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name,
382-
ColumnOptions.ConvertToValueTuples(columns));
441+
IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params InputOutputColumnPair[] columns)
442+
{
443+
var env = CatalogUtils.GetEnvironment(catalog);
444+
env.CheckValue(columns, nameof(columns));
445+
return new ValueMappingEstimator(env, lookupMap, keyColumn.Name, valueColumn.Name,
446+
InputOutputColumnPair.ConvertToValueTuples(columns));
447+
}
383448
}
384449
}

src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs

+22-19
Original file line numberDiff line numberDiff line change
@@ -4,42 +4,41 @@
44

55
using System.Linq;
66
using Microsoft.ML.Data;
7+
using Microsoft.ML.Runtime;
78
using Microsoft.ML.Transforms;
89

910
namespace Microsoft.ML
1011
{
1112
/// <summary>
1213
/// Specifies input and output column names for a transformation.
1314
/// </summary>
14-
[BestFriend]
15-
internal sealed class ColumnOptions
15+
public sealed class InputOutputColumnPair
1616
{
17-
private readonly string _outputColumnName;
18-
private readonly string _inputColumnName;
17+
/// <summary>
18+
/// Name of the column to transform. If <see langword="null"/> was passed to the constructor, this is the same as <see cref="OutputColumnName"/>.
19+
/// </summary>
20+
public string InputColumnName { get; }
21+
/// <summary>
22+
/// Name of the column resulting from the transformation of <see cref="InputColumnName"/>.
23+
/// </summary>
24+
public string OutputColumnName { get; }
1925

2026
/// <summary>
2127
/// Specifies input and output column names for a transformation.
2228
/// </summary>
2329
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
2430
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
25-
public ColumnOptions(string outputColumnName, string inputColumnName = null)
31+
public InputOutputColumnPair(string outputColumnName, string inputColumnName = null)
2632
{
27-
_outputColumnName = outputColumnName;
28-
_inputColumnName = inputColumnName ?? outputColumnName;
29-
}
30-
31-
/// <summary>
32-
/// Instantiates a <see cref="ColumnOptions"/> from a tuple of input and output column names.
33-
/// </summary>
34-
public static implicit operator ColumnOptions((string outputColumnName, string inputColumnName) value)
35-
{
36-
return new ColumnOptions(value.outputColumnName, value.inputColumnName);
33+
Contracts.CheckNonEmpty(outputColumnName, nameof(outputColumnName));
34+
InputColumnName = inputColumnName ?? outputColumnName;
35+
OutputColumnName = outputColumnName;
3736
}
3837

3938
[BestFriend]
40-
internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(ColumnOptions[] infos)
39+
internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(InputOutputColumnPair[] infos)
4140
{
42-
return infos.Select(info => (info._outputColumnName, info._inputColumnName)).ToArray();
41+
return infos.Select(info => (info.OutputColumnName, info.InputColumnName)).ToArray();
4342
}
4443
}
4544

@@ -78,8 +77,12 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
7877
/// </format>
7978
/// </example>
8079
[BestFriend]
81-
internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns)
82-
=> new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
80+
internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
81+
{
82+
var env = CatalogUtils.GetEnvironment(catalog);
83+
env.CheckValue(columns, nameof(columns));
84+
return new ColumnCopyingEstimator(env, InputOutputColumnPair.ConvertToValueTuples(columns));
85+
}
8386

8487
/// <summary>
8588
/// Concatenates columns together.

src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs

+13-4
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using Microsoft.ML.Data;
6+
using Microsoft.ML.Runtime;
67
using Microsoft.ML.Transforms.Image;
78

89
namespace Microsoft.ML
@@ -32,8 +33,12 @@ public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalo
3233
/// ]]></format>
3334
/// </example>
3435
[BestFriend]
35-
internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns)
36-
=> new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
36+
internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
37+
{
38+
var env = CatalogUtils.GetEnvironment(catalog);
39+
env.CheckValue(columns, nameof(columns));
40+
return new ImageGrayscalingEstimator(env, InputOutputColumnPair.ConvertToValueTuples(columns));
41+
}
3742

3843
/// <summary>
3944
/// Loads the images from the <see cref="ImageLoadingTransformer.ImageFolder" /> into memory.
@@ -80,8 +85,12 @@ public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, s
8085
/// ]]></format>
8186
/// </example>
8287
[BestFriend]
83-
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns)
84-
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, ColumnOptions.ConvertToValueTuples(columns));
88+
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params InputOutputColumnPair[] columns)
89+
{
90+
var env = CatalogUtils.GetEnvironment(catalog);
91+
env.CheckValue(columns, nameof(columns));
92+
return new ImageLoadingEstimator(env, imageFolder, InputOutputColumnPair.ConvertToValueTuples(columns));
93+
}
8594

8695
/// <include file='doc.xml' path='doc/members/member[@name="ImagePixelExtractingEstimator"]/*' />
8796
/// <param name="catalog">The transform's catalog.</param>

0 commit comments

Comments
 (0)