Skip to content

Commit 5272c0c

Browse files
committed
merging ML.ColumnOptions and ML.InputOutputColumnPair
1 parent 9c418e7 commit 5272c0c

File tree

13 files changed

+54
-94
lines changed

13 files changed

+54
-94
lines changed

docs/code/MlNetCookBook.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ var pipeline =
424424
// Use the multi-class SDCA model to predict the label using features.
425425
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated())
426426
// Apply the inverse conversion from 'PredictedLabel' column back to string value.
427-
.Append(mlContext.Transforms.Conversion.MapKeyToValue(("PredictedLabel", "Data")));
427+
.Append(mlContext.Transforms.Conversion.MapKeyToValue("Data", "PredictedLabel"));
428428

429429
// Train the model.
430430
var model = pipeline.Fit(trainData);

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+8-24
Original file line numberDiff line numberDiff line change
@@ -111,22 +111,6 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co
111111
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns)
112112
=> new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns.Select(x => (x.OutputColumnName, x.InputColumnName)).ToArray());
113113

114-
/// <summary>
115-
/// Convert the key types (name of the column specified in the first item of the tuple) back to their original values
116-
/// (named as specified in the second item of the tuple).
117-
/// </summary>
118-
/// <param name="catalog">The conversion transform's catalog</param>
119-
/// <param name="columns">The pairs of input and output columns.</param>
120-
/// <example>
121-
/// <format type="text/markdown">
122-
/// <![CDATA[
123-
/// [!code-csharp[KeyToValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
124-
/// ]]></format>
125-
/// </example>
126-
[BestFriend]
127-
internal static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns)
128-
=> new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
129-
130114
/// <summary>
131115
/// Maps key types or key values into a floating point vector.
132116
/// </summary>
@@ -292,11 +276,11 @@ public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType
292276
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
293277
this TransformsCatalog.ConversionTransforms catalog,
294278
IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
295-
params ColumnOptions[] columns)
279+
params InputOutputColumnPair[] columns)
296280
{
297281
var keys = keyValuePairs.Select(pair => pair.Key);
298282
var values = keyValuePairs.Select(pair => pair.Value);
299-
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, ColumnOptions.ConvertToValueTuples(columns));
283+
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, InputOutputColumnPair.ConvertToValueTuples(columns));
300284
}
301285

302286
/// <summary>
@@ -320,12 +304,12 @@ internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputTy
320304
this TransformsCatalog.ConversionTransforms catalog,
321305
IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
322306
bool treatValuesAsKeyType,
323-
params ColumnOptions[] columns)
307+
params InputOutputColumnPair[] columns)
324308
{
325309
var keys = keyValuePairs.Select(pair => pair.Key);
326310
var values = keyValuePairs.Select(pair => pair.Value);
327311
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType,
328-
ColumnOptions.ConvertToValueTuples(columns));
312+
InputOutputColumnPair.ConvertToValueTuples(columns));
329313
}
330314

331315
/// <summary>
@@ -381,12 +365,12 @@ public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType
381365
internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
382366
this TransformsCatalog.ConversionTransforms catalog,
383367
IEnumerable<KeyValuePair<TInputType, TOutputType[]>> keyValuePairs,
384-
params ColumnOptions[] columns)
368+
params InputOutputColumnPair[] columns)
385369
{
386370
var keys = keyValuePairs.Select(pair => pair.Key);
387371
var values = keyValuePairs.Select(pair => pair.Value);
388372
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values,
389-
ColumnOptions.ConvertToValueTuples(columns));
373+
InputOutputColumnPair.ConvertToValueTuples(columns));
390374
}
391375

392376
/// <summary>
@@ -437,8 +421,8 @@ public static ValueMappingEstimator MapValue(
437421
[BestFriend]
438422
internal static ValueMappingEstimator MapValue(
439423
this TransformsCatalog.ConversionTransforms catalog,
440-
IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params ColumnOptions[] columns)
424+
IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, params InputOutputColumnPair[] columns)
441425
=> new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name,
442-
ColumnOptions.ConvertToValueTuples(columns));
426+
InputOutputColumnPair.ConvertToValueTuples(columns));
443427
}
444428
}

src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs

+4-33
Original file line numberDiff line numberDiff line change
@@ -32,40 +32,11 @@ public InputOutputColumnPair(string outputColumnName, string inputColumnName = n
3232
InputColumnName = inputColumnName ?? outputColumnName;
3333
OutputColumnName = outputColumnName;
3434
}
35-
}
36-
37-
/// <summary>
38-
/// Specifies input and output column names for a transformation.
39-
/// </summary>
40-
[BestFriend]
41-
internal sealed class ColumnOptions
42-
{
43-
private readonly string _outputColumnName;
44-
private readonly string _inputColumnName;
45-
46-
/// <summary>
47-
/// Specifies input and output column names for a transformation.
48-
/// </summary>
49-
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
50-
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
51-
public ColumnOptions(string outputColumnName, string inputColumnName = null)
52-
{
53-
_outputColumnName = outputColumnName;
54-
_inputColumnName = inputColumnName ?? outputColumnName;
55-
}
56-
57-
/// <summary>
58-
/// Instantiates a <see cref="ColumnOptions"/> from a tuple of input and output column names.
59-
/// </summary>
60-
public static implicit operator ColumnOptions((string outputColumnName, string inputColumnName) value)
61-
{
62-
return new ColumnOptions(value.outputColumnName, value.inputColumnName);
63-
}
6435

6536
[BestFriend]
66-
internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(ColumnOptions[] infos)
37+
internal static (string outputColumnName, string inputColumnName)[] ConvertToValueTuples(InputOutputColumnPair[] infos)
6738
{
68-
return infos.Select(info => (info._outputColumnName, info._inputColumnName)).ToArray();
39+
return infos.Select(info => (info.OutputColumnName, info.InputColumnName)).ToArray();
6940
}
7041
}
7142

@@ -104,8 +75,8 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
10475
/// </format>
10576
/// </example>
10677
[BestFriend]
107-
internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params ColumnOptions[] columns)
108-
=> new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
78+
internal static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
79+
=> new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), InputOutputColumnPair.ConvertToValueTuples(columns));
10980

11081
/// <summary>
11182
/// Concatenates columns together.

src/Microsoft.ML.ImageAnalytics/ExtensionsCatalog.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ public static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalo
3232
/// ]]></format>
3333
/// </example>
3434
[BestFriend]
35-
internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params ColumnOptions[] columns)
36-
=> new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
35+
internal static ImageGrayscalingEstimator ConvertToGrayscale(this TransformsCatalog catalog, params InputOutputColumnPair[] columns)
36+
=> new ImageGrayscalingEstimator(CatalogUtils.GetEnvironment(catalog), InputOutputColumnPair.ConvertToValueTuples(columns));
3737

3838
/// <summary>
3939
/// Loads the images from the <see cref="ImageLoadingTransformer.ImageFolder" /> into memory.
@@ -80,8 +80,8 @@ public static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, s
8080
/// ]]></format>
8181
/// </example>
8282
[BestFriend]
83-
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params ColumnOptions[] columns)
84-
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, ColumnOptions.ConvertToValueTuples(columns));
83+
internal static ImageLoadingEstimator LoadImages(this TransformsCatalog catalog, string imageFolder, params InputOutputColumnPair[] columns)
84+
=> new ImageLoadingEstimator(CatalogUtils.GetEnvironment(catalog), imageFolder, InputOutputColumnPair.ConvertToValueTuples(columns));
8585

8686
/// <include file='doc.xml' path='doc/members/member[@name="ImagePixelExtractingEstimator"]/*' />
8787
/// <param name="catalog">The transform's catalog.</param>

src/Microsoft.ML.Transforms/ConversionsCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ public static class ConversionsCatalog
2020
/// <param name="columns">Specifies the output and input columns on which the transformation should be applied.</param>
2121
[BestFriend]
2222
internal static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog,
23-
params ColumnOptions[] columns)
24-
=> new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), ColumnOptions.ConvertToValueTuples(columns));
23+
params InputOutputColumnPair[] columns)
24+
=> new KeyToBinaryVectorMappingEstimator(CatalogUtils.GetEnvironment(catalog), InputOutputColumnPair.ConvertToValueTuples(columns));
2525

2626
/// <summary>
2727
/// Convert the key types back to binary vector.

src/Microsoft.ML.Transforms/NormalizerCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ public static NormalizingEstimator Normalize(this TransformsCatalog catalog,
4343
[BestFriend]
4444
internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
4545
NormalizingEstimator.NormalizationMode mode,
46-
params ColumnOptions[] columns)
47-
=> new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, ColumnOptions.ConvertToValueTuples(columns));
46+
params InputOutputColumnPair[] columns)
47+
=> new NormalizingEstimator(CatalogUtils.GetEnvironment(catalog), mode, InputOutputColumnPair.ConvertToValueTuples(columns));
4848

4949
/// <summary>
5050
/// Normalize (rescale) columns according to specified custom parameters.

src/Microsoft.ML.Transforms/Text/TextCatalog.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this
7575
[BestFriend]
7676
internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
7777
bool useMarkerCharacters = CharTokenizingDefaults.UseMarkerCharacters,
78-
params ColumnOptions[] columns)
79-
=> new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, ColumnOptions.ConvertToValueTuples(columns));
78+
params InputOutputColumnPair[] columns)
79+
=> new TokenizingByCharactersEstimator(Contracts.CheckRef(catalog, nameof(catalog)).GetEnvironment(), useMarkerCharacters, InputOutputColumnPair.ConvertToValueTuples(columns));
8080

8181
/// <summary>
8282
/// Normalizes incoming text in <paramref name="inputColumnName"/> by changing case, removing diacritical marks, punctuation marks and/or numbers

test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ private ITransformer TrainOnIris(string irisDataPath)
187187
// [2] -9.709775 float
188188

189189
// Apply the inverse conversion from 'PredictedLabel' column back to string value.
190-
var finalPipeline = pipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue(("Data", "PredictedLabel")));
190+
var finalPipeline = pipeline.Append(mlContext.Transforms.Conversion.MapKeyToValue("Data", "PredictedLabel"));
191191
dataPreview = finalPipeline.Preview(trainData);
192192

193193
return finalPipeline.Fit(trainData);

test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest()
3939
.AppendCacheCheckpoint(mlContext)
4040
.Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated(
4141
new SdcaCalibratedMulticlassTrainer.Options { NumberOfThreads = 1 }))
42-
.Append(mlContext.Transforms.Conversion.MapKeyToValue(("Plant", "PredictedLabel")));
42+
.Append(mlContext.Transforms.Conversion.MapKeyToValue("Plant", "PredictedLabel"));
4343

4444
// Train the pipeline
4545
var trainedModel = pipe.Fit(trainData);

test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs

+8-8
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ public void TensorFlowTransformInceptionTest()
391391
);
392392

393393
var data = reader.Load(new MultiFileSource(dataFile));
394-
var images = mlContext.Transforms.LoadImages(imageFolder, ("ImageReal", "ImagePath")).Fit(data).Transform(data);
394+
var images = mlContext.Transforms.LoadImages("ImageReal", imageFolder, "ImagePath").Fit(data).Transform(data);
395395
var cropped = mlContext.Transforms.ResizeImages("ImageCropped", 224, 224, "ImageReal").Fit(images).Transform(images);
396396
var pixels = mlContext.Transforms.ExtractPixels(inputName, "ImageCropped", interleavePixelColors: true).Fit(cropped).Transform(cropped);
397397
var tf = mlContext.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel(outputName, inputName, true).Fit(pixels).Transform(pixels);
@@ -507,7 +507,7 @@ public void TensorFlowTransformMNISTConvTest()
507507
var trainData = reader.Load(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
508508
var testData = reader.Load(GetDataPath(TestDatasets.mnistOneClass.testFilename));
509509

510-
var pipe = mlContext.Transforms.CopyColumns(("reshape_input", "Placeholder"))
510+
var pipe = mlContext.Transforms.CopyColumns("reshape_input", "Placeholder")
511511
.Append(mlContext.Model.LoadTensorFlowModel("mnist_model/frozen_saved_model.pb").ScoreTensorFlowModel(new[] { "Softmax", "dense/Relu" }, new[] { "Placeholder", "reshape_input" }))
512512
.Append(mlContext.Transforms.Concatenate("Features", "Softmax", "dense/Relu"))
513513
.Append(mlContext.MulticlassClassification.Trainers.LightGbm("Label", "Features"));
@@ -662,7 +662,7 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS
662662
preprocessedTestData = testData;
663663
}
664664

665-
var pipe = mlContext.Transforms.CopyColumns(("Features", "Placeholder"))
665+
var pipe = mlContext.Transforms.CopyColumns("Features", "Placeholder")
666666
.Append(mlContext.Model.LoadTensorFlowModel(modelLocation).RetrainTensorFlowModel(
667667
inputColumnNames: new[] { "Features" },
668668
outputColumnNames: new[] { "Prediction" },
@@ -729,7 +729,7 @@ public void TensorFlowTransformMNISTConvSavedModelTest()
729729
var trainData = reader.Load(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
730730
var testData = reader.Load(GetDataPath(TestDatasets.mnistOneClass.testFilename));
731731

732-
var pipe = mlContext.Transforms.CopyColumns(("reshape_input", "Placeholder"))
732+
var pipe = mlContext.Transforms.CopyColumns("reshape_input", "Placeholder")
733733
.Append(mlContext.Model.LoadTensorFlowModel("mnist_model").ScoreTensorFlowModel(new[] { "Softmax", "dense/Relu" }, new[] { "Placeholder", "reshape_input" }))
734734
.Append(mlContext.Transforms.Concatenate("Features", new[] { "Softmax", "dense/Relu" }))
735735
.Append(mlContext.MulticlassClassification.Trainers.LightGbm("Label", "Features"));
@@ -898,7 +898,7 @@ public void TensorFlowTransformCifarSavedModel()
898898
new TextLoader.Column("Name", DataKind.String, 1),
899899
}
900900
);
901-
var images = mlContext.Transforms.LoadImages(imageFolder, ("ImageReal", "ImagePath")).Fit(data).Transform(data);
901+
var images = mlContext.Transforms.LoadImages("ImageReal", imageFolder, "ImagePath").Fit(data).Transform(data);
902902
var cropped = mlContext.Transforms.ResizeImages("ImageCropped", imageWidth, imageHeight, "ImageReal").Fit(images).Transform(images);
903903
var pixels = mlContext.Transforms.ExtractPixels("Input", "ImageCropped", interleavePixelColors: true).Fit(cropped).Transform(cropped);
904904
IDataView trans = tensorFlowModel.ScoreTensorFlowModel("Output", "Input").Fit(pixels).Transform(pixels);
@@ -1000,15 +1000,15 @@ public void TensorFlowSentimentClassificationTest()
10001000
// The second pipeline 'tfEnginePipe' takes the resized integer vector and passes it to TensorFlow and gets the classification scores.
10011001
var estimator = mlContext.Transforms.Text.TokenizeIntoWords("TokenizedWords", "Sentiment_Text")
10021002
.Append(mlContext.Transforms.Conversion.MapValue(lookupMap, lookupMap.Schema["Words"], lookupMap.Schema["Ids"],
1003-
new ColumnOptions[] { ("Features", "TokenizedWords") }));
1003+
new[] { new InputOutputColumnPair("Features", "TokenizedWords") }));
10041004
var model = estimator.Fit(dataView);
10051005
var dataPipe = mlContext.Model.CreatePredictionEngine<TensorFlowSentiment, TensorFlowSentiment>(model);
10061006

10071007
// For an explanation of how the `sentiment_model` was created
10081008
// c.f. https://github.com/dotnet/machinelearning-testdata/blob/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model/README.md
10091009
string modelLocation = @"sentiment_model";
10101010
var pipelineModel = mlContext.Model.LoadTensorFlowModel(modelLocation).ScoreTensorFlowModel(new[] { "Prediction/Softmax" }, new[] { "Features" })
1011-
.Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))
1011+
.Append(mlContext.Transforms.CopyColumns("Prediction", "Prediction/Softmax"))
10121012
.Fit(dataView);
10131013
var tfEnginePipe = mlContext.Model.CreatePredictionEngine<TensorFlowSentiment, TensorFlowSentiment>(pipelineModel);
10141014

@@ -1052,7 +1052,7 @@ public void TensorFlowStringTest()
10521052
var dataview = mlContext.Data.CreateTextLoader<TextInput>().Load(new MultiFileSource(null));
10531053

10541054
var pipeline = tensorFlowModel.ScoreTensorFlowModel(new[] { "Original_A", "Joined_Splited_Text" }, new[] { "A", "B" })
1055-
.Append(mlContext.Transforms.CopyColumns(("AOut", "Original_A"), ("BOut", "Joined_Splited_Text")));
1055+
.Append(mlContext.Transforms.CopyColumns(new[] { new InputOutputColumnPair("AOut", "Original_A"), new InputOutputColumnPair("BOut", "Joined_Splited_Text") }));
10561056
var transformer = mlContext.Model.CreatePredictionEngine<TextInput, TextOutput>(pipeline.Fit(dataview));
10571057

10581058
var input = new TextInput

0 commit comments

Comments
 (0)