Skip to content

Commit 584a0d6

Browse files
authored
Samples / API rev from 2/27 bug bash feedback (dotnet#242)
1 parent 12cff38 commit 584a0d6

15 files changed

+5857
-236
lines changed

src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs

+7-1
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,13 @@ public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView tra
6464
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
6565
}
6666

67-
public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizers = null)
67+
public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView trainData, IDataView validationData, string labelColumn = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizers = null)
68+
{
69+
var columnInformation = new ColumnInformation() { LabelColumn = labelColumn };
70+
return Execute(_context, trainData, columnInformation, validationData, preFeaturizers);
71+
}
72+
73+
public IEnumerable<RunResult<BinaryClassificationMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizers = null)
6874
{
6975
return Execute(_context, trainData, columnInformation, validationData, preFeaturizers);
7076
}

src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs

+7-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,13 @@ public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView tra
6262
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
6363
}
6464

65-
public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizers = null)
65+
public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView trainData, IDataView validationData, string labelColumn = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizers = null)
66+
{
67+
var columnInformation = new ColumnInformation() { LabelColumn = labelColumn };
68+
return Execute(_context, trainData, columnInformation, validationData, preFeaturizers);
69+
}
70+
71+
public IEnumerable<RunResult<MultiClassClassifierMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizers = null)
6672
{
6773
return Execute(_context, trainData, columnInformation, validationData, preFeaturizers);
6874
}

src/Microsoft.ML.Auto/API/RegressionExperiment.cs

+7-1
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,13 @@ public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, Co
5959
return Execute(_context, trainData, columnInformation, null, preFeaturizers);
6060
}
6161

62-
public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation = null, IEstimator<ITransformer> preFeaturizers = null)
62+
public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, IDataView validationData, string labelColumn = DefaultColumnNames.Label, IEstimator<ITransformer> preFeaturizers = null)
63+
{
64+
var columnInformation = new ColumnInformation() { LabelColumn = labelColumn };
65+
return Execute(_context, trainData, columnInformation, validationData, preFeaturizers);
66+
}
67+
68+
public IEnumerable<RunResult<RegressionMetrics>> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizers = null)
6369
{
6470
return Execute(_context, trainData, columnInformation, validationData, preFeaturizers);
6571
}

src/Samples/AutoTrainBinaryClassification.cs

+14-11
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,14 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Collections.Generic;
67
using System.IO;
78
using System.Linq;
89
using Microsoft.Data.DataView;
910
using Microsoft.ML;
1011
using Microsoft.ML.Auto;
1112
using Microsoft.ML.Data;
13+
using Samples.Helpers;
1214

1315
namespace Samples
1416
{
@@ -26,32 +28,33 @@ public static void Run()
2628
MLContext mlContext = new MLContext();
2729

2830
// STEP 1: Infer columns
29-
var columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn);
31+
ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn);
32+
ConsoleHelper.Print(columnInference);
3033

3134
// STEP 2: Load data
32-
var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
33-
var trainDataView = textLoader.Read(TrainDataPath);
34-
var testDataView = textLoader.Read(TestDataPath);
35+
TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
36+
IDataView trainDataView = textLoader.Read(TrainDataPath);
37+
IDataView testDataView = textLoader.Read(TestDataPath);
3538

3639
// STEP 3: Auto featurize, auto train and auto hyperparameter tune
3740
Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
38-
var runResults = mlContext.Auto()
39-
.CreateBinaryClassificationExperiment(ExperimentTime)
40-
.Execute(trainDataView, LabelColumn);
41+
IEnumerable<RunResult<BinaryClassificationMetrics>> runResults = mlContext.Auto()
42+
.CreateBinaryClassificationExperiment(ExperimentTime)
43+
.Execute(trainDataView, LabelColumn);
4144

4245
// STEP 4: Print metric from the best model
43-
var best = runResults.Best();
46+
RunResult<BinaryClassificationMetrics> best = runResults.Best();
4447
Console.WriteLine($"Total models produced: {runResults.Count()}");
4548
Console.WriteLine($"Best model's trainer: {best.TrainerName}");
4649
Console.WriteLine($"Accuracy of best model from validation data: {best.ValidationMetrics.Accuracy}");
4750

4851
// STEP 5: Evaluate test data
49-
var testDataViewWithBestScore = best.Model.Transform(testDataView);
50-
var testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore, label: LabelColumn);
52+
IDataView testDataViewWithBestScore = best.Model.Transform(testDataView);
53+
BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore, label: LabelColumn);
5154
Console.WriteLine($"Accuracy of best model on test data: {testMetrics.Accuracy}");
5255

5356
// STEP 6: Save the best model for later deployment and inferencing
54-
using (var fs = File.Create(ModelPath))
57+
using (FileStream fs = File.Create(ModelPath))
5558
best.Model.SaveTo(mlContext, fs);
5659

5760
Console.WriteLine("Press any key to continue...");

src/Samples/AutoTrainMulticlassClassification.cs

+17-14
Original file line number | Diff line number | Diff line change
@@ -3,54 +3,57 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Collections.Generic;
67
using System.IO;
78
using System.Linq;
89
using Microsoft.Data.DataView;
910
using Microsoft.ML;
1011
using Microsoft.ML.Auto;
1112
using Microsoft.ML.Data;
13+
using Samples.Helpers;
1214

1315
namespace Samples
1416
{
1517
public class AutoTrainMulticlassClassification
1618
{
1719
private static string BaseDatasetsLocation = @"../../../../src/Samples/Data";
18-
private static string TrainDataPath = $"{BaseDatasetsLocation}/iris-train.txt";
19-
private static string TestDataPath = $"{BaseDatasetsLocation}/iris-test.txt";
20-
private static string ModelPath = $"{BaseDatasetsLocation}/IrisClassificationModel.zip";
20+
private static string TrainDataPath = $"{BaseDatasetsLocation}/optdigits-train.csv";
21+
private static string TestDataPath = $"{BaseDatasetsLocation}/optdigits-test.csv";
22+
private static string ModelPath = $"{BaseDatasetsLocation}/OptDigits.zip";
2123
private static uint ExperimentTime = 60;
2224

2325
public static void Run()
2426
{
2527
MLContext mlContext = new MLContext();
2628

2729
// STEP 1: Infer columns
28-
var columnInference = mlContext.Auto().InferColumns(TrainDataPath);
30+
ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDataPath);
31+
ConsoleHelper.Print(columnInference);
2932

3033
// STEP 2: Load data
31-
var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
32-
var trainDataView = textLoader.Read(TrainDataPath);
33-
var testDataView = textLoader.Read(TestDataPath);
34+
TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
35+
IDataView trainDataView = textLoader.Read(TrainDataPath);
36+
IDataView testDataView = textLoader.Read(TestDataPath);
3437

3538
// STEP 3: Auto featurize, auto train and auto hyperparameter tune
3639
Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
37-
var runResults = mlContext.Auto()
38-
.CreateMulticlassClassificationExperiment(60)
39-
.Execute(trainDataView);
40+
IEnumerable<RunResult<MultiClassClassifierMetrics>> runResults = mlContext.Auto()
41+
.CreateMulticlassClassificationExperiment(60)
42+
.Execute(trainDataView);
4043

4144
// STEP 4: Print metric from the best model
42-
var best = runResults.Best();
45+
RunResult<MultiClassClassifierMetrics> best = runResults.Best();
4346
Console.WriteLine($"Total models produced: {runResults.Count()}");
4447
Console.WriteLine($"Best model's trainer: {best.TrainerName}");
4548
Console.WriteLine($"AccuracyMacro of best model from validation data: {best.ValidationMetrics.AccuracyMacro}");
4649

4750
// STEP 5: Evaluate test data
48-
var testDataViewWithBestScore = best.Model.Transform(testDataView);
49-
var testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore);
51+
IDataView testDataViewWithBestScore = best.Model.Transform(testDataView);
52+
MultiClassClassifierMetrics testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore);
5053
Console.WriteLine($"AccuracyMacro of best model on test data: {testMetrics.AccuracyMacro}");
5154

5255
// STEP 6: Save the best model for later deployment and inferencing
53-
using (var fs = File.Create(ModelPath))
56+
using (FileStream fs = File.Create(ModelPath))
5457
best.Model.SaveTo(mlContext, fs);
5558

5659
Console.WriteLine("Press any key to continue...");

src/Samples/AutoTrainRegression.cs

+15-12
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,14 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Collections.Generic;
67
using System.IO;
78
using System.Linq;
89
using Microsoft.Data.DataView;
910
using Microsoft.ML;
1011
using Microsoft.ML.Auto;
1112
using Microsoft.ML.Data;
13+
using Samples.Helpers;
1214

1315
namespace Samples
1416
{
@@ -26,32 +28,33 @@ public static void Run()
2628
MLContext mlContext = new MLContext();
2729

2830
// STEP 1: Infer columns
29-
var columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn);
31+
ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn);
32+
ConsoleHelper.Print(columnInference);
3033

3134
// STEP 2: Load data
32-
var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
33-
var trainDataView = textLoader.Read(TrainDataPath);
34-
var testDataView = textLoader.Read(TestDataPath);
35+
TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
36+
IDataView trainDataView = textLoader.Read(TrainDataPath);
37+
IDataView testDataView = textLoader.Read(TestDataPath);
3538

3639
// STEP 3: Auto featurize, auto train and auto hyperparameter tune
3740
Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
38-
var runResults = mlContext.Auto()
39-
.CreateRegressionExperiment(60)
40-
.Execute(trainDataView, LabelColumn);
41-
41+
IEnumerable<RunResult<RegressionMetrics>> runResults = mlContext.Auto()
42+
.CreateRegressionExperiment(60)
43+
.Execute(trainDataView, LabelColumn);
44+
4245
// STEP 4: Print metric from best model
43-
var best = runResults.Best();
46+
RunResult<RegressionMetrics> best = runResults.Best();
4447
Console.WriteLine($"Total models produced: {runResults.Count()}");
4548
Console.WriteLine($"Best model's trainer: {best.TrainerName}");
4649
Console.WriteLine($"RSquared of best model from validation data: {best.ValidationMetrics.RSquared}");
4750

4851
// STEP 5: Evaluate test data
49-
var testDataViewWithBestScore = best.Model.Transform(testDataView);
50-
var testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, label: LabelColumn);
52+
IDataView testDataViewWithBestScore = best.Model.Transform(testDataView);
53+
RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, label: LabelColumn);
5154
Console.WriteLine($"RSquared of best model on test data: {testMetrics.RSquared}");
5255

5356
// STEP 6: Save the best model for later deployment and inferencing
54-
using (var fs = File.Create(ModelPath))
57+
using (FileStream fs = File.Create(ModelPath))
5558
best.Model.SaveTo(mlContext, fs);
5659

5760
Console.WriteLine("Press any key to continue...");

src/Samples/Cancellation.cs

+8-5
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,15 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Collections.Generic;
67
using System.Diagnostics;
78
using System.Linq;
89
using System.Threading;
910
using Microsoft.Data.DataView;
1011
using Microsoft.ML;
1112
using Microsoft.ML.Auto;
1213
using Microsoft.ML.Data;
14+
using Samples.Helpers;
1315

1416
namespace Samples
1517
{
@@ -26,12 +28,13 @@ public static void Run()
2628
MLContext mlContext = new MLContext();
2729

2830
// STEP 1: Infer columns
29-
var columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn, ',');
31+
ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn, ',');
32+
ConsoleHelper.Print(columnInference);
3033

3134
// STEP 2: Load data
32-
var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
33-
var trainDataView = textLoader.Read(TrainDataPath);
34-
var testDataView = textLoader.Read(TestDataPath);
35+
TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
36+
IDataView trainDataView = textLoader.Read(TrainDataPath);
37+
IDataView testDataView = textLoader.Read(TestDataPath);
3538

3639
int cancelAfterInSeconds = 20;
3740
CancellationTokenSource cts = new CancellationTokenSource();
@@ -41,7 +44,7 @@ public static void Run()
4144

4245
// STEP 3: Auto inference with a cancellation token
4346
Console.WriteLine($"Invoking an experiment that will be cancelled after {cancelAfterInSeconds} seconds");
44-
var runResults = mlContext.Auto()
47+
IEnumerable<RunResult<RegressionMetrics>> runResults = mlContext.Auto()
4548
.CreateRegressionExperiment(new RegressionExperimentSettings()
4649
{
4750
MaxExperimentTimeInSeconds = 60,

src/Samples/CustomizeTraining.cs

+7-5
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
using Microsoft.ML;
88
using Microsoft.ML.Auto;
99
using Microsoft.ML.Data;
10+
using Samples.Helpers;
1011

1112
namespace Samples
1213
{
@@ -23,16 +24,17 @@ public static void Run()
2324
MLContext mlContext = new MLContext();
2425

2526
// STEP 1: Infer columns
26-
var columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn, ',');
27+
ColumnInferenceResults columnInference = mlContext.Auto().InferColumns(TrainDataPath, LabelColumn, ',');
28+
ConsoleHelper.Print(columnInference);
2729

2830
// STEP 2: Load data
29-
var textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
30-
var trainDataView = textLoader.Read(TrainDataPath);
31-
var testDataView = textLoader.Read(TestDataPath);
31+
TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderArgs);
32+
IDataView trainDataView = textLoader.Read(TrainDataPath);
33+
IDataView testDataView = textLoader.Read(TestDataPath);
3234

3335
// STEP 3: Using a different optimizing metric instead of default R2 and whitelisting only LightGbm
3436
Console.WriteLine($"Starting an experiment with L2 optimizing metric and whitelisting LightGbm trainer");
35-
var autoExperiment = mlContext.Auto().CreateRegressionExperiment(new RegressionExperimentSettings()
37+
RegressionExperiment autoExperiment = mlContext.Auto().CreateRegressionExperiment(new RegressionExperimentSettings()
3638
{
3739
MaxExperimentTimeInSeconds = 20,
3840
OptimizingMetric = RegressionMetric.L2,

src/Samples/Data/README.md

+3-7
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,11 @@ The datasets are provided under the original terms that Microsoft received such
1616
>
1717
>Original readme: https://meta.wikimedia.org/wiki/Research:Detox
1818
19-
### UCI Iris Flower Dataset
19+
### MNIST
2020

21-
>Redistributing the datasets "iris-test.txt" and "iris-train.txt" with attribution:
21+
> MNIST data originally from [NIST](https://www.nist.gov) and modified by Chris Burges, Corinna Cortes, and Yann LeCun. http://yann.lecun.com/exdb/mnist/
2222
>
23-
>Dua, D. and Karra Taniskidou, E. (2017). UCI Machine Learning Repository [https://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
24-
>
25-
>With modifications to "iris.txt" by changing the separator character, order of columns, and numerical encoding of labels.
26-
>
27-
>https://archive.ics.uci.edu/ml/datasets/iris
23+
> More information: https://en.wikipedia.org/wiki/MNIST_database
2824
2925
### NYC Taxi Fare
3026

src/Samples/Data/iris-test.txt

-31
This file was deleted.

0 commit comments

Comments
 (0)