diff --git a/Microsoft.ML.AutoML.sln b/Microsoft.ML.AutoML.sln
deleted file mode 100644
index 280cef5704..0000000000
--- a/Microsoft.ML.AutoML.sln
+++ /dev/null
@@ -1,91 +0,0 @@
-
-Microsoft Visual Studio Solution File, Format Version 12.00
-# Visual Studio 15
-VisualStudioVersion = 15.0.28010.2050
-MinimumVisualStudioVersion = 10.0.40219.1
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Auto", "src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj", "{B3727729-3DF8-47E0-8710-9B41DAF55817}"
-EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Tests", "test\Microsoft.ML.AutoML.Tests\Microsoft.ML.AutoML.Tests.csproj", "{55ACB7E2-053D-43BB-88E8-0E102FBD62F0}"
-EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.csproj", "{ED714FA5-6F89-401B-9E7F-CADF1373C553}"
-EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}"
-EndProject
-Global
- GlobalSection(SolutionConfigurationPlatforms) = preSolution
- Debug|Any CPU = Debug|Any CPU
- Debug-Intrinsics|Any CPU = Debug-Intrinsics|Any CPU
- Debug-netfx|Any CPU = Debug-netfx|Any CPU
- Release|Any CPU = Release|Any CPU
- Release-Intrinsics|Any CPU = Release-Intrinsics|Any CPU
- Release-netfx|Any CPU = Release-netfx|Any CPU
- EndGlobalSection
- GlobalSection(ProjectConfigurationPlatforms) = postSolution
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release|Any CPU.Build.0 = Release|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
- {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release|Any CPU.Build.0 = Release|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
- {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release|Any CPU.Build.0 = Release|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
- {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release|Any CPU.Build.0 = Release|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
- {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug|Any CPU.Build.0 = Debug|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release|Any CPU.ActiveCfg = Release|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release|Any CPU.Build.0 = Release|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
- {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
- EndGlobalSection
- GlobalSection(SolutionProperties) = preSolution
- HideSolutionNode = FALSE
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- SolutionGuid = {8C1BC26C-B87E-47CD-928E-00EFE4353B40}
- EndGlobalSection
-EndGlobal
diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln
index 6b0b880210..daa4ff4511 100644
--- a/Microsoft.ML.sln
+++ b/Microsoft.ML.sln
@@ -274,6 +274,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.FastTree", "Mi
pkg\Microsoft.ML.FastTree\Microsoft.ML.FastTree.symbols.nupkgproj = pkg\Microsoft.ML.FastTree\Microsoft.ML.FastTree.symbols.nupkgproj
EndProjectSection
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Auto", "src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj", "{D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Tests", "test\Microsoft.ML.AutoML.Tests\Microsoft.ML.AutoML.Tests.csproj", "{D48126A1-5334-4575-BC91-4CDAA754C8C8}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.csproj", "{C2F953F9-9825-48AB-88D8-D4538268F017}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{73D4685B-94D2-4C28-A434-16ED6CA39BDE}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -968,6 +976,54 @@ Global
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release|Any CPU.Build.0 = Release|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Release|Any CPU.Build.0 = Release|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
+ {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release|Any CPU.Build.0 = Release|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -1055,6 +1111,10 @@ Global
{AD7058C9-5608-49A8-BE23-58C33A74EE91} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
{E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{B1B3F284-FA3D-4D76-A712-FF04495D244B} = {D3D38B03-B557-484D-8348-8BADEE4DF592}
+ {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
+ {D48126A1-5334-4575-BC91-4CDAA754C8C8} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
+ {C2F953F9-9825-48AB-88D8-D4538268F017} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
+ {73D4685B-94D2-4C28-A434-16ED6CA39BDE} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
diff --git a/build.proj b/build.proj
index 3dd5edd0e7..15fea4e309 100644
--- a/build.proj
+++ b/build.proj
@@ -22,7 +22,6 @@
-
diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs
new file mode 100644
index 0000000000..2957a2c635
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs
@@ -0,0 +1,23 @@
+using Microsoft.ML;
+using Microsoft.ML.Auto;
+
+namespace Samples.AutoML
+{
+ public static class BinaryClassificationExperiment
+ {
+ public static void Example()
+ {
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // Download and featurize the dataset.
+ var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+ // Run an AutoML binary classification experiment with a 60-second time budget; "IsOver50K" is presumably the label column name.
+ var experimentResult = mlContext.Auto()
+ .CreateBinaryClassificationExperiment(60)
+ .Execute(dataView, "IsOver50K");
+ }
+ }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs
new file mode 100644
index 0000000000..c383a3963c
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs
@@ -0,0 +1,27 @@
+using Microsoft.ML;
+using Microsoft.ML.Auto;
+
+namespace Samples.AutoML
+{
+ public static class BinaryClassificationExperimentWithExperimentSettings
+ {
+ public static void Example()
+ {
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // Download and featurize the dataset.
+ var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext);
+
+ // Run an AutoML experiment configured through a settings object (60-second time budget); "IsOver50K" is presumably the label column name.
+ var experimentSettings = new BinaryExperimentSettings()
+ {
+ MaxExperimentTimeInSeconds = 60
+ };
+ var experimentResult = mlContext.Auto()
+ .CreateBinaryClassificationExperiment(experimentSettings)
+ .Execute(dataView, "IsOver50K");
+ }
+ }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs
new file mode 100644
index 0000000000..bb0b98907c
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs
@@ -0,0 +1,28 @@
+using Microsoft.ML;
+using Microsoft.ML.Auto;
+using Microsoft.ML.SamplesUtils;
+
+namespace Samples.AutoML
+{
+ public static class MulticlassClassificationExperiment
+ {
+ public static void Example()
+ {
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // This sample uses randomly generated data rather than a downloaded dataset.
+ // Create a list of data examples.
+ var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);
+
+ // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
+ var dataView = mlContext.Data.LoadFromEnumerable(examples);
+
+ // Run an AutoML multiclass classification experiment with a 60-second time budget
+ var experimentResult = mlContext.Auto()
+ .CreateMulticlassClassificationExperiment(60)
+ .Execute(dataView);
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs
new file mode 100644
index 0000000000..8056b81d43
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs
@@ -0,0 +1,32 @@
+using Microsoft.ML;
+using Microsoft.ML.Auto;
+using Microsoft.ML.SamplesUtils;
+
+namespace Samples.AutoML
+{
+ public static class MulticlassClassificationExperimentWithExperimentSettings
+ {
+ public static void Example()
+ {
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // This sample uses randomly generated data rather than a downloaded dataset.
+ // Create a list of data examples.
+ var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000);
+
+ // Convert the examples list to an IDataView object, which is consumable by ML.NET API.
+ var dataView = mlContext.Data.LoadFromEnumerable(examples);
+
+ // Run an AutoML experiment configured through a settings object (60-second time budget)
+ var experimentSettings = new MulticlassExperimentSettings()
+ {
+ MaxExperimentTimeInSeconds = 60
+ };
+ var experimentResult = mlContext.Auto()
+ .CreateMulticlassClassificationExperiment(experimentSettings)
+ .Execute(dataView);
+ }
+ }
+}
\ No newline at end of file
diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs
new file mode 100644
index 0000000000..79938d02c7
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs
@@ -0,0 +1,39 @@
+using Microsoft.ML;
+using Microsoft.ML.Auto;
+using Microsoft.ML.Data;
+
+namespace Samples.AutoML
+{
+ public static class RegressionExperiment
+ {
+ public static void Example()
+ {
+ // Downloading a regression dataset from github.com/dotnet/machinelearning
+ string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();
+
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // Creating a data loader, based on the format of the data
+ // The data is tab separated with all numeric columns.
+ // The first column (index 0) is the label and the rest are numeric features
+ // Here only six numeric columns (indices 1 through 6) are used as features
+ var dataView = mlContext.Data.LoadFromTextFile(dataFile, new TextLoader.Options
+ {
+ Separators = new[] { '\t' },
+ HasHeader = true,
+ Columns = new[]
+ {
+ new TextLoader.Column("Label", DataKind.Single, 0),
+ new TextLoader.Column("Features", DataKind.Single, 1, 6)
+ }
+ });
+
+ // Run an AutoML regression experiment with a 60-second time budget
+ var experimentResult = mlContext.Auto()
+ .CreateRegressionExperiment(60)
+ .Execute(dataView);
+ }
+ }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs
new file mode 100644
index 0000000000..9d9dd061e4
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs
@@ -0,0 +1,43 @@
+using Microsoft.ML;
+using Microsoft.ML.Auto;
+using Microsoft.ML.Data;
+
+namespace Samples.AutoML
+{
+ public static class RegressionExperimentWithExperimentSettings
+ {
+ public static void Example()
+ {
+ // Downloading a regression dataset from github.com/dotnet/machinelearning
+ string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();
+
+ // Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+ // as a catalog of available operations and as the source of randomness.
+ var mlContext = new MLContext();
+
+ // Creating a data loader, based on the format of the data
+ // The data is tab separated with all numeric columns.
+ // The first column (index 0) is the label and the rest are numeric features
+ // Here only six numeric columns (indices 1 through 6) are used as features
+ var dataView = mlContext.Data.LoadFromTextFile(dataFile, new TextLoader.Options
+ {
+ Separators = new[] { '\t' },
+ HasHeader = true,
+ Columns = new[]
+ {
+ new TextLoader.Column("Label", DataKind.Single, 0),
+ new TextLoader.Column("Features", DataKind.Single, 1, 6)
+ }
+ });
+
+ // Run an AutoML experiment configured through a settings object (60-second time budget)
+ var experimentSettings = new RegressionExperimentSettings()
+ {
+ MaxExperimentTimeInSeconds = 60
+ };
+ var experimentResult = mlContext.Auto()
+ .CreateRegressionExperiment(experimentSettings)
+ .Execute(dataView);
+ }
+ }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
index ea16ed7bf3..d63f4d0b9c 100644
--- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
+++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj
@@ -9,6 +9,7 @@
+
diff --git a/src/Microsoft.ML.Auto/API/AutoCatalog.cs b/src/Microsoft.ML.Auto/API/AutoCatalog.cs
new file mode 100644
index 0000000000..bd07b1fcb3
--- /dev/null
+++ b/src/Microsoft.ML.Auto/API/AutoCatalog.cs
@@ -0,0 +1,242 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Auto
+{
+ /// <summary>
+ /// A catalog of all available AutoML tasks.
+ /// </summary>
+ public sealed class AutoCatalog
+ {
+ private readonly MLContext _context;
+
+ internal AutoCatalog(MLContext context)
+ {
+ _context = context;
+ }
+
+ /// <summary>
+ /// Creates a new AutoML experiment to run on a regression dataset.
+ /// </summary>
+ /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
+ /// <returns>A new AutoML regression experiment.</returns>
+ /// <remarks>
+ /// An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
+ /// This is because once AutoML starts training an ML.NET model, AutoML lets the
+ /// model train to completion. For instance, if the first model
+ /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
+ /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
+ /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).
+ /// </remarks>
+ /// <example>
+ /// <format type="text/markdown">
+ /// <![CDATA[
+ /// [!code-csharp[RegressionExperiment](~/../docs/samples/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs)]
+ /// ]]>
+ /// </format>
+ /// </example>
+ public RegressionExperiment CreateRegressionExperiment(uint maxExperimentTimeInSeconds)
+ {
+ return new RegressionExperiment(_context, new RegressionExperimentSettings()
+ {
+ MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
+ });
+ }
+
+ /// <summary>
+ /// Creates a new AutoML experiment to run on a regression dataset.
+ /// </summary>
+ /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
+ /// <returns>A new AutoML regression experiment.</returns>
+ /// <example>
+ /// <format type="text/markdown">
+ /// <![CDATA[
+ /// [!code-csharp[RegressionExperimentWithExperimentSettings](~/../docs/samples/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs)]
+ /// ]]>
+ /// </format>
+ /// </example>
+ public RegressionExperiment CreateRegressionExperiment(RegressionExperimentSettings experimentSettings)
+ {
+ return new RegressionExperiment(_context, experimentSettings);
+ }
+
+ /// <summary>
+ /// Creates a new AutoML experiment to run on a binary classification dataset.
+ /// </summary>
+ /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
+ /// <returns>A new AutoML binary classification experiment.</returns>
+ /// <remarks>
+ /// An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
+ /// This is because once AutoML starts training an ML.NET model, AutoML lets the
+ /// model train to completion. For instance, if the first model
+ /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
+ /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
+ /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).
+ /// </remarks>
+ /// <example>
+ /// <format type="text/markdown">
+ /// <![CDATA[
+ /// [!code-csharp[BinaryClassificationExperiment](~/../docs/samples/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs)]
+ /// ]]>
+ /// </format>
+ /// </example>
+ public BinaryClassificationExperiment CreateBinaryClassificationExperiment(uint maxExperimentTimeInSeconds)
+ {
+ return new BinaryClassificationExperiment(_context, new BinaryExperimentSettings()
+ {
+ MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
+ });
+ }
+
+ /// <summary>
+ /// Creates a new AutoML experiment to run on a binary classification dataset.
+ /// </summary>
+ /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
+ /// <returns>A new AutoML binary classification experiment.</returns>
+ /// <example>
+ /// <format type="text/markdown">
+ /// <![CDATA[
+ /// [!code-csharp[BinaryClassificationExperimentWithExperimentSettings](~/../docs/samples/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs)]
+ /// ]]>
+ /// </format>
+ /// </example>
+ public BinaryClassificationExperiment CreateBinaryClassificationExperiment(BinaryExperimentSettings experimentSettings)
+ {
+ return new BinaryClassificationExperiment(_context, experimentSettings);
+ }
+
+ /// <summary>
+ /// Creates a new AutoML experiment to run on a multiclass classification dataset.
+ /// </summary>
+ /// <param name="maxExperimentTimeInSeconds">Maximum number of seconds that experiment will run.</param>
+ /// <returns>A new AutoML multiclass classification experiment.</returns>
+ /// <remarks>
+ /// An experiment may run for longer than <paramref name="maxExperimentTimeInSeconds"/>.
+ /// This is because once AutoML starts training an ML.NET model, AutoML lets the
+ /// model train to completion. For instance, if the first model
+ /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
+ /// but <paramref name="maxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
+ /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).
+ /// </remarks>
+ /// <example>
+ /// <format type="text/markdown">
+ /// <![CDATA[
+ /// [!code-csharp[MulticlassClassificationExperiment](~/../docs/samples/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs)]
+ /// ]]>
+ /// </format>
+ /// </example>
+ public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(uint maxExperimentTimeInSeconds)
+ {
+ return new MulticlassClassificationExperiment(_context, new MulticlassExperimentSettings()
+ {
+ MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
+ });
+ }
+
+ /// <summary>
+ /// Creates a new AutoML experiment to run on a multiclass classification dataset.
+ /// </summary>
+ /// <param name="experimentSettings">Settings for the AutoML experiment.</param>
+ /// <returns>A new AutoML multiclass classification experiment.</returns>
+ /// <example>
+ /// <format type="text/markdown">
+ /// <![CDATA[
+ /// [!code-csharp[MulticlassClassificationExperimentWithExperimentSettings](~/../docs/samples/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs)]
+ /// ]]>
+ /// </format>
+ /// </example>
+ public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(MulticlassExperimentSettings experimentSettings)
+ {
+ return new MulticlassClassificationExperiment(_context, experimentSettings);
+ }
+
+ /// <summary>
+ /// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
+ /// </summary>
+ /// <param name="path">Path to a dataset file.</param>
+ /// <param name="labelColumnName">The name of the label column.</param>
+ /// <param name="separatorChar">The character used as separator between data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
+ /// <param name="groupColumns">Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
+ /// <returns>Information inferred about the columns in the provided dataset.</returns>
+ /// <remarks>
+ /// Infers information about the name, data type, and purpose of each column.
+ /// The returned <see cref="TextLoader.Options"/> can be used to
+ /// instantiate a <see cref="TextLoader"/>. The <see cref="TextLoader"/> can be used to
+ /// obtain an <see cref="IDataView"/> that can be fed into an AutoML experiment,
+ /// or used elsewhere in the ML.NET ecosystem (i.e., in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>).
+ /// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset.
+ /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.)
+ /// The <see cref="ColumnInformation"/> can be inspected and modified (or kept as is) and used by an AutoML experiment.
+ /// </remarks>
+ public ColumnInferenceResults InferColumns(string path, string labelColumnName = DefaultColumnNames.Label, char? separatorChar = null, bool? allowQuoting = null,
+ bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
+ {
+ UserInputValidationUtil.ValidateInferColumnsArgs(path, labelColumnName);
+ return ColumnInferenceApi.InferColumns(_context, path, labelColumnName, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
+ }
+
+ /// <summary>
+ /// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
+ /// </summary>
+ /// <param name="path">Path to a dataset file.</param>
+ /// <param name="columnInformation">Column information for the dataset.</param>
+ /// <param name="separatorChar">The character used as separator between data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
+ /// <param name="groupColumns">Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
+ /// <returns>Information inferred about the columns in the provided dataset.</returns>
+ /// <remarks>
+ /// Infers information about the name, data type, and purpose of each column.
+ /// The returned <see cref="TextLoader.Options"/> can be used to
+ /// instantiate a <see cref="TextLoader"/>. The <see cref="TextLoader"/> can be used to
+ /// obtain an <see cref="IDataView"/> that can be fed into an AutoML experiment,
+ /// or used elsewhere in the ML.NET ecosystem (i.e., in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>).
+ /// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset.
+ /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.)
+ /// The <see cref="ColumnInformation"/> can be inspected and modified (or kept as is) and used by an AutoML experiment.
+ /// </remarks>
+ public ColumnInferenceResults InferColumns(string path, ColumnInformation columnInformation, char? separatorChar = null, bool? allowQuoting = null,
+ bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
+ {
+ columnInformation = columnInformation ?? new ColumnInformation();
+ UserInputValidationUtil.ValidateInferColumnsArgs(path, columnInformation);
+ return ColumnInferenceApi.InferColumns(_context, path, columnInformation, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
+ }
+
+ /// <summary>
+ /// Infers information about the columns of a dataset in a file located at <paramref name="path"/>.
+ /// </summary>
+ /// <param name="path">Path to a dataset file.</param>
+ /// <param name="labelColumnIndex">Column index of the label column in the dataset.</param>
+ /// <param name="hasHeader">Whether or not the dataset file has a header row.</param>
+ /// <param name="separatorChar">The character used as separator between data elements in a row. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="allowQuoting">Whether the file can contain columns defined by a quoted string. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format. If <see langword="null"/>, AutoML will try to infer this value.</param>
+ /// <param name="trimWhitespace">Whether trailing whitespace should be removed from dataset file lines.</param>
+ /// <param name="groupColumns">Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See <see cref="TextLoader.Range"/> for more information.</param>
+ /// <returns>Information inferred about the columns in the provided dataset.</returns>
+ /// <remarks>
+ /// Infers information about the name, data type, and purpose of each column.
+ /// The returned <see cref="TextLoader.Options"/> can be used to
+ /// instantiate a <see cref="TextLoader"/>. The <see cref="TextLoader"/> can be used to
+ /// obtain an <see cref="IDataView"/> that can be fed into an AutoML experiment,
+ /// or used elsewhere in the ML.NET ecosystem (i.e., in <see cref="IEstimator{TTransformer}.Fit(IDataView)"/>).
+ /// The <see cref="ColumnInformation"/> contains the inferred purpose of each column in the dataset.
+ /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.)
+ /// The <see cref="ColumnInformation"/> can be inspected and modified (or kept as is) and used by an AutoML experiment.
+ /// </remarks>
+ public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, bool hasHeader = false, char? separatorChar = null,
+ bool? allowQuoting = null, bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true)
+ {
+ UserInputValidationUtil.ValidateInferColumnsArgs(path);
+ return ColumnInferenceApi.InferColumns(_context, path, labelColumnIndex, hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Auto/API/AutoInferenceCatalog.cs b/src/Microsoft.ML.Auto/API/AutoInferenceCatalog.cs
deleted file mode 100644
index adf04111f2..0000000000
--- a/src/Microsoft.ML.Auto/API/AutoInferenceCatalog.cs
+++ /dev/null
@@ -1,79 +0,0 @@
-// Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using Microsoft.ML.Data;
-
-namespace Microsoft.ML.Auto
-{
- public sealed class AutoMLCatalog
- {
- private readonly MLContext _context;
-
- internal AutoMLCatalog(MLContext context)
- {
- _context = context;
- }
-
- public RegressionExperiment CreateRegressionExperiment(uint maxExperimentTimeInSeconds)
- {
- return new RegressionExperiment(_context, new RegressionExperimentSettings()
- {
- MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
- });
- }
-
- public RegressionExperiment CreateRegressionExperiment(RegressionExperimentSettings experimentSettings)
- {
- return new RegressionExperiment(_context, experimentSettings);
- }
-
- public BinaryClassificationExperiment CreateBinaryClassificationExperiment(uint maxExperimentTimeInSeconds)
- {
- return new BinaryClassificationExperiment(_context, new BinaryExperimentSettings()
- {
- MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
- });
- }
-
- public BinaryClassificationExperiment CreateBinaryClassificationExperiment(BinaryExperimentSettings experimentSettings)
- {
- return new BinaryClassificationExperiment(_context, experimentSettings);
- }
-
- public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(uint maxExperimentTimeInSeconds)
- {
- return new MulticlassClassificationExperiment(_context, new MulticlassExperimentSettings()
- {
- MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds
- });
- }
-
- public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(MulticlassExperimentSettings experimentSettings)
- {
- return new MulticlassClassificationExperiment(_context, experimentSettings);
- }
-
- public ColumnInferenceResults InferColumns(string path, string labelColumn = DefaultColumnNames.Label, char? separatorChar = null, bool? allowQuotedStrings = null,
- bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true)
- {
- UserInputValidationUtil.ValidateInferColumnsArgs(path, labelColumn);
- return ColumnInferenceApi.InferColumns(_context, path, labelColumn, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns);
- }
-
- public ColumnInferenceResults InferColumns(string path, ColumnInformation columnInformation, char? separatorChar = null, bool? allowQuotedStrings = null,
- bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true)
- {
- columnInformation = columnInformation ?? new ColumnInformation();
- UserInputValidationUtil.ValidateInferColumnsArgs(path, columnInformation);
- return ColumnInferenceApi.InferColumns(_context, path, columnInformation, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns);
- }
-
- public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, bool hasHeader = false, char? separatorChar = null,
- bool? allowQuotedStrings = null, bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true)
- {
- UserInputValidationUtil.ValidateInferColumnsArgs(path);
- return ColumnInferenceApi.InferColumns(_context, path, labelColumnIndex, hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns);
- }
- }
-}
diff --git a/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs b/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
index 3ab9dbb7a1..45b2715eec 100644
--- a/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
+++ b/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs
@@ -6,41 +6,132 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.FastTree;
+using Microsoft.ML.Trainers.LightGbm;
namespace Microsoft.ML.Auto
{
+ ///
+ /// Settings for AutoML experiments on binary classification datasets.
+ ///
public sealed class BinaryExperimentSettings : ExperimentSettings
{
+ ///
+ /// Metric that AutoML will try to optimize over the course of the experiment.
+ ///
public BinaryClassificationMetric OptimizingMetric { get; set; } = BinaryClassificationMetric.Accuracy;
+
+ ///
+ /// Collection of trainers the AutoML experiment can leverage.
+ ///
+ ///
+ /// The collection is auto-populated with all possible trainers (all values of ).
+ ///
public ICollection Trainers { get; } =
Enum.GetValues(typeof(BinaryClassificationTrainer)).OfType().ToList();
}
+ ///
+ /// Binary classification metric that AutoML will aim to optimize in its sweeping process during an experiment.
+ ///
public enum BinaryClassificationMetric
{
+ ///
+ /// See .
+ ///
Accuracy,
+
+ ///
+ /// See .
+ ///
AreaUnderRocCurve,
+
+ ///
+ /// See .
+ ///
AreaUnderPrecisionRecallCurve,
+
+ ///
+ /// See .
+ ///
F1Score,
+
+ ///
+ /// See .
+ ///
PositivePrecision,
+
+ ///
+ /// See .
+ ///
PositiveRecall,
+
+ ///
+ /// See .
+ ///
NegativePrecision,
+
+ ///
+ /// See .
+ ///
NegativeRecall,
}
+ ///
+ /// Enumeration of ML.NET binary classification trainers used by AutoML.
+ ///
public enum BinaryClassificationTrainer
{
+ ///
+ /// See .
+ ///
AveragedPerceptron,
+
+ ///
+ /// See .
+ ///
FastForest,
+
+ ///
+ /// See .
+ ///
FastTree,
+
+ ///
+ /// See .
+ ///
LightGbm,
- LinearSupportVectorMachines,
+
+ ///
+ /// See .
+ ///
+ LinearSvm,
+
+ ///
+ /// See .
+ ///
LbfgsLogisticRegression,
+
+ ///
+ /// See .
+ ///
SdcaLogisticRegression,
+
+ ///
+ /// See .
+ ///
SgdCalibrated,
+
+ ///
+ /// See .
+ ///
SymbolicSgdLogisticRegression,
}
+ ///
+ /// AutoML experiment on binary classification datasets.
+ ///
public sealed class BinaryClassificationExperiment : ExperimentBase
{
internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSettings settings)
@@ -54,8 +145,17 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti
}
}
+ ///
+ /// Extension methods that operate over binary experiment run results.
+ ///
public static class BinaryExperimentResultExtensions
{
+ ///
+ /// Select the best run from an enumeration of experiment runs.
+ ///
+ /// Enumeration of AutoML experiment run results.
+ /// Metric to consider when selecting the best run.
+ /// The best experiment run.
public static RunDetail Best(this IEnumerable> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
{
var metricsAgent = new BinaryMetricsAgent(null, metric);
@@ -63,6 +163,12 @@ public static RunDetail Best(this IEnumerable
+ /// Select the best run from an enumeration of experiment cross validation runs.
+ ///
+ /// Enumeration of AutoML experiment cross validation run results.
+ /// Metric to consider when selecting the best run.
+ /// The best experiment run.
public static CrossValidationRunDetail Best(this IEnumerable> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy)
{
var metricsAgent = new BinaryMetricsAgent(null, metric);
diff --git a/src/Microsoft.ML.Auto/API/ColumnInference.cs b/src/Microsoft.ML.Auto/API/ColumnInference.cs
index 588116897d..a10c2fcbc2 100644
--- a/src/Microsoft.ML.Auto/API/ColumnInference.cs
+++ b/src/Microsoft.ML.Auto/API/ColumnInference.cs
@@ -8,20 +8,85 @@
namespace Microsoft.ML.Auto
{
+ ///
+ /// Contains information AutoML inferred about columns in a dataset.
+ ///
public sealed class ColumnInferenceResults
{
+ ///
+ /// Inferred for the dataset.
+ ///
+ ///
+ /// Can be used to instantiate a new to load
+ /// data into an .
+ ///
public TextLoader.Options TextLoaderOptions { get; internal set; } = new TextLoader.Options();
+
+ ///
+ /// Information about the inferred columns in the dataset.
+ ///
+ ///
+ /// Contains the inferred purposes of each column. See for more details.
+ /// This can be fed to the AutoML API when running an experiment.
+ /// See
+ /// for example.
+ ///
public ColumnInformation ColumnInformation { get; internal set; } = new ColumnInformation();
}
+ ///
+ /// Information about the columns in a dataset.
+ ///
+ ///
+ /// Contains information about the purpose of each column in the dataset. For instance,
+ /// it enumerates the dataset columns that AutoML should treat as categorical,
+ /// the columns AutoML should ignore, which column is the label, etc.
+ /// can be fed to the AutoML API when running an experiment.
+ /// See
+ /// for example.
+ ///
public sealed class ColumnInformation
{
+ ///
+ /// The dataset column to use as the label.
+ ///
public string LabelColumnName { get; set; } = DefaultColumnNames.Label;
+
+ ///
+ /// The dataset column to use for example weight.
+ ///
public string ExampleWeightColumnName { get; set; }
+
+ ///
+ /// The dataset column to use for grouping rows.
+ /// If two examples share the same sampling key column name,
+ /// they are guaranteed to appear in the same subset (train or test).
+ /// This can be used to ensure no label leakage from the train to the test set.
+ /// If <see langword="null"/>, no row grouping will be performed.
+ ///
public string SamplingKeyColumnName { get; set; }
+
+ ///
+ /// The dataset columns that are categorical.
+ ///
+ ///
+ /// Categorical data columns should generally be columns that contain a small number of unique values.
+ ///
public ICollection CategoricalColumnNames { get; } = new Collection();
+
+ ///
+ /// The dataset columns that are numeric.
+ ///
public ICollection NumericColumnNames { get; } = new Collection();
+
+ ///
+ /// The dataset columns that are text.
+ ///
public ICollection TextColumnNames { get; } = new Collection();
+
+ ///
+ /// The dataset columns that AutoML should ignore.
+ ///
public ICollection IgnoredColumnNames { get; } = new Collection();
}
}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Auto/API/ExperimentBase.cs b/src/Microsoft.ML.Auto/API/ExperimentBase.cs
index 381196c54c..b9b5ec8db1 100644
--- a/src/Microsoft.ML.Auto/API/ExperimentBase.cs
+++ b/src/Microsoft.ML.Auto/API/ExperimentBase.cs
@@ -7,9 +7,14 @@
namespace Microsoft.ML.Auto
{
+ ///
+ /// AutoML experiment base class. All task-specific AutoML experiments
+ /// (like ) inherit from this class.
+ ///
+ /// Metrics type used by task-specific AutoML experiments.
public abstract class ExperimentBase where TMetrics : class
{
- protected readonly MLContext Context;
+ private protected readonly MLContext Context;
private readonly IMetricsAgent _metricsAgent;
private readonly OptimizingMetricInfo _optimizingMetricInfo;
@@ -32,17 +37,57 @@ internal ExperimentBase(MLContext context,
_trainerWhitelist = trainerWhitelist;
}
- public IEnumerable> Execute(IDataView trainData, string labelColumn = DefaultColumnNames.Label,
- string samplingKeyColumn = null, IEstimator preFeaturizers = null, IProgress> progressHandler = null)
+ ///
+ /// Executes an AutoML experiment.
+ ///
+ /// The training data used by the AutoML experiment.
+ /// The dataset column used as the label.
+ /// The dataset column used as the sampling key column.
+ /// See for more information.
+ /// Pre-featurizer that AutoML will apply to the data during an
+ /// experiment. (The pre-featurizer will be fit only on the training data split to produce a
+ /// trained transform. Then, the trained transform will be applied to both the training
+ /// data split and corresponding validation data split.)
+ /// A user-defined object that implements
+ /// the interface. AutoML will invoke the method
+ /// after each model it produces during the
+ /// course of the experiment.
+ ///
+ /// An enumeration of all the runs in an experiment. See
+ /// for more information on the contents of a run.
+ ///
+ /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
+ ///
+ public IEnumerable> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label,
+ string samplingKeyColumn = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null)
{
var columnInformation = new ColumnInformation()
{
- LabelColumnName = labelColumn,
+ LabelColumnName = labelColumnName,
SamplingKeyColumnName = samplingKeyColumn
};
- return Execute(trainData, columnInformation, preFeaturizers, progressHandler);
+ return Execute(trainData, columnInformation, preFeaturizer, progressHandler);
}
+ ///
+ /// Executes an AutoML experiment.
+ ///
+ /// The training data to be used by the AutoML experiment.
+ /// Column information for the dataset.
+ /// Pre-featurizer that AutoML will apply to the data during an
+ /// experiment. (The pre-featurizer will be fit only on the training data split to produce a
+ /// trained transform. Then, the trained transform will be applied to both the training
+ /// data split and corresponding validation data split.)
+ /// A user-defined object that implements
+ /// the interface. AutoML will invoke the method
+ /// after each model it produces during the
+ /// course of the experiment.
+ ///
+ /// An enumeration of all the runs in an experiment. See
+ /// for more information on the contents of a run.
+ ///
+ /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
+ ///
public IEnumerable> Execute(IDataView trainData, ColumnInformation columnInformation,
IEstimator preFeaturizer = null, IProgress> progressHandler = null)
{
@@ -66,12 +111,52 @@ public IEnumerable> Execute(IDataView trainData, ColumnInfor
}
}
- public IEnumerable> Execute(IDataView trainData, IDataView validationData, string labelColumn = DefaultColumnNames.Label, IEstimator preFeaturizer = null, IProgress> progressHandler = null)
+ ///
+ /// Executes an AutoML experiment.
+ ///
+ /// The training data to be used by the AutoML experiment.
+ /// The validation data to be used by the AutoML experiment.
+ /// The name of the label column.
+ /// Pre-featurizer that AutoML will apply to the data during an
+ /// experiment. (The pre-featurizer will be fit only on the training data split to produce a
+ /// trained transform. Then, the trained transform will be applied to both the training
+ /// data split and corresponding validation data split.)
+ /// A user-defined object that implements
+ /// the interface. AutoML will invoke the method
+ /// after each model it produces during the
+ /// course of the experiment.
+ ///
+ /// An enumeration of all the runs in an experiment. See
+ /// for more information on the contents of a run.
+ ///
+ /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
+ ///
+ public IEnumerable> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator preFeaturizer = null, IProgress> progressHandler = null)
{
- var columnInformation = new ColumnInformation() { LabelColumnName = labelColumn };
+ var columnInformation = new ColumnInformation() { LabelColumnName = labelColumnName };
return Execute(trainData, validationData, columnInformation, preFeaturizer, progressHandler);
}
+ ///
+ /// Executes an AutoML experiment.
+ ///
+ /// The training data to be used by the AutoML experiment.
+ /// The validation data to be used by the AutoML experiment.
+ /// Column information for the dataset.
+ /// Pre-featurizer that AutoML will apply to the data during an
+ /// experiment. (The pre-featurizer will be fit only on the training data split to produce a
+ /// trained transform. Then, the trained transform will be applied to both the training
+ /// data split and corresponding validation data split.)
+ /// A user-defined object that implements
+ /// the interface. AutoML will invoke the method
+ /// after each model it produces during the
+ /// course of the experiment.
+ ///
+ /// An enumeration of all the runs in an experiment. See
+ /// for more information on the contents of a run.
+ ///
+ /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
+ ///
public IEnumerable> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator preFeaturizer = null, IProgress> progressHandler = null)
{
if (validationData == null)
@@ -83,6 +168,26 @@ public IEnumerable> Execute(IDataView trainData, IDataView v
return ExecuteTrainValidate(trainData, columnInformation, validationData, preFeaturizer, progressHandler);
}
+ ///
+ /// Executes an AutoML experiment.
+ ///
+ /// The training data to be used by the AutoML experiment.
+ /// The number of cross validation folds into which the training data should be divided when fitting a model.
+ /// Column information for the dataset.
+ /// Pre-featurizer that AutoML will apply to the data during an
+ /// experiment. (The pre-featurizer will be fit only on the training data split to produce a
+ /// trained transform. Then, the trained transform will be applied to both the training
+ /// data split and corresponding validation data split.)
+ /// A user-defined object that implements
+ /// the interface. AutoML will invoke the method
+ /// after each model it produces during the
+ /// course of the experiment.
+ ///
+ /// An enumeration of all the runs in an experiment. See
+ /// for more information on the contents of a run.
+ ///
+ /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
+ ///
public IEnumerable> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null)
{
UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds);
@@ -90,14 +195,35 @@ public IEnumerable> Execute(IDataView trainDa
return ExecuteCrossVal(splitResult.trainDatasets, columnInformation, splitResult.validationDatasets, preFeaturizer, progressHandler);
}
+ ///
+ /// Executes an AutoML experiment.
+ ///
+ /// The training data to be used by the AutoML experiment.
+ /// The number of cross validation folds into which the training data should be divided when fitting a model.
+ /// The name of the label column.
+ /// The name of the sampling key column.
+ /// Pre-featurizer that AutoML will apply to the data during an
+ /// experiment. (The pre-featurizer will be fit only on the training data split to produce a
+ /// trained transform. Then, the trained transform will be applied to both the training
+ /// data split and corresponding validation data split.)
+ /// A user-defined object that implements
+ /// the interface. AutoML will invoke the method
+ /// after each model it produces during the
+ /// course of the experiment.
+ ///
+ /// An enumeration of all the runs in an experiment. See
+ /// for more information on the contents of a run.
+ ///
+ /// Depending on the size of your data, the AutoML experiment could take a long time to execute.
+ ///
public IEnumerable> Execute(IDataView trainData,
- uint numberOfCVFolds, string labelColumn = DefaultColumnNames.Label,
+ uint numberOfCVFolds, string labelColumnName = DefaultColumnNames.Label,
string samplingKeyColumn = null, IEstimator preFeaturizer = null,
Progress> progressHandler = null)
{
var columnInformation = new ColumnInformation()
{
- LabelColumnName = labelColumn,
+ LabelColumnName = labelColumnName,
SamplingKeyColumnName = samplingKeyColumn
};
return Execute(trainData, numberOfCVFolds, columnInformation, preFeaturizer, progressHandler);
diff --git a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs
index 43c6c8befe..891f3615e0 100644
--- a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs
+++ b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs
@@ -7,14 +7,42 @@
namespace Microsoft.ML.Auto
{
- public class ExperimentSettings
+ ///
+ /// Base class for experiment settings. All task-specific AutoML experiment settings
+ /// (like ) inherit from this class.
+ ///
+ public abstract class ExperimentSettings
{
+ ///
+ /// Maximum time in seconds the experiment is allowed to run.
+ ///
+ ///
+ /// An experiment may run for longer than <see cref="MaxExperimentTimeInSeconds"/>.
+ /// This is because once AutoML starts training an ML.NET model, AutoML lets the
+ /// model train to completion. For instance, if the first model
+ /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
+ /// but <see cref="MaxExperimentTimeInSeconds"/> was the number of seconds in 6 hours,
+ /// the experiment will run for 4 + 5 = 9 hours (not 6 hours).
+ ///
public uint MaxExperimentTimeInSeconds { get; set; } = 24 * 60 * 60;
+
+ ///
+ /// Cancellation token for the AutoML experiment. It propagates the notification
+ /// that the experiment should be canceled.
+ ///
+ ///
+ /// An experiment may not immediately stop after cancellation.
+ /// This is because once AutoML starts training an ML.NET model, AutoML lets the
+ /// model train to completion. For instance, if the first model
+ /// AutoML trains takes 4 hours, and the second model trained takes 5 hours,
+ /// but cancellation is requested after 6 hours,
+ /// the experiment will stop after 4 + 5 = 9 hours (not 6 hours).
+ ///
public CancellationToken CancellationToken { get; set; } = default;
///
/// This is a pointer to a directory where all models trained during the AutoML experiment will be saved.
- /// If null, models will be kept in memory instead of written to disk.
+ /// If <see langword="null"/>, models will be kept in memory instead of written to disk.
/// (Please note: for an experiment with high runtime operating on a large dataset, opting to keep models in
/// memory could cause a system to run out of memory.)
///
@@ -23,10 +51,10 @@ public class ExperimentSettings
///
/// This setting controls whether or not an AutoML experiment will make use of ML.NET-provided caching.
/// If set to true, caching will be forced on for all pipelines. If set to false, caching will be forced off.
- /// If set to null (default value), AutoML will decide whether to enable caching for each model.
+ /// If set to <see langword="null"/> (default value), AutoML will decide whether to enable caching for each model.
///
public bool? CacheBeforeTrainer = null;
-
+
internal int MaxModels = int.MaxValue;
internal IDebugLogger DebugLogger;
}
diff --git a/src/Microsoft.ML.Auto/API/InferenceException.cs b/src/Microsoft.ML.Auto/API/InferenceException.cs
index 423c4ae3ce..0e501c5b89 100644
--- a/src/Microsoft.ML.Auto/API/InferenceException.cs
+++ b/src/Microsoft.ML.Auto/API/InferenceException.cs
@@ -6,23 +6,38 @@
namespace Microsoft.ML.Auto
{
- public enum InferenceType
+ ///
+ /// Type of exception encountered by AutoML.
+ ///
+ public enum InferenceExceptionType
{
- ColumnDataKind,
+ ///
+ /// Exception that occurs when AutoML is inferring the data type of a column.
+ ///
+ ColumnDataType,
+
+ ///
+ /// Exception that occurs when AutoML is attempting to split a dataset into distinct columns.
+ ///
ColumnSplit,
- Label,
}
+ ///
+ /// Exception thrown by AutoML.
+ ///
public sealed class InferenceException : Exception
{
- public InferenceType InferenceType;
-
- public InferenceException(InferenceType inferenceType, string message)
+ ///
+ /// Type of AutoML exception that occurred.
+ ///
+ public InferenceExceptionType InferenceExceptionType;
+
+ /// <summary>
+ /// Creates an AutoML inference exception of the given type with the given message.
+ /// </summary>
+ /// <param name="inferenceType">Type of AutoML exception that occurred.</param>
+ /// <param name="message">Message describing the error.</param>
+ internal InferenceException(InferenceExceptionType inferenceType, string message)
: base(message)
{
+ // Store the type: the argument was previously discarded, so the public
+ // field always kept its default value (the first enum member).
+ InferenceExceptionType = inferenceType;
}
- public InferenceException(InferenceType inferenceType, string message, Exception inner)
+ /// <summary>
+ /// Creates an AutoML inference exception of the given type with the given message,
+ /// wrapping the exception that caused it.
+ /// </summary>
+ /// <param name="inferenceType">Type of AutoML exception that occurred.</param>
+ /// <param name="message">Message describing the error.</param>
+ /// <param name="inner">Exception that caused this exception.</param>
+ internal InferenceException(InferenceExceptionType inferenceType, string message, Exception inner)
: base(message, inner)
{
+ // Store the type: the argument was previously discarded, so the public
+ // field always kept its default value (the first enum member).
+ InferenceExceptionType = inferenceType;
}
diff --git a/src/Microsoft.ML.Auto/API/MLContextExtension.cs b/src/Microsoft.ML.Auto/API/MLContextExtension.cs
index 9287fe827c..7b2e6a8a69 100644
--- a/src/Microsoft.ML.Auto/API/MLContextExtension.cs
+++ b/src/Microsoft.ML.Auto/API/MLContextExtension.cs
@@ -4,11 +4,19 @@
namespace Microsoft.ML.Auto
{
+ ///
+ /// Class containing AutoML extension methods to <see cref="MLContext"/>.
+ ///
public static class MLContextExtension
{
- public static AutoMLCatalog Auto(this MLContext mlContext)
+ ///
+ /// Returns a catalog of all possible AutoML operations.
+ ///
+ /// instance.
+ /// A catalog of all possible AutoML operations.
+ public static AutoCatalog Auto(this MLContext mlContext)
{
- return new AutoMLCatalog(mlContext);
+ return new AutoCatalog(mlContext);
}
}
}
diff --git a/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs b/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs
index f7f5a856cb..bd4383d861 100644
--- a/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs
+++ b/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs
@@ -6,39 +6,122 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.FastTree;
+using Microsoft.ML.Trainers.LightGbm;
namespace Microsoft.ML.Auto
{
+ ///
+ /// Settings for AutoML experiments on multiclass classification datasets.
+ ///
public sealed class MulticlassExperimentSettings : ExperimentSettings
{
+ ///
+ /// Metric that AutoML will try to optimize over the course of the experiment.
+ ///
public MulticlassClassificationMetric OptimizingMetric { get; set; } = MulticlassClassificationMetric.MicroAccuracy;
+
+ ///
+ /// Collection of trainers the AutoML experiment can leverage.
+ ///
+ ///
+ /// The collection is auto-populated with all possible trainers (all values of ).
+ ///
public ICollection Trainers { get; } =
Enum.GetValues(typeof(MulticlassClassificationTrainer)).OfType().ToList();
}
+ ///
+ /// Multiclass classification metric that AutoML will aim to optimize in its sweeping process during an experiment.
+ ///
public enum MulticlassClassificationMetric
{
+ ///
+ /// See .
+ ///
MicroAccuracy,
+
+ ///
+ /// See .
+ ///
MacroAccuracy,
+
+ ///
+ /// See .
+ ///
LogLoss,
+
+ ///
+ /// See .
+ ///
LogLossReduction,
+
+ ///
+ /// See .
+ ///
TopKAccuracy,
}
+ ///
+ /// Enumeration of ML.NET multiclass classification trainers used by AutoML.
+ ///
public enum MulticlassClassificationTrainer
{
+ ///
+ /// using .
+ ///
AveragedPerceptronOVA,
+
+ ///
+ /// using .
+ ///
FastForestOVA,
+
+ ///
+ /// using .
+ ///
FastTreeOVA,
+
+ ///
+ /// See .
+ ///
LightGbm,
+
+ ///
+ /// using .
+ ///
LinearSupportVectorMachinesOVA,
+
+ ///
+ /// See .
+ ///
LbfgsMaximumEntropy,
+
+ ///
+ /// using .
+ ///
LbfgsLogisticRegressionOVA,
+
+ ///
+ /// See .
+ ///
SdcaMaximumEntropy,
+
+ ///
+ /// using .
+ ///
SgdCalibratedOVA,
+
+ ///
+ /// using .
+ ///
SymbolicSgdLogisticRegressionOVA,
}
+ ///
+ /// AutoML experiment on multiclass classification datasets.
+ ///
public sealed class MulticlassClassificationExperiment : ExperimentBase
{
internal MulticlassClassificationExperiment(MLContext context, MulticlassExperimentSettings settings)
@@ -52,8 +135,17 @@ internal MulticlassClassificationExperiment(MLContext context, MulticlassExperim
}
}
+ ///
+ /// Extension methods that operate over multiclass experiment run results.
+ ///
public static class MulticlassExperimentResultExtensions
{
+ ///
+ /// Select the best run from an enumeration of experiment runs.
+ ///
+ /// Enumeration of AutoML experiment run results.
+ /// Metric to consider when selecting the best run.
+ /// The best experiment run.
public static RunDetail Best(this IEnumerable> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.MicroAccuracy)
{
var metricsAgent = new MultiMetricsAgent(null, metric);
@@ -61,6 +153,13 @@ public static RunDetail Best(this IEnumerable
+ /// Select the best run from an enumeration of experiment cross validation runs.
+ ///
+ /// Enumeration of AutoML experiment cross validation run results.
+ /// Metric to consider when selecting the best run.
+ /// The best experiment run.
public static CrossValidationRunDetail Best(this IEnumerable> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.MicroAccuracy)
{
var metricsAgent = new MultiMetricsAgent(null, metric);
diff --git a/src/Microsoft.ML.Auto/API/RegressionExperiment.cs b/src/Microsoft.ML.Auto/API/RegressionExperiment.cs
index 51f5988f64..f57fb2470f 100644
--- a/src/Microsoft.ML.Auto/API/RegressionExperiment.cs
+++ b/src/Microsoft.ML.Auto/API/RegressionExperiment.cs
@@ -6,36 +6,108 @@
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.FastTree;
+using Microsoft.ML.Trainers.LightGbm;
namespace Microsoft.ML.Auto
{
+ ///
+ /// Settings for AutoML experiments on regression datasets.
+ ///
public sealed class RegressionExperimentSettings : ExperimentSettings
{
+ ///
+ /// Metric that AutoML will try to optimize over the course of the experiment.
+ ///
public RegressionMetric OptimizingMetric { get; set; } = RegressionMetric.RSquared;
+
+ ///
+ /// Collection of trainers the AutoML experiment can leverage.
+ ///
+ ///
+ /// The collection is auto-populated with all possible trainers (all values of ).
+ ///
public ICollection Trainers { get; } =
Enum.GetValues(typeof(RegressionTrainer)).OfType().ToList();
}
+ ///
+ /// Regression metric that AutoML will aim to optimize in its sweeping process during an experiment.
+ ///
public enum RegressionMetric
{
+ ///
+ /// See .
+ ///
MeanAbsoluteError,
+
+ ///
+ /// See .
+ ///
MeanSquaredError,
+
+ ///
+ /// See .
+ ///
RootMeanSquaredError,
+
+ ///
+ /// See .
+ ///
RSquared
}
+
+ ///
+ /// Enumeration of ML.NET regression trainers used by AutoML.
+ ///
public enum RegressionTrainer
{
+ ///
+ /// See .
+ ///
FastForest,
+
+ ///
+ /// See .
+ ///
FastTree,
+
+ ///
+ /// See .
+ ///
FastTreeTweedie,
+
+ ///
+ /// See .
+ ///
LightGbm,
+
+ ///
+ /// See .
+ ///
OnlineGradientDescent,
+
+ ///
+ /// See .
+ ///
Ols,
+
+ ///
+ /// See .
+ ///
LbfgsPoissonRegression,
+
+ ///
+ /// See .
+ ///
StochasticDualCoordinateAscent,
}
+ ///
+ /// AutoML experiment on regression datasets.
+ ///
public sealed class RegressionExperiment : ExperimentBase
{
internal RegressionExperiment(MLContext context, RegressionExperimentSettings settings)
@@ -49,8 +121,17 @@ internal RegressionExperiment(MLContext context, RegressionExperimentSettings se
}
}
+ ///
+ /// Extension methods that operate over regression experiment run results.
+ ///
public static class RegressionExperimentResultExtensions
{
+ ///
+ /// Select the best run from an enumeration of experiment runs.
+ ///
+ /// Enumeration of AutoML experiment run results.
+ /// Metric to consider when selecting the best run.
+ /// The best experiment run.
public static RunDetail Best(this IEnumerable> results, RegressionMetric metric = RegressionMetric.RSquared)
{
var metricsAgent = new RegressionMetricsAgent(null, metric);
@@ -58,6 +139,12 @@ public static RunDetail Best(this IEnumerable
+ /// Select the best run from an enumeration of experiment cross validation runs.
+ ///
+ /// Enumeration of AutoML experiment cross validation run results.
+ /// Metric to consider when selecting the best run.
+ /// The best experiment run.
public static CrossValidationRunDetail Best(this IEnumerable> results, RegressionMetric metric = RegressionMetric.RSquared)
{
var metricsAgent = new RegressionMetricsAgent(null, metric);
diff --git a/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs b/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs
index 713c820a99..45520e8068 100644
--- a/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs
+++ b/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs
@@ -7,8 +7,20 @@
namespace Microsoft.ML.Auto
{
+ ///
+ /// Details about a cross validation run in an AutoML experiment.
+ ///
+ /// Metrics type for the run.
+ ///
+ /// Over the course of an experiment, many models are evaluated on a dataset
+ /// using cross validation. This object contains information about each model
+ /// evaluated during the AutoML experiment.
+ ///
public sealed class CrossValidationRunDetail : RunDetail
{
+ ///
+ /// Results for each of the cross validation folds.
+ ///
public IEnumerable> Results { get; private set; }
internal CrossValidationRunDetail(string trainerName,
@@ -20,10 +32,35 @@ internal CrossValidationRunDetail(string trainerName,
}
}
+ ///
+ /// Result of a pipeline trained on a cross validation fold.
+ ///
+ /// Metrics type for the run.
public sealed class TrainResult
{
+ ///
+ /// Each fold has training data and validation data. A model trained on the
+ /// fold's training data is evaluated against the validation data,
+ /// and the metrics for that calculation are emitted here.
+ ///
public TMetrics ValidationMetrics { get; private set; }
+
+ ///
+ /// Model trained on the fold during the run.
+ ///
+ ///
+ /// You can use the trained model to obtain predictions on input data.
+ ///
public ITransformer Model { get { return _modelContainer.GetModel(); } }
+
+ ///
+ /// Exception encountered while training the fold. This property is
+ /// <see langword="null"/> if no exception was encountered.
+ ///
+ ///
+ /// If an exception occurred, it's possible some properties in this object
+ /// (like <see cref="Model"/>) could be <see langword="null"/>.
+ ///
public Exception Exception { get; private set; }
private readonly ModelContainer _modelContainer;
diff --git a/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs b/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs
index a83670986d..3600374dea 100644
--- a/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs
+++ b/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs
@@ -6,10 +6,45 @@
namespace Microsoft.ML.Auto
{
+ ///
+ /// Details about an AutoML experiment run.
+ ///
+ ///
+ /// Over the course of an experiment, many models are evaluated on a dataset.
+ /// This object contains information about each model evaluated during
+ /// the AutoML experiment.
+ ///
+ /// Type of the metrics for this experiment. (For instance, <see cref="BinaryClassificationMetrics"/> or <see cref="RegressionMetrics"/>.)
public sealed class RunDetail : RunDetail
{
+ ///
+ /// Metrics of how the trained model performed on the validation data during
+ /// the run.
+ ///
+ ///
+ /// Internally, each run has training data and validation data. A model trained on the
+ /// run's training data is evaluated against the validation data,
+ /// and the metrics for that calculation are emitted here.
+ ///
public TMetrics ValidationMetrics { get; private set; }
+
+
+ ///
+ /// Model trained during the run.
+ ///
+ ///
+ /// You can use the trained model to obtain predictions on input data.
+ ///
public ITransformer Model { get { return _modelContainer.GetModel(); } }
+
+ ///
+ /// Exception encountered during the run. This property is <see langword="null"/> if
+ /// no exception was encountered.
+ ///
+ ///
+ /// If an exception occurred, it's possible some properties in this object
+ /// (like <see cref="Model"/>) could be <see langword="null"/>.
+ ///
public Exception Exception { get; private set; }
private readonly ModelContainer _modelContainer;
@@ -27,10 +62,36 @@ internal RunDetail(string trainerName,
}
}
+ ///
+ /// Details about an AutoML experiment run.
+ ///
+ ///
+ /// In trying to produce the best model, an AutoML experiment evaluates the quality of many models
+ /// on a dataset. This object contains information about each model tried during the AutoML experiment.
+ ///
public abstract class RunDetail
{
+ ///
+ /// String name of the trainer used in this run. (For instance, "LightGbm".)
+ ///
public string TrainerName { get; private set; }
+
+ ///
+ /// Runtime in seconds.
+ ///
+ ///
+ /// Runtime includes model training time. Depending on the size of the data,
+ /// the runtime may be quite long.
+ ///
public double RuntimeInSeconds { get; internal set; }
+
+ ///
+ /// An ML.NET <see cref="IEstimator{TTransformer}"/> that represents the pipeline in this run.
+ ///
+ ///
+ /// You can call <see cref="IEstimator{TTransformer}.Fit(IDataView)"/> on
+ /// this estimator to re-train your pipeline on any <see cref="IDataView"/>.
+ ///
public IEstimator Estimator { get; private set; }
internal Pipeline Pipeline { get; private set; }
diff --git a/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs b/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs
index 6db0fab782..d7462bbf25 100644
--- a/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs
+++ b/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs
@@ -117,7 +117,7 @@ private static TextFileContents.ColumnSplitResult InferSplit(MLContext context,
if (!splitInference.IsSuccess)
{
- throw new InferenceException(InferenceType.ColumnSplit, "Unable to split the file provided into multiple, consistent columns.");
+ throw new InferenceException(InferenceExceptionType.ColumnSplit, "Unable to split the file provided into multiple, consistent columns.");
}
return splitInference;
@@ -141,7 +141,7 @@ private static ColumnTypeInference.InferenceResult InferColumnTypes(MLContext co
if (!typeInferenceResult.IsSuccess)
{
- throw new InferenceException(InferenceType.ColumnDataKind, "Unable to infer column types of the file provided.");
+ throw new InferenceException(InferenceExceptionType.ColumnDataType, "Unable to infer column types of the file provided.");
}
return typeInferenceResult;
diff --git a/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj b/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj
index b13120524f..d97c3a819a 100644
--- a/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj
+++ b/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj
@@ -10,9 +10,6 @@
-
-
-
@@ -26,8 +23,23 @@
Microsoft.ML.Auto
+
+ true
+
+
+
+ 1701;1702
+
+
+
+
+
+
+
+
+
diff --git a/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs b/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs
index 618cf74256..36c3b78247 100644
--- a/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs
+++ b/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs
@@ -206,7 +206,7 @@ private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForest
/// Trained forest, for evaluation of points.
/// Best performance seen thus far.
/// Threshold for when to stop the local search.
- /// Metric type - maximizing or minimizing.
+ /// Whether SMAC should aim to maximize (vs minimize) metric.
///
private Tuple LocalSearch(ParameterSet parent, FastForestRegressionModelParameters forest, double bestVal, double epsilon, bool isMetricMaximizing)
{
diff --git a/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs b/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs
index 9d80ebe09a..c395103769 100644
--- a/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs
+++ b/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs
@@ -53,7 +53,7 @@ private static IEnumerable BuildLbfgsArgsParams()
return new SweepableParam[] {
new SweepableFloatParam("L2Regularization", 0.0f, 1.0f, numSteps: 4),
new SweepableFloatParam("L1Regularization", 0.0f, 1.0f, numSteps: 4),
- new SweepableDiscreteParam("OptmizationTolerance", new object[] { 1e-4f, 1e-7f }),
+ new SweepableDiscreteParam("OptimizationTolerance", new object[] { 1e-4f, 1e-7f }),
new SweepableDiscreteParam("HistorySize", new object[] { 5, 20, 50 }),
new SweepableLongParam("MaximumNumberOfIterations", 1, int.MaxValue),
new SweepableFloatParam("InitialWeightsDiameter", 0.0f, 1.0f, numSteps: 5),
diff --git a/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs b/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs
index 213d555545..cca0d621d2 100644
--- a/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs
+++ b/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs
@@ -291,7 +291,7 @@ public static TrainerName GetTrainerName(BinaryClassificationTrainer binaryTrain
return TrainerName.FastTreeBinary;
case BinaryClassificationTrainer.LightGbm:
return TrainerName.LightGbmBinary;
- case BinaryClassificationTrainer.LinearSupportVectorMachines:
+ case BinaryClassificationTrainer.LinearSvm:
return TrainerName.LinearSvmBinary;
case BinaryClassificationTrainer.LbfgsLogisticRegression:
return TrainerName.LbfgsLogisticRegressionBinary;
diff --git a/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs b/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs
index ee606a6cde..5a65c31304 100644
--- a/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs
+++ b/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs
@@ -245,7 +245,7 @@ internal override IDictionary NamedParameters
{"FeatureColumnName","featureColumnName" },
{"L1Regularization","l1Regularization" },
{"L2Regularization","l2Regularization" },
- {"OptmizationTolerance","optimizationTolerance" },
+ {"OptimizationTolerance","optimizationTolerance" },
{"HistorySize","historySize" },
{"EnforceNonNegativity","enforceNonNegativity" },
};
@@ -368,7 +368,7 @@ internal override IDictionary NamedParameters
{"FeatureColumnName","featureColumnName" },
{"L1Regularization","l1Regularization" },
{"L2Regularization","l2Regularization" },
- {"OptmizationTolerance","optimizationTolerance" },
+ {"OptimizationTolerance","optimizationTolerance" },
{"HistorySize","historySize" },
{"EnforceNonNegativity","enforceNonNegativity" },
};
diff --git a/src/mlnet/mlnet.csproj b/src/mlnet/mlnet.csproj
index f1e2fd5ae3..7a2cf20063 100644
--- a/src/mlnet/mlnet.csproj
+++ b/src/mlnet/mlnet.csproj
@@ -16,6 +16,7 @@
+
diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj
index 3a9140e449..95196ad9d9 100644
--- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj
+++ b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj
@@ -37,5 +37,10 @@
PreserveNewest
+
+
+
+
+