diff --git a/Microsoft.ML.AutoML.sln b/Microsoft.ML.AutoML.sln deleted file mode 100644 index 280cef5704..0000000000 --- a/Microsoft.ML.AutoML.sln +++ /dev/null @@ -1,91 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 15 -VisualStudioVersion = 15.0.28010.2050 -MinimumVisualStudioVersion = 10.0.40219.1 -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Auto", "src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj", "{B3727729-3DF8-47E0-8710-9B41DAF55817}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Tests", "test\Microsoft.ML.AutoML.Tests\Microsoft.ML.AutoML.Tests.csproj", "{55ACB7E2-053D-43BB-88E8-0E102FBD62F0}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.csproj", "{ED714FA5-6F89-401B-9E7F-CADF1373C553}" -EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Debug-Intrinsics|Any CPU = Debug-Intrinsics|Any CPU - Debug-netfx|Any CPU = Debug-netfx|Any CPU - Release|Any CPU = Release|Any CPU - Release-Intrinsics|Any CPU = Release-Intrinsics|Any CPU - Release-netfx|Any CPU = Release-netfx|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug|Any CPU.Build.0 = Debug|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - 
{B3727729-3DF8-47E0-8710-9B41DAF55817}.Release|Any CPU.ActiveCfg = Release|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release|Any CPU.Build.0 = Release|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {B3727729-3DF8-47E0-8710-9B41DAF55817}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release|Any CPU.Build.0 = Release|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {64A7294E-A2C7-4499-8F0B-4BB074047C6B}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug|Any CPU.Build.0 = Debug|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU - 
{55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release|Any CPU.ActiveCfg = Release|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release|Any CPU.Build.0 = Release|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {55ACB7E2-053D-43BB-88E8-0E102FBD62F0}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug|Any CPU.Build.0 = Debug|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release|Any CPU.ActiveCfg = Release|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release|Any CPU.Build.0 = Release|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {ED714FA5-6F89-401B-9E7F-CADF1373C553}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - 
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug|Any CPU.Build.0 = Debug|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release|Any CPU.ActiveCfg = Release|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release|Any CPU.Build.0 = Release|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {8C1BC26C-B87E-47CD-928E-00EFE4353B40} - EndGlobalSection -EndGlobal diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index 6b0b880210..daa4ff4511 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -274,6 +274,14 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.FastTree", "Mi pkg\Microsoft.ML.FastTree\Microsoft.ML.FastTree.symbols.nupkgproj = pkg\Microsoft.ML.FastTree\Microsoft.ML.FastTree.symbols.nupkgproj EndProjectSection EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Auto", "src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj", "{D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Tests", 
"test\Microsoft.ML.AutoML.Tests\Microsoft.ML.AutoML.Tests.csproj", "{D48126A1-5334-4575-BC91-4CDAA754C8C8}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.csproj", "{C2F953F9-9825-48AB-88D8-D4538268F017}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{73D4685B-94D2-4C28-A434-16ED6CA39BDE}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -968,6 +976,54 @@ Global {E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU {E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU {E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release|Any CPU.Build.0 = Release|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + 
{D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release|Any CPU.Build.0 = Release|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {D48126A1-5334-4575-BC91-4CDAA754C8C8}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Release|Any CPU.Build.0 = Release|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU + 
{C2F953F9-9825-48AB-88D8-D4538268F017}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {C2F953F9-9825-48AB-88D8-D4538268F017}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug|Any CPU.Build.0 = Debug|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release|Any CPU.ActiveCfg = Release|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release|Any CPU.Build.0 = Release|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {73D4685B-94D2-4C28-A434-16ED6CA39BDE}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -1055,6 +1111,10 @@ Global {AD7058C9-5608-49A8-BE23-58C33A74EE91} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {E02DA82D-3FEE-4C60-BD80-9EC3C3448DFC} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {B1B3F284-FA3D-4D76-A712-FF04495D244B} = {D3D38B03-B557-484D-8348-8BADEE4DF592} + {D57A04E8-1A2B-4A3E-A96A-229C746C2FBE} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {D48126A1-5334-4575-BC91-4CDAA754C8C8} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} + {C2F953F9-9825-48AB-88D8-D4538268F017} = 
{09EADF06-BE25-4228-AB53-95AE3E15B530} + {73D4685B-94D2-4C28-A434-16ED6CA39BDE} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/build.proj b/build.proj index 3dd5edd0e7..15fea4e309 100644 --- a/build.proj +++ b/build.proj @@ -22,7 +22,6 @@ - diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs new file mode 100644 index 0000000000..2957a2c635 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperiment.cs @@ -0,0 +1,23 @@ +using Microsoft.ML; +using Microsoft.ML.Auto; + +namespace Samples.AutoML +{ + public static class BinaryClassificationExperiment + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download and featurize the dataset. 
+ var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Run an AutoML experiment + var experimentResult = mlContext.Auto() + .CreateBinaryClassificationExperiment(60) + .Execute(dataView, "IsOver50K"); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs new file mode 100644 index 0000000000..c383a3963c --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/AutoML/BinaryClassificationExperimentWithExperimentSettings.cs @@ -0,0 +1,27 @@ +using Microsoft.ML; +using Microsoft.ML.Auto; + +namespace Samples.AutoML +{ + public static class BinaryClassificationExperimentWithExperimentSettings + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download and featurize the dataset. 
+ var dataView = Microsoft.ML.SamplesUtils.DatasetUtils.LoadFeaturizedAdultDataset(mlContext); + + // Run an AutoML experiment + var experimentSettings = new BinaryExperimentSettings() + { + MaxExperimentTimeInSeconds = 60 + }; + var experimentResult = mlContext.Auto() + .CreateBinaryClassificationExperiment(experimentSettings) + .Execute(dataView, "IsOver50K"); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs new file mode 100644 index 0000000000..bb0b98907c --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperiment.cs @@ -0,0 +1,28 @@ +using Microsoft.ML; +using Microsoft.ML.Auto; +using Microsoft.ML.SamplesUtils; + +namespace Samples.AutoML +{ + public static class MulticlassClassificationExperiment + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download and featurize the dataset. + // Create a list of data examples. + var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); + + // Convert the examples list to an IDataView object, which is consumable by ML.NET API. 
+ var dataView = mlContext.Data.LoadFromEnumerable(examples); + + // Run an AutoML experiment + var experimentResult = mlContext.Auto() + .CreateMulticlassClassificationExperiment(60) + .Execute(dataView); + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs new file mode 100644 index 0000000000..8056b81d43 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/AutoML/MulticlassClassificationExperimentWithExperimentSettings.cs @@ -0,0 +1,32 @@ +using Microsoft.ML; +using Microsoft.ML.Auto; +using Microsoft.ML.SamplesUtils; + +namespace Samples.AutoML +{ + public static class MulticlassClassificationExperimentWithExperimentSettings + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Download and featurize the dataset. + // Create a list of data examples. + var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); + + // Convert the examples list to an IDataView object, which is consumable by ML.NET API. 
+ var dataView = mlContext.Data.LoadFromEnumerable(examples); + + // Run an AutoML experiment + var experimentSettings = new MulticlassExperimentSettings() + { + MaxExperimentTimeInSeconds = 60 + }; + var experimentResult = mlContext.Auto() + .CreateMulticlassClassificationExperiment(experimentSettings) + .Execute(dataView); + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs new file mode 100644 index 0000000000..79938d02c7 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperiment.cs @@ -0,0 +1,39 @@ +using Microsoft.ML; +using Microsoft.ML.Auto; +using Microsoft.ML.Data; + +namespace Samples.AutoML +{ + public static class RegressionExperiment + { + public static void Example() + { + // Downloading a regression dataset from github.com/dotnet/machinelearning + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Creating a data loader, based on the format of the data + // The data is tab separated with all numeric columns. 
+ // The first column being the label and rest are numeric features + // Here only six numeric columns are used as features + var dataView = mlContext.Data.LoadFromTextFile(dataFile, new TextLoader.Options + { + Separators = new[] { '\t' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("Features", DataKind.Single, 1, 6) + } + }); + + // Run an AutoML experiment + var experimentResult = mlContext.Auto() + .CreateRegressionExperiment(60) + .Execute(dataView); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs new file mode 100644 index 0000000000..9d9dd061e4 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/AutoML/RegressionExperimentWithExperimentSettings.cs @@ -0,0 +1,43 @@ +using Microsoft.ML; +using Microsoft.ML.Auto; +using Microsoft.ML.Data; + +namespace Samples.AutoML +{ + public static class RegressionExperimentWithExperimentSettings + { + public static void Example() + { + // Downloading a regression dataset from github.com/dotnet/machinelearning + string dataFile = Microsoft.ML.SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset(); + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Creating a data loader, based on the format of the data + // The data is tab separated with all numeric columns. 
+ // The first column being the label and rest are numeric features + // Here only six numeric columns are used as features + var dataView = mlContext.Data.LoadFromTextFile(dataFile, new TextLoader.Options + { + Separators = new[] { '\t' }, + HasHeader = true, + Columns = new[] + { + new TextLoader.Column("Label", DataKind.Single, 0), + new TextLoader.Column("Features", DataKind.Single, 1, 6) + } + }); + + // Run an AutoML experiment configured by the settings object below + var experimentSettings = new RegressionExperimentSettings() + { + MaxExperimentTimeInSeconds = 60 + }; + var experimentResult = mlContext.Auto() + .CreateRegressionExperiment(experimentSettings) + .Execute(dataView); + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index ea16ed7bf3..d63f4d0b9c 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -9,6 +9,7 @@ + diff --git a/src/Microsoft.ML.Auto/API/AutoCatalog.cs b/src/Microsoft.ML.Auto/API/AutoCatalog.cs new file mode 100644 index 0000000000..bd07b1fcb3 --- /dev/null +++ b/src/Microsoft.ML.Auto/API/AutoCatalog.cs @@ -0,0 +1,242 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Data; + +namespace Microsoft.ML.Auto +{ + /// + /// A catalog of all available AutoML tasks. + /// + public sealed class AutoCatalog + { + private readonly MLContext _context; + + internal AutoCatalog(MLContext context) + { + _context = context; + } + + /// + /// Creates a new AutoML experiment to run on a regression dataset. + /// + /// Maximum number of seconds that experiment will run. + /// A new AutoML regression experiment. + /// + /// An experiment may run for longer than . 
+ /// This is because once AutoML starts training an ML.NET model, AutoML lets the + /// model train to completion. For instance, if the first model + /// AutoML trains takes 4 hours, and the second model trained takes 5 hours, + /// but was the number of seconds in 6 hours, + /// the experiment will run for 4 + 5 = 9 hours (not 6 hours). + /// + /// + /// + /// + /// + /// + public RegressionExperiment CreateRegressionExperiment(uint maxExperimentTimeInSeconds) + { + return new RegressionExperiment(_context, new RegressionExperimentSettings() + { + MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds + }); + } + + /// + /// Creates a new AutoML experiment to run on a regression dataset. + /// + /// Settings for the AutoML experiment. + /// A new AutoML regression experiment. + /// + /// + /// + /// + /// + public RegressionExperiment CreateRegressionExperiment(RegressionExperimentSettings experimentSettings) + { + return new RegressionExperiment(_context, experimentSettings); + } + + /// + /// Creates a new AutoML experiment to run on a binary classification dataset. + /// + /// Maximum number of seconds that experiment will run. + /// A new AutoML binary classification experiment. + /// + /// An experiment may run for longer than . + /// This is because once AutoML starts training an ML.NET model, AutoML lets the + /// model train to completion. For instance, if the first model + /// AutoML trains takes 4 hours, and the second model trained takes 5 hours, + /// but was the number of seconds in 6 hours, + /// the experiment will run for 4 + 5 = 9 hours (not 6 hours). + /// + /// + /// + /// + /// + /// + public BinaryClassificationExperiment CreateBinaryClassificationExperiment(uint maxExperimentTimeInSeconds) + { + return new BinaryClassificationExperiment(_context, new BinaryExperimentSettings() + { + MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds + }); + } + + /// + /// Creates a new AutoML experiment to run on a binary classification dataset. 
+ /// + /// Settings for the AutoML experiment. + /// A new AutoML binary classification experiment. + /// + /// + /// + /// + /// + public BinaryClassificationExperiment CreateBinaryClassificationExperiment(BinaryExperimentSettings experimentSettings) + { + return new BinaryClassificationExperiment(_context, experimentSettings); + } + + /// + /// Creates a new AutoML experiment to run on a multiclass classification dataset. + /// + /// Maximum number of seconds that experiment will run. + /// A new AutoML multiclass classification experiment. + /// + /// An experiment may run for longer than . + /// This is because once AutoML starts training an ML.NET model, AutoML lets the + /// model train to completion. For instance, if the first model + /// AutoML trains takes 4 hours, and the second model trained takes 5 hours, + /// but was the number of seconds in 6 hours, + /// the experiment will run for 4 + 5 = 9 hours (not 6 hours). + /// + /// + /// + /// + /// + /// + public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(uint maxExperimentTimeInSeconds) + { + return new MulticlassClassificationExperiment(_context, new MulticlassExperimentSettings() + { + MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds + }); + } + + /// + /// Creates a new AutoML experiment to run on a multiclass classification dataset. + /// + /// Settings for the AutoML experiment. + /// A new AutoML multiclass classification experiment. + /// + /// + /// + /// + /// + public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(MulticlassExperimentSettings experimentSettings) + { + return new MulticlassClassificationExperiment(_context, experimentSettings); + } + + /// + /// Infers information about the columns of a dataset in a file located at . + /// + /// Path to a dataset file. + /// The name of the label column. + /// The character used as separator between data elements in a row. If , AutoML will try to infer this value. 
+ /// Whether the file can contain columns defined by a quoted string. If , AutoML will try to infer this value. + /// Whether the file can contain numerical vectors in sparse format. If , AutoML will try to infer this value. + /// Whether trailing whitespace should be removed from dataset file lines. + /// Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See for more information. + /// Information inferred about the columns in the provided dataset. + /// + /// Infers information about the name, data type, and purpose of each column. + /// The returned can be used to + /// instantiate a . The can be used to + /// obtain an that can be fed into an AutoML experiment, + /// or used elsewhere in the ML.NET ecosystem (ie in . + /// The contains the inferred purpose of each column in the dataset. + /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.) + /// The can be inspected and modified (or kept as is) and used by an AutoML experiment. + /// + public ColumnInferenceResults InferColumns(string path, string labelColumnName = DefaultColumnNames.Label, char? separatorChar = null, bool? allowQuoting = null, + bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true) + { + UserInputValidationUtil.ValidateInferColumnsArgs(path, labelColumnName); + return ColumnInferenceApi.InferColumns(_context, path, labelColumnName, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns); + } + + /// + /// Infers information about the columns of a dataset in a file located at . + /// + /// Path to a dataset file. + /// Column information for the dataset. + /// The character used as separator between data elements in a row. If , AutoML will try to infer this value. + /// Whether the file can contain columns defined by a quoted string. If , AutoML will try to infer this value. 
+ /// Whether the file can contain numerical vectors in sparse format. If , AutoML will try to infer this value. + /// Whether trailing whitespace should be removed from dataset file lines. + /// Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See for more information. + /// Information inferred about the columns in the provided dataset. + /// + /// Infers information about the name, data type, and purpose of each column. + /// The returned can be used to + /// instantiate a . The can be used to + /// obtain an that can be fed into an AutoML experiment, + /// or used elsewhere in the ML.NET ecosystem (ie in . + /// The contains the inferred purpose of each column in the dataset. + /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.) + /// The can be inspected and modified (or kept as is) and used by an AutoML experiment. + /// + public ColumnInferenceResults InferColumns(string path, ColumnInformation columnInformation, char? separatorChar = null, bool? allowQuoting = null, + bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true) + { + columnInformation = columnInformation ?? new ColumnInformation(); + UserInputValidationUtil.ValidateInferColumnsArgs(path, columnInformation); + return ColumnInferenceApi.InferColumns(_context, path, columnInformation, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns); + } + + /// + /// Infers information about the columns of a dataset in a file located at . + /// + /// Path to a dataset file. + /// Column index of the label column in the dataset. + /// Whether or not the dataset file has a header row. + /// The character used as separator between data elements in a row. If , AutoML will try to infer this value. + /// Whether the file can contain columns defined by a quoted string. If , AutoML will try to infer this value. 
+ /// Whether the file can contain numerical vectors in sparse format. If , AutoML will try to infer this value. + /// Whether trailing whitespace should be removed from dataset file lines. + /// Whether to group together (when possible) original columns in the dataset file into vector columns in the resulting data structures. See for more information. + /// Information inferred about the columns in the provided dataset. + /// + /// Infers information about the name, data type, and purpose of each column. + /// The returned can be used to + /// instantiate a . The can be used to + /// obtain an that can be fed into an AutoML experiment, + /// or used elsewhere in the ML.NET ecosystem (ie in . + /// The contains the inferred purpose of each column in the dataset. + /// (For instance, is the column categorical, numeric, or text data? Should the column be ignored? Etc.) + /// The can be inspected and modified (or kept as is) and used by an AutoML experiment. + /// + public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, bool hasHeader = false, char? separatorChar = null, + bool? allowQuoting = null, bool? allowSparse = null, bool trimWhitespace = false, bool groupColumns = true) + { + UserInputValidationUtil.ValidateInferColumnsArgs(path); + return ColumnInferenceApi.InferColumns(_context, path, labelColumnIndex, hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns); + } + } +} diff --git a/src/Microsoft.ML.Auto/API/AutoInferenceCatalog.cs b/src/Microsoft.ML.Auto/API/AutoInferenceCatalog.cs deleted file mode 100644 index adf04111f2..0000000000 --- a/src/Microsoft.ML.Auto/API/AutoInferenceCatalog.cs +++ /dev/null @@ -1,79 +0,0 @@ -// Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using Microsoft.ML.Data; - -namespace Microsoft.ML.Auto -{ - public sealed class AutoMLCatalog - { - private readonly MLContext _context; - - internal AutoMLCatalog(MLContext context) - { - _context = context; - } - - public RegressionExperiment CreateRegressionExperiment(uint maxExperimentTimeInSeconds) - { - return new RegressionExperiment(_context, new RegressionExperimentSettings() - { - MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds - }); - } - - public RegressionExperiment CreateRegressionExperiment(RegressionExperimentSettings experimentSettings) - { - return new RegressionExperiment(_context, experimentSettings); - } - - public BinaryClassificationExperiment CreateBinaryClassificationExperiment(uint maxExperimentTimeInSeconds) - { - return new BinaryClassificationExperiment(_context, new BinaryExperimentSettings() - { - MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds - }); - } - - public BinaryClassificationExperiment CreateBinaryClassificationExperiment(BinaryExperimentSettings experimentSettings) - { - return new BinaryClassificationExperiment(_context, experimentSettings); - } - - public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(uint maxExperimentTimeInSeconds) - { - return new MulticlassClassificationExperiment(_context, new MulticlassExperimentSettings() - { - MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds - }); - } - - public MulticlassClassificationExperiment CreateMulticlassClassificationExperiment(MulticlassExperimentSettings experimentSettings) - { - return new MulticlassClassificationExperiment(_context, experimentSettings); - } - - public ColumnInferenceResults InferColumns(string path, string labelColumn = DefaultColumnNames.Label, char? separatorChar = null, bool? allowQuotedStrings = null, - bool? 
supportSparse = null, bool trimWhitespace = false, bool groupColumns = true) - { - UserInputValidationUtil.ValidateInferColumnsArgs(path, labelColumn); - return ColumnInferenceApi.InferColumns(_context, path, labelColumn, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns); - } - - public ColumnInferenceResults InferColumns(string path, ColumnInformation columnInformation, char? separatorChar = null, bool? allowQuotedStrings = null, - bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true) - { - columnInformation = columnInformation ?? new ColumnInformation(); - UserInputValidationUtil.ValidateInferColumnsArgs(path, columnInformation); - return ColumnInferenceApi.InferColumns(_context, path, columnInformation, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns); - } - - public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, bool hasHeader = false, char? separatorChar = null, - bool? allowQuotedStrings = null, bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true) - { - UserInputValidationUtil.ValidateInferColumnsArgs(path); - return ColumnInferenceApi.InferColumns(_context, path, labelColumnIndex, hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns); - } - } -} diff --git a/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs b/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs index 3ab9dbb7a1..45b2715eec 100644 --- a/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs +++ b/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs @@ -6,41 +6,132 @@ using System.Collections.Generic; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Microsoft.ML.Trainers.LightGbm; namespace Microsoft.ML.Auto { + /// + /// Settings for AutoML experiments on binary classification datasets. 
+ /// public sealed class BinaryExperimentSettings : ExperimentSettings { + /// + /// Metric that AutoML will try to optimize over the course of the experiment. + /// public BinaryClassificationMetric OptimizingMetric { get; set; } = BinaryClassificationMetric.Accuracy; + + /// + /// Collection of trainers the AutoML experiment can leverage. + /// + /// + /// The collection is auto-populated with all possible trainers (all values of ). + /// public ICollection Trainers { get; } = Enum.GetValues(typeof(BinaryClassificationTrainer)).OfType().ToList(); } + /// + /// Binary classification metric that AutoML will aim to optimize in its sweeping process during an experiment. + /// public enum BinaryClassificationMetric { + /// + /// See . + /// Accuracy, + + /// + /// See . + /// AreaUnderRocCurve, + + /// + /// See . + /// AreaUnderPrecisionRecallCurve, + + /// + /// See . + /// F1Score, + + /// + /// See . + /// PositivePrecision, + + /// + /// See . + /// PositiveRecall, + + /// + /// See . + /// NegativePrecision, + + /// + /// See . + /// NegativeRecall, } + /// + /// Enumeration of ML.NET binary classification trainers used by AutoML. + /// public enum BinaryClassificationTrainer { + /// + /// See . + /// AveragedPerceptron, + + /// + /// See . + /// FastForest, + + /// + /// See . + /// FastTree, + + /// + /// See . + /// LightGbm, - LinearSupportVectorMachines, + + /// + /// See . + /// + LinearSvm, + + /// + /// See . + /// LbfgsLogisticRegression, + + /// + /// See . + /// SdcaLogisticRegression, + + /// + /// See . + /// SgdCalibrated, + + /// + /// See . + /// SymbolicSgdLogisticRegression, } + /// + /// AutoML experiment on binary classification datasets. 
+ /// public sealed class BinaryClassificationExperiment : ExperimentBase { internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSettings settings) @@ -54,8 +145,17 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti } } + /// + /// Extension methods that operate over binary experiment run results. + /// public static class BinaryExperimentResultExtensions { + /// + /// Select the best run from an enumeration of experiment runs. + /// + /// Enumeration of AutoML experiment run results. + /// Metric to consider when selecting the best run. + /// The best experiment run. public static RunDetail Best(this IEnumerable> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy) { var metricsAgent = new BinaryMetricsAgent(null, metric); @@ -63,6 +163,12 @@ public static RunDetail Best(this IEnumerable + /// Select the best run from an enumeration of experiment cross validation runs. + /// + /// Enumeration of AutoML experiment cross validation run results. + /// Metric to consider when selecting the best run. + /// The best experiment run. public static CrossValidationRunDetail Best(this IEnumerable> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy) { var metricsAgent = new BinaryMetricsAgent(null, metric); diff --git a/src/Microsoft.ML.Auto/API/ColumnInference.cs b/src/Microsoft.ML.Auto/API/ColumnInference.cs index 588116897d..a10c2fcbc2 100644 --- a/src/Microsoft.ML.Auto/API/ColumnInference.cs +++ b/src/Microsoft.ML.Auto/API/ColumnInference.cs @@ -8,20 +8,85 @@ namespace Microsoft.ML.Auto { + /// + /// Contains information AutoML inferred about columns in a dataset. + /// public sealed class ColumnInferenceResults { + /// + /// Inferred for the dataset. + /// + /// + /// Can be used to instantiate a new to load + /// data into an . 
+ /// public TextLoader.Options TextLoaderOptions { get; internal set; } = new TextLoader.Options(); + + /// + /// Information about the inferred columns in the dataset. + /// + /// + /// Contains the inferred purposes of each column. See for more details. + /// This can be fed to the AutoML API when running an experiment. + /// See + /// for example. + /// public ColumnInformation ColumnInformation { get; internal set; } = new ColumnInformation(); } + /// + /// Information about the columns in a dataset. + /// + /// + /// Contains information about the purpose of each column in the dataset. For instance, + /// it enumerates the dataset columns that AutoML should treat as categorical, + /// the columns AutoML should ignore, which column is the label, etc. + /// can be fed to the AutoML API when running an experiment. + /// See + /// for example. + /// public sealed class ColumnInformation { + /// + /// The dataset column to use as the label. + /// public string LabelColumnName { get; set; } = DefaultColumnNames.Label; + + /// + /// The dataset column to use for example weight. + /// public string ExampleWeightColumnName { get; set; } + + /// + /// The dataset column to use for grouping rows. + /// If two examples share the same sampling key column name, + /// they are guaranteed to appear in the same subset (train or test). + /// This can be used to ensure no label leakage from the train to the test set. + /// If , no row grouping will be performed. + /// public string SamplingKeyColumnName { get; set; } + + /// + /// The dataset columns that are categorical. + /// + /// + /// Categorical data columns should generally be columns that contain a small number of unique values. + /// public ICollection CategoricalColumnNames { get; } = new Collection(); + + /// + /// The dataset columns that are numeric. + /// public ICollection NumericColumnNames { get; } = new Collection(); + + /// + /// The dataset columns that are text. 
+ /// public ICollection TextColumnNames { get; } = new Collection(); + + /// + /// The dataset columns that AutoML should ignore. + /// public ICollection IgnoredColumnNames { get; } = new Collection(); } } \ No newline at end of file diff --git a/src/Microsoft.ML.Auto/API/ExperimentBase.cs b/src/Microsoft.ML.Auto/API/ExperimentBase.cs index 381196c54c..b9b5ec8db1 100644 --- a/src/Microsoft.ML.Auto/API/ExperimentBase.cs +++ b/src/Microsoft.ML.Auto/API/ExperimentBase.cs @@ -7,9 +7,14 @@ namespace Microsoft.ML.Auto { + /// + /// AutoML experiment base class. All task-specific AutoML experiments + /// (like ) inherit from this class. + /// + /// Metrics type used by task-specific AutoML experiments. public abstract class ExperimentBase where TMetrics : class { - protected readonly MLContext Context; + private protected readonly MLContext Context; private readonly IMetricsAgent _metricsAgent; private readonly OptimizingMetricInfo _optimizingMetricInfo; @@ -32,17 +37,57 @@ internal ExperimentBase(MLContext context, _trainerWhitelist = trainerWhitelist; } - public IEnumerable> Execute(IDataView trainData, string labelColumn = DefaultColumnNames.Label, - string samplingKeyColumn = null, IEstimator preFeaturizers = null, IProgress> progressHandler = null) + /// + /// Executes an AutoML experiment. + /// + /// The training data used by the AutoML experiment. + /// The dataset column used as the label. + /// The dataset column used as the sampling key column. + /// See for more information. + /// Pre-featurizer that AutoML will apply to the data during an + /// experiment. (The pre-featurizer will be fit only on the training data split to produce a + /// trained transform. Then, the trained transform will be applied to both the training + /// data split and corresponding validation data split.) + /// A user-defined object that implements + /// the interface. AutoML will invoke the method + /// after each model it produces during the + /// course of the experiment. 
+ /// + /// An enumeration of all the runs in an experiment. See + /// for more information on the contents of a run. + /// + /// Depending on the size of your data, the AutoML experiment could take a long time to execute. + /// + public IEnumerable> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label, + string samplingKeyColumn = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { var columnInformation = new ColumnInformation() { - LabelColumnName = labelColumn, + LabelColumnName = labelColumnName, SamplingKeyColumnName = samplingKeyColumn }; - return Execute(trainData, columnInformation, preFeaturizers, progressHandler); + return Execute(trainData, columnInformation, preFeaturizer, progressHandler); } + /// + /// Executes an AutoML experiment. + /// + /// The training data to be used by the AutoML experiment. + /// Column information for the dataset. + /// Pre-featurizer that AutoML will apply to the data during an + /// experiment. (The pre-featurizer will be fit only on the training data split to produce a + /// trained transform. Then, the trained transform will be applied to both the training + /// data split and corresponding validation data split.) + /// A user-defined object that implements + /// the interface. AutoML will invoke the method + /// after each model it produces during the + /// course of the experiment. + /// + /// An enumeration of all the runs in an experiment. See + /// for more information on the contents of a run. + /// + /// Depending on the size of your data, the AutoML experiment could take a long time to execute. 
+ /// public IEnumerable> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { @@ -66,12 +111,52 @@ public IEnumerable> Execute(IDataView trainData, ColumnInfor } } - public IEnumerable> Execute(IDataView trainData, IDataView validationData, string labelColumn = DefaultColumnNames.Label, IEstimator preFeaturizer = null, IProgress> progressHandler = null) + /// + /// Executes an AutoML experiment. + /// + /// The training data to be used by the AutoML experiment. + /// The validation data to be used by the AutoML experiment. + /// The name of the label column. + /// Pre-featurizer that AutoML will apply to the data during an + /// experiment. (The pre-featurizer will be fit only on the training data split to produce a + /// trained transform. Then, the trained transform will be applied to both the training + /// data split and corresponding validation data split.) + /// A user-defined object that implements + /// the interface. AutoML will invoke the method + /// after each model it produces during the + /// course of the experiment. + /// + /// An enumeration of all the runs in an experiment. See + /// for more information on the contents of a run. + /// + /// Depending on the size of your data, the AutoML experiment could take a long time to execute. + /// + public IEnumerable> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { - var columnInformation = new ColumnInformation() { LabelColumnName = labelColumn }; + var columnInformation = new ColumnInformation() { LabelColumnName = labelColumnName }; return Execute(trainData, validationData, columnInformation, preFeaturizer, progressHandler); } + /// + /// Executes an AutoML experiment. + /// + /// The training data to be used by the AutoML experiment. 
+ /// The validation data to be used by the AutoML experiment. + /// Column information for the dataset. + /// Pre-featurizer that AutoML will apply to the data during an + /// experiment. (The pre-featurizer will be fit only on the training data split to produce a + /// trained transform. Then, the trained transform will be applied to both the training + /// data split and corresponding validation data split.) + /// A user-defined object that implements + /// the interface. AutoML will invoke the method + /// after each model it produces during the + /// course of the experiment. + /// + /// An enumeration of all the runs in an experiment. See + /// for more information on the contents of a run. + /// + /// Depending on the size of your data, the AutoML experiment could take a long time to execute. + /// public IEnumerable> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { if (validationData == null) @@ -83,6 +168,26 @@ public IEnumerable> Execute(IDataView trainData, IDataView v return ExecuteTrainValidate(trainData, columnInformation, validationData, preFeaturizer, progressHandler); } + /// + /// Executes an AutoML experiment. + /// + /// The training data to be used by the AutoML experiment. + /// The number of cross validation folds into which the training data should be divided when fitting a model. + /// Column information for the dataset. + /// Pre-featurizer that AutoML will apply to the data during an + /// experiment. (The pre-featurizer will be fit only on the training data split to produce a + /// trained transform. Then, the trained transform will be applied to both the training + /// data split and corresponding validation data split.) + /// A user-defined object that implements + /// the interface. AutoML will invoke the method + /// after each model it produces during the + /// course of the experiment. 
+ /// + /// An enumeration of all the runs in an experiment. See + /// for more information on the contents of a run. + /// + /// Depending on the size of your data, the AutoML experiment could take a long time to execute. + /// public IEnumerable> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds); @@ -90,14 +195,35 @@ public IEnumerable> Execute(IDataView trainDa return ExecuteCrossVal(splitResult.trainDatasets, columnInformation, splitResult.validationDatasets, preFeaturizer, progressHandler); } + /// + /// Executes an AutoML experiment. + /// + /// The training data to be used by the AutoML experiment. + /// The number of cross validation folds into which the training data should be divided when fitting a model. + /// The name of the label column. + /// The name of the sampling key column. + /// Pre-featurizer that AutoML will apply to the data during an + /// experiment. (The pre-featurizer will be fit only on the training data split to produce a + /// trained transform. Then, the trained transform will be applied to both the training + /// data split and corresponding validation data split.) + /// A user-defined object that implements + /// the interface. AutoML will invoke the method + /// after each model it produces during the + /// course of the experiment. + /// + /// An enumeration of all the runs in an experiment. See + /// for more information on the contents of a run. + /// + /// Depending on the size of your data, the AutoML experiment could take a long time to execute. 
+ /// public IEnumerable> Execute(IDataView trainData, - uint numberOfCVFolds, string labelColumn = DefaultColumnNames.Label, + uint numberOfCVFolds, string labelColumnName = DefaultColumnNames.Label, string samplingKeyColumn = null, IEstimator preFeaturizer = null, Progress> progressHandler = null) { var columnInformation = new ColumnInformation() { - LabelColumnName = labelColumn, + LabelColumnName = labelColumnName, SamplingKeyColumnName = samplingKeyColumn }; return Execute(trainData, numberOfCVFolds, columnInformation, preFeaturizer, progressHandler); diff --git a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs index 43c6c8befe..891f3615e0 100644 --- a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs +++ b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs @@ -7,14 +7,42 @@ namespace Microsoft.ML.Auto { - public class ExperimentSettings + /// + /// Base class for experiment settings. All task-specific AutoML experiment settings + /// (like ) inherit from this class. + /// + public abstract class ExperimentSettings { + /// + /// Maximum time in seconds the experiment is allowed to run. + /// + /// + /// An experiment may run for longer than . + /// This is because once AutoML starts training an ML.NET model, AutoML lets the + /// model train to completion. For instance, if the first model + /// AutoML trains takes 4 hours, and the second model trained takes 5 hours, + /// but was the number of seconds in 6 hours, + /// the experiment will run for 4 + 5 = 9 hours (not 6 hours). + /// public uint MaxExperimentTimeInSeconds { get; set; } = 24 * 60 * 60; + + /// + /// Cancellation token for the AutoML experiment. It propagates the notification + /// that the experiment should be canceled. + /// + /// + /// An experiment may not immediately stop after cancellation. + /// This is because once AutoML starts training an ML.NET model, AutoML lets the + /// model train to completion. 
For instance, if the first model + /// AutoML trains takes 4 hours, and the second model trained takes 5 hours, + /// but cancellation is requested after 6 hours, + /// the experiment will stop after 4 + 5 = 9 hours (not 6 hours). + /// public CancellationToken CancellationToken { get; set; } = default; /// /// This is a pointer to a directory where all models trained during the AutoML experiment will be saved. - /// If null, models will be kept in memory instead of written to disk. + /// If , models will be kept in memory instead of written to disk. /// (Please note: for an experiment with high runtime operating on a large dataset, opting to keep models in /// memory could cause a system to run out of memory.) /// @@ -23,10 +51,10 @@ public class ExperimentSettings /// /// This setting controls whether or not an AutoML experiment will make use of ML.NET-provided caching. /// If set to true, caching will be forced on for all pipelines. If set to false, caching will be forced off. - /// If set to null (default value), AutoML will decide whether to enable caching for each model. + /// If set to (default value), AutoML will decide whether to enable caching for each model. /// public bool? CacheBeforeTrainer = null; - + internal int MaxModels = int.MaxValue; internal IDebugLogger DebugLogger; } diff --git a/src/Microsoft.ML.Auto/API/InferenceException.cs b/src/Microsoft.ML.Auto/API/InferenceException.cs index 423c4ae3ce..0e501c5b89 100644 --- a/src/Microsoft.ML.Auto/API/InferenceException.cs +++ b/src/Microsoft.ML.Auto/API/InferenceException.cs @@ -6,23 +6,38 @@ namespace Microsoft.ML.Auto { - public enum InferenceType + /// + /// Type of exception encountered by AutoML. + /// + public enum InferenceExceptionType { - ColumnDataKind, + /// + /// Exception that occurs when AutoML is inferring the data type of a column. + /// + ColumnDataType, + + /// + /// Exception that occurs when AutoML is attempting to split a dataset into distinct columns. 
+ /// ColumnSplit, - Label, } + /// + /// Exception thrown by AutoML. + /// public sealed class InferenceException : Exception { - public InferenceType InferenceType; - - public InferenceException(InferenceType inferenceType, string message) + /// + /// Type of AutoML exception that occurred. + /// + public InferenceExceptionType InferenceExceptionType; + + internal InferenceException(InferenceExceptionType inferenceType, string message) : base(message) { } - public InferenceException(InferenceType inferenceType, string message, Exception inner) + internal InferenceException(InferenceExceptionType inferenceType, string message, Exception inner) : base(message, inner) { } diff --git a/src/Microsoft.ML.Auto/API/MLContextExtension.cs b/src/Microsoft.ML.Auto/API/MLContextExtension.cs index 9287fe827c..7b2e6a8a69 100644 --- a/src/Microsoft.ML.Auto/API/MLContextExtension.cs +++ b/src/Microsoft.ML.Auto/API/MLContextExtension.cs @@ -4,11 +4,19 @@ namespace Microsoft.ML.Auto { + /// + /// Class containing AutoML extension methods to + /// public static class MLContextExtension { - public static AutoMLCatalog Auto(this MLContext mlContext) + /// + /// Returns a catalog of all possible AutoML operations. + /// + /// instance. + /// A catalog of all possible AutoML operations. 
+ public static AutoCatalog Auto(this MLContext mlContext) { - return new AutoMLCatalog(mlContext); + return new AutoCatalog(mlContext); } } } diff --git a/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs b/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs index f7f5a856cb..bd4383d861 100644 --- a/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs +++ b/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs @@ -6,39 +6,122 @@ using System.Collections.Generic; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Microsoft.ML.Trainers.LightGbm; namespace Microsoft.ML.Auto { + /// + /// Settings for AutoML experiments on multiclass classification datasets. + /// public sealed class MulticlassExperimentSettings : ExperimentSettings { + /// + /// Metric that AutoML will try to optimize over the course of the experiment. + /// public MulticlassClassificationMetric OptimizingMetric { get; set; } = MulticlassClassificationMetric.MicroAccuracy; + + /// + /// Collection of trainers the AutoML experiment can leverage. + /// + /// + /// The collection is auto-populated with all possible trainers (all values of ). + /// public ICollection Trainers { get; } = Enum.GetValues(typeof(MulticlassClassificationTrainer)).OfType().ToList(); } + /// + /// Multiclass classification metric that AutoML will aim to optimize in its sweeping process during an experiment. + /// public enum MulticlassClassificationMetric { + /// + /// See . + /// MicroAccuracy, + + /// + /// See . + /// MacroAccuracy, + + /// + /// See . + /// LogLoss, + + /// + /// See . + /// LogLossReduction, + + /// + /// See . + /// TopKAccuracy, } + /// + /// Enumeration of ML.NET multiclass classification trainers used by AutoML. + /// public enum MulticlassClassificationTrainer { + /// + /// using . + /// AveragedPerceptronOVA, + + /// + /// using . + /// FastForestOVA, + + /// + /// using . 
+ /// FastTreeOVA, + + /// + /// See . + /// LightGbm, + + /// + /// using . + /// LinearSupportVectorMachinesOVA, + + /// + /// See . + /// LbfgsMaximumEntropy, + + /// + /// using . + /// LbfgsLogisticRegressionOVA, + + /// + /// See . + /// SdcaMaximumEntropy, + + /// + /// using . + /// SgdCalibratedOVA, + + /// + /// using . + /// SymbolicSgdLogisticRegressionOVA, } + /// + /// AutoML experiment on multiclass classification datasets. + /// public sealed class MulticlassClassificationExperiment : ExperimentBase { internal MulticlassClassificationExperiment(MLContext context, MulticlassExperimentSettings settings) @@ -52,8 +135,17 @@ internal MulticlassClassificationExperiment(MLContext context, MulticlassExperim } } + /// + /// Extension methods that operate over multiclass experiment run results. + /// public static class MulticlassExperimentResultExtensions { + /// + /// Select the best run from an enumeration of experiment runs. + /// + /// Enumeration of AutoML experiment run results. + /// Metric to consider when selecting the best run. + /// The best experiment run. public static RunDetail Best(this IEnumerable> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.MicroAccuracy) { var metricsAgent = new MultiMetricsAgent(null, metric); @@ -61,6 +153,13 @@ public static RunDetail Best(this IEnumerable + /// Select the best run from an enumeration of experiment cross validation runs. + /// + /// Enumeration of AutoML experiment cross validation run results. + /// Metric to consider when selecting the best run. + /// The best experiment run. 
public static CrossValidationRunDetail Best(this IEnumerable> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.MicroAccuracy) { var metricsAgent = new MultiMetricsAgent(null, metric); diff --git a/src/Microsoft.ML.Auto/API/RegressionExperiment.cs b/src/Microsoft.ML.Auto/API/RegressionExperiment.cs index 51f5988f64..f57fb2470f 100644 --- a/src/Microsoft.ML.Auto/API/RegressionExperiment.cs +++ b/src/Microsoft.ML.Auto/API/RegressionExperiment.cs @@ -6,36 +6,108 @@ using System.Collections.Generic; using System.Linq; using Microsoft.ML.Data; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Microsoft.ML.Trainers.LightGbm; namespace Microsoft.ML.Auto { + /// + /// Settings for AutoML experiments on regression datasets. + /// public sealed class RegressionExperimentSettings : ExperimentSettings { + /// + /// Metric that AutoML will try to optimize over the course of the experiment. + /// public RegressionMetric OptimizingMetric { get; set; } = RegressionMetric.RSquared; + + /// + /// Collection of trainers the AutoML experiment can leverage. + /// + /// + /// The collection is auto-populated with all possible trainers (all values of ). + /// public ICollection Trainers { get; } = Enum.GetValues(typeof(RegressionTrainer)).OfType().ToList(); } + /// + /// Regression metric that AutoML will aim to optimize in its sweeping process during an experiment. + /// public enum RegressionMetric { + /// + /// See . + /// MeanAbsoluteError, + + /// + /// See . + /// MeanSquaredError, + + /// + /// See . + /// RootMeanSquaredError, + + /// + /// See . + /// RSquared } + + /// + /// Enumeration of ML.NET regression trainers used by AutoML. + /// public enum RegressionTrainer { + /// + /// See . + /// FastForest, + + /// + /// See . + /// FastTree, + + /// + /// See . + /// FastTreeTweedie, + + /// + /// See . + /// LightGbm, + + /// + /// See . + /// OnlineGradientDescent, + + /// + /// See . 
+ /// Ols, + + /// + /// See . + /// LbfgsPoissonRegression, + + /// + /// See . + /// StochasticDualCoordinateAscent, } + /// + /// AutoML experiment on regression datasets. + /// public sealed class RegressionExperiment : ExperimentBase { internal RegressionExperiment(MLContext context, RegressionExperimentSettings settings) @@ -49,8 +121,17 @@ internal RegressionExperiment(MLContext context, RegressionExperimentSettings se } } + /// + /// Extension methods that operate over regression experiment run results. + /// public static class RegressionExperimentResultExtensions { + /// + /// Select the best run from an enumeration of experiment runs. + /// + /// Enumeration of AutoML experiment run results. + /// Metric to consider when selecting the best run. + /// The best experiment run. public static RunDetail Best(this IEnumerable> results, RegressionMetric metric = RegressionMetric.RSquared) { var metricsAgent = new RegressionMetricsAgent(null, metric); @@ -58,6 +139,12 @@ public static RunDetail Best(this IEnumerable + /// Select the best run from an enumeration of experiment cross validation runs. + /// + /// Enumeration of AutoML experiment cross validation run results. + /// Metric to consider when selecting the best run. + /// The best experiment run. public static CrossValidationRunDetail Best(this IEnumerable> results, RegressionMetric metric = RegressionMetric.RSquared) { var metricsAgent = new RegressionMetricsAgent(null, metric); diff --git a/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs b/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs index 713c820a99..45520e8068 100644 --- a/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs +++ b/src/Microsoft.ML.Auto/API/RunDetails/CrossValidationRunDetail.cs @@ -7,8 +7,20 @@ namespace Microsoft.ML.Auto { + /// + /// Details about a cross validation run in an AutoML experiment. + /// + /// Metrics type for the run. 
+ /// + /// Over the course of an experiment, many models are evaluated on a dataset + /// using cross validation. This object contains information about each model + /// evaluated during the AutoML experiment. + /// public sealed class CrossValidationRunDetail : RunDetail { + /// + /// Results for each of the cross validation folds. + /// public IEnumerable> Results { get; private set; } internal CrossValidationRunDetail(string trainerName, @@ -20,10 +32,35 @@ internal CrossValidationRunDetail(string trainerName, } } + /// + /// Result of a pipeline trained on a cross validation fold. + /// + /// Metrics type for the run. public sealed class TrainResult { + /// + /// Each fold has training data and validation data. A model trained on the + /// fold's training data is evaluated against the validation data, + /// and the metrics for that calculation are emitted here. + /// public TMetrics ValidationMetrics { get; private set; } + + /// + /// Model trained on the fold during the run. + /// + /// + /// You can use the trained model to obtain predictions on input data. + /// public ITransformer Model { get { return _modelContainer.GetModel(); } } + + /// + /// Exception encountered while training the fold. This property is + /// if no exception was encountered. + /// + /// + /// If an exception occurred, it's possible some properties in this object + /// (like ) could be . + /// public Exception Exception { get; private set; } private readonly ModelContainer _modelContainer; diff --git a/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs b/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs index a83670986d..3600374dea 100644 --- a/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs +++ b/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs @@ -6,10 +6,45 @@ namespace Microsoft.ML.Auto { + /// + /// Details about an AutoML experiment run. + /// + /// + /// Over the course of an experiment, many models are evaluated on a dataset. 
+ /// This object contains information about each model evaluated during + /// the AutoML experiment. + /// + /// Type of the metrics for this experiment. (For instance, (cref Binary, Regression).) public sealed class RunDetail : RunDetail { + /// + /// Metrics of how the trained model performed on the validation data during + /// the run. + /// + /// + /// Internally, each run has train data and validation data. A model trained on the + /// run's training data is evaluated against the validation data, + /// and the metrics for that calculation are emitted here. + /// public TMetrics ValidationMetrics { get; private set; } + + + /// + /// Model trained during the run. + /// + /// + /// You can use the trained model to obtain predictions on input data. + /// public ITransformer Model { get { return _modelContainer.GetModel(); } } + + /// + /// Exception encountered during the run. This property is if + /// no exception was encountered. + /// + /// + /// If an exception occurred, it's possible some properties in this object + /// (like ) could be . + /// public Exception Exception { get; private set; } private readonly ModelContainer _modelContainer; @@ -27,10 +62,36 @@ internal RunDetail(string trainerName, } } + /// + /// Details about an AutoML experiment run. + /// + /// + /// In trying to produce the best model, an AutoML experiment evaluates the quality of many models + /// on a dataset. This object contains information about each model tried during the AutoML experiment. + /// public abstract class RunDetail { + /// + /// String name of the trainer used in this run. (For instance, "LightGbm".) + /// public string TrainerName { get; private set; } + + /// + /// Runtime in seconds. + /// + /// + /// Runtime includes model training time. Depending on the size of the data, + /// the runtime may be quite long. + /// public double RuntimeInSeconds { get; internal set; } + + /// + /// An ML.NET that represents the pipeline in this run. 
+ /// + /// + /// You can call on + /// this estimator to re-train your pipeline on any . + /// public IEstimator Estimator { get; private set; } internal Pipeline Pipeline { get; private set; } diff --git a/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs b/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs index 6db0fab782..d7462bbf25 100644 --- a/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs +++ b/src/Microsoft.ML.Auto/ColumnInference/ColumnInferenceApi.cs @@ -117,7 +117,7 @@ private static TextFileContents.ColumnSplitResult InferSplit(MLContext context, if (!splitInference.IsSuccess) { - throw new InferenceException(InferenceType.ColumnSplit, "Unable to split the file provided into multiple, consistent columns."); + throw new InferenceException(InferenceExceptionType.ColumnSplit, "Unable to split the file provided into multiple, consistent columns."); } return splitInference; @@ -141,7 +141,7 @@ private static ColumnTypeInference.InferenceResult InferColumnTypes(MLContext co if (!typeInferenceResult.IsSuccess) { - throw new InferenceException(InferenceType.ColumnDataKind, "Unable to infer column types of the file provided."); + throw new InferenceException(InferenceExceptionType.ColumnDataType, "Unable to infer column types of the file provided."); } return typeInferenceResult; diff --git a/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj b/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj index b13120524f..d97c3a819a 100644 --- a/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj +++ b/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj @@ -10,9 +10,6 @@ - - - @@ -26,8 +23,23 @@ Microsoft.ML.Auto + + true + + + + 1701;1702 + + + + + + + + + diff --git a/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs b/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs index 618cf74256..36c3b78247 100644 --- a/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs +++ b/src/Microsoft.ML.Auto/Sweepers/SmacSweeper.cs @@ -206,7 +206,7 @@ private ParameterSet[] 
GreedyPlusRandomSearch(ParameterSet[] parents, FastForest /// Trained forest, for evaluation of points. /// Best performance seen thus far. /// Threshold for when to stop the local search. - /// Metric type - maximizing or minimizing. + /// Whether SMAC should aim to maximize (vs minimize) metric. /// private Tuple LocalSearch(ParameterSet parent, FastForestRegressionModelParameters forest, double bestVal, double epsilon, bool isMetricMaximizing) { diff --git a/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs b/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs index 9d80ebe09a..c395103769 100644 --- a/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs +++ b/src/Microsoft.ML.Auto/TrainerExtensions/SweepableParams.cs @@ -53,7 +53,7 @@ private static IEnumerable BuildLbfgsArgsParams() return new SweepableParam[] { new SweepableFloatParam("L2Regularization", 0.0f, 1.0f, numSteps: 4), new SweepableFloatParam("L1Regularization", 0.0f, 1.0f, numSteps: 4), - new SweepableDiscreteParam("OptmizationTolerance", new object[] { 1e-4f, 1e-7f }), + new SweepableDiscreteParam("OptimizationTolerance", new object[] { 1e-4f, 1e-7f }), new SweepableDiscreteParam("HistorySize", new object[] { 5, 20, 50 }), new SweepableLongParam("MaximumNumberOfIterations", 1, int.MaxValue), new SweepableFloatParam("InitialWeightsDiameter", 0.0f, 1.0f, numSteps: 5), diff --git a/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs b/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs index 213d555545..cca0d621d2 100644 --- a/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs +++ b/src/Microsoft.ML.Auto/TrainerExtensions/TrainerExtensionUtil.cs @@ -291,7 +291,7 @@ public static TrainerName GetTrainerName(BinaryClassificationTrainer binaryTrain return TrainerName.FastTreeBinary; case BinaryClassificationTrainer.LightGbm: return TrainerName.LightGbmBinary; - case BinaryClassificationTrainer.LinearSupportVectorMachines: + case 
BinaryClassificationTrainer.LinearSvm: return TrainerName.LinearSvmBinary; case BinaryClassificationTrainer.LbfgsLogisticRegression: return TrainerName.LbfgsLogisticRegressionBinary; diff --git a/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs b/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs index ee606a6cde..5a65c31304 100644 --- a/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs +++ b/src/mlnet/CodeGenerator/CSharp/TrainerGenerators.cs @@ -245,7 +245,7 @@ internal override IDictionary NamedParameters {"FeatureColumnName","featureColumnName" }, {"L1Regularization","l1Regularization" }, {"L2Regularization","l2Regularization" }, - {"OptmizationTolerance","optimizationTolerance" }, + {"OptimizationTolerance","optimizationTolerance" }, {"HistorySize","historySize" }, {"EnforceNonNegativity","enforceNonNegativity" }, }; @@ -368,7 +368,7 @@ internal override IDictionary NamedParameters {"FeatureColumnName","featureColumnName" }, {"L1Regularization","l1Regularization" }, {"L2Regularization","l2Regularization" }, - {"OptmizationTolerance","optimizationTolerance" }, + {"OptimizationTolerance","optimizationTolerance" }, {"HistorySize","historySize" }, {"EnforceNonNegativity","enforceNonNegativity" }, }; diff --git a/src/mlnet/mlnet.csproj b/src/mlnet/mlnet.csproj index f1e2fd5ae3..7a2cf20063 100644 --- a/src/mlnet/mlnet.csproj +++ b/src/mlnet/mlnet.csproj @@ -16,6 +16,7 @@ + diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj index 3a9140e449..95196ad9d9 100644 --- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj +++ b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj @@ -37,5 +37,10 @@ PreserveNewest + + + + +