diff --git a/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs b/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs index 45b2715eec..c3de696345 100644 --- a/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs +++ b/src/Microsoft.ML.Auto/API/BinaryClassificationExperiment.cs @@ -132,7 +132,7 @@ public enum BinaryClassificationTrainer /// /// AutoML experiment on binary classification datasets. /// - public sealed class BinaryClassificationExperiment : ExperimentBase + public sealed class BinaryClassificationExperiment : ExperimentBase { internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSettings settings) : base(context, @@ -143,37 +143,15 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti TrainerExtensionUtil.GetTrainerNames(settings.Trainers)) { } - } - /// - /// Extension methods that operate over binary experiment run results. - /// - public static class BinaryExperimentResultExtensions - { - /// - /// Select the best run from an enumeration of experiment runs. - /// - /// Enumeration of AutoML experiment run results. - /// Metric to consider when selecting the best run. - /// The best experiment run. - public static RunDetail Best(this IEnumerable> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy) + private protected override RunDetail GetBestRun(IEnumerable> results) { - var metricsAgent = new BinaryMetricsAgent(null, metric); - var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing; - return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing); + return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing); } - /// - /// Select the best run from an enumeration of experiment cross validation runs. - /// - /// Enumeration of AutoML experiment cross validation run results. - /// Metric to consider when selecting the best run. - /// The best experiment run. - public static CrossValidationRunDetail Best(this IEnumerable> results, BinaryClassificationMetric metric = BinaryClassificationMetric.Accuracy) + private protected override CrossValidationRunDetail GetBestCrossValRun(IEnumerable> results) { - var metricsAgent = new BinaryMetricsAgent(null, metric); - var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing; - return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing); + return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing); } } } diff --git a/src/Microsoft.ML.Auto/API/ColumnInference.cs b/src/Microsoft.ML.Auto/API/ColumnInference.cs index a10c2fcbc2..83e6d9bd05 100644 --- a/src/Microsoft.ML.Auto/API/ColumnInference.cs +++ b/src/Microsoft.ML.Auto/API/ColumnInference.cs @@ -28,7 +28,7 @@ public sealed class ColumnInferenceResults /// /// Contains the inferred purposes of each column. See for more details. /// This can be fed to the AutoML API when running an experiment. - /// See + /// See /// for example. /// public ColumnInformation ColumnInformation { get; internal set; } = new ColumnInformation(); @@ -42,7 +42,7 @@ public sealed class ColumnInferenceResults /// it enumerates the dataset columns that AutoML should treat as categorical, /// the columns AutoML should ignore, which column is the label, etc. /// can be fed to the AutoML API when running an experiment. - /// See + /// See /// for example. 
/// public sealed class ColumnInformation diff --git a/src/Microsoft.ML.Auto/API/ExperimentBase.cs b/src/Microsoft.ML.Auto/API/ExperimentBase.cs index b9b5ec8db1..c9a028e926 100644 --- a/src/Microsoft.ML.Auto/API/ExperimentBase.cs +++ b/src/Microsoft.ML.Auto/API/ExperimentBase.cs @@ -12,27 +12,32 @@ namespace Microsoft.ML.Auto /// (like ) inherit from this class. /// /// Metrics type used by task-specific AutoML experiments. - public abstract class ExperimentBase where TMetrics : class + /// Experiment settings type. + public abstract class ExperimentBase + where TMetrics : class + where TExperimentSettings : ExperimentSettings { private protected readonly MLContext Context; + private protected readonly IMetricsAgent MetricsAgent; + private protected readonly OptimizingMetricInfo OptimizingMetricInfo; + private protected readonly TExperimentSettings Settings; - private readonly IMetricsAgent _metricsAgent; - private readonly OptimizingMetricInfo _optimizingMetricInfo; - private readonly ExperimentSettings _settings; + private readonly AutoMLLogger _logger; private readonly TaskKind _task; private readonly IEnumerable _trainerWhitelist; internal ExperimentBase(MLContext context, IMetricsAgent metricsAgent, OptimizingMetricInfo optimizingMetricInfo, - ExperimentSettings settings, + TExperimentSettings settings, TaskKind task, IEnumerable trainerWhitelist) { Context = context; - _metricsAgent = metricsAgent; - _optimizingMetricInfo = optimizingMetricInfo; - _settings = settings; + MetricsAgent = metricsAgent; + OptimizingMetricInfo = optimizingMetricInfo; + Settings = settings; + _logger = new AutoMLLogger(context); _task = task; _trainerWhitelist = trainerWhitelist; } @@ -53,12 +58,11 @@ internal ExperimentBase(MLContext context, /// after each model it produces during the /// course of the experiment. /// - /// An enumeration of all the runs in an experiment. See - /// for more information on the contents of a run. + /// The experiment result. /// /// Depending on the size of your data, the AutoML experiment could take a long time to execute. /// - public IEnumerable> Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label, + public ExperimentResult Execute(IDataView trainData, string labelColumnName = DefaultColumnNames.Label, string samplingKeyColumn = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { var columnInformation = new ColumnInformation() @@ -83,12 +87,11 @@ public IEnumerable> Execute(IDataView trainData, string labe /// after each model it produces during the /// course of the experiment. /// - /// An enumeration of all the runs in an experiment. See - /// for more information on the contents of a run. + /// The experiment result. /// /// Depending on the size of your data, the AutoML experiment could take a long time to execute. /// - public IEnumerable> Execute(IDataView trainData, ColumnInformation columnInformation, + public ExperimentResult Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { // Cross val threshold for # of dataset rows -- @@ -126,12 +129,11 @@ public IEnumerable> Execute(IDataView trainData, ColumnInfor /// after each model it produces during the /// course of the experiment. /// - /// An enumeration of all the runs in an experiment. See - /// for more information on the contents of a run. + /// The experiment result. /// /// Depending on the size of your data, the AutoML experiment could take a long time to execute. 
/// - public IEnumerable> Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator preFeaturizer = null, IProgress> progressHandler = null) + public ExperimentResult Execute(IDataView trainData, IDataView validationData, string labelColumnName = DefaultColumnNames.Label, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { var columnInformation = new ColumnInformation() { LabelColumnName = labelColumnName }; return Execute(trainData, validationData, columnInformation, preFeaturizer, progressHandler); @@ -152,12 +154,11 @@ public IEnumerable> Execute(IDataView trainData, IDataView v /// after each model it produces during the /// course of the experiment. /// - /// An enumeration of all the runs in an experiment. See - /// for more information on the contents of a run. + /// The experiment result. /// /// Depending on the size of your data, the AutoML experiment could take a long time to execute. /// - public IEnumerable> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator preFeaturizer = null, IProgress> progressHandler = null) + public ExperimentResult Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { if (validationData == null) { @@ -183,12 +184,11 @@ public IEnumerable> Execute(IDataView trainData, IDataView v /// after each model it produces during the /// course of the experiment. /// - /// An enumeration of all the runs in an experiment. See - /// for more information on the contents of a run. + /// The cross validation experiment result. /// /// Depending on the size of your data, the AutoML experiment could take a long time to execute. /// - public IEnumerable> Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null) + public CrossValidationExperimentResult Execute(IDataView trainData, uint numberOfCVFolds, ColumnInformation columnInformation = null, IEstimator preFeaturizer = null, IProgress> progressHandler = null) { UserInputValidationUtil.ValidateNumberOfCVFoldsArg(numberOfCVFolds); var splitResult = SplitUtil.CrossValSplit(Context, trainData, numberOfCVFolds, columnInformation?.SamplingKeyColumnName); @@ -211,12 +211,11 @@ public IEnumerable> Execute(IDataView trainDa /// after each model it produces during the /// course of the experiment. /// - /// An enumeration of all the runs in an experiment. See - /// for more information on the contents of a run. + /// The cross validation experiment result. /// /// Depending on the size of your data, the AutoML experiment could take a long time to execute. 
/// - public IEnumerable> Execute(IDataView trainData, + public CrossValidationExperimentResult Execute(IDataView trainData, uint numberOfCVFolds, string labelColumnName = DefaultColumnNames.Label, string samplingKeyColumn = null, IEstimator preFeaturizer = null, Progress> progressHandler = null) @@ -229,7 +228,11 @@ public IEnumerable> Execute(IDataView trainDa return Execute(trainData, numberOfCVFolds, columnInformation, preFeaturizer, progressHandler); } - private IEnumerable> ExecuteTrainValidate(IDataView trainData, + private protected abstract CrossValidationRunDetail GetBestCrossValRun(IEnumerable> results); + + private protected abstract RunDetail GetBestRun(IEnumerable> results); + + private ExperimentResult ExecuteTrainValidate(IDataView trainData, ColumnInformation columnInfo, IDataView validationData, IEstimator preFeaturizer, @@ -247,13 +250,13 @@ private IEnumerable> ExecuteTrainValidate(IDataView trainDat validationData = preprocessorTransform.Transform(validationData); } - var runner = new TrainValidateRunner(Context, trainData, validationData, columnInfo.LabelColumnName, _metricsAgent, - preFeaturizer, preprocessorTransform, _settings.DebugLogger); + var runner = new TrainValidateRunner(Context, trainData, validationData, columnInfo.LabelColumnName, MetricsAgent, + preFeaturizer, preprocessorTransform, _logger); var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainData, columnInfo); return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner); } - private IEnumerable> ExecuteCrossVal(IDataView[] trainDatasets, + private CrossValidationExperimentResult ExecuteCrossVal(IDataView[] trainDatasets, ColumnInformation columnInfo, IDataView[] validationDatasets, IEstimator preFeaturizer, @@ -266,13 +269,21 @@ private IEnumerable> ExecuteCrossVal(IDataVie ITransformer[] preprocessorTransforms = null; (trainDatasets, validationDatasets, preprocessorTransforms) = ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer); - var runner = new CrossValRunner(Context, trainDatasets, validationDatasets, _metricsAgent, preFeaturizer, - preprocessorTransforms, columnInfo.LabelColumnName, _settings.DebugLogger); + var runner = new CrossValRunner(Context, trainDatasets, validationDatasets, MetricsAgent, preFeaturizer, + preprocessorTransforms, columnInfo.LabelColumnName, _logger); var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo); - return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner); + + // Execute experiment & get all pipelines run + var experiment = new Experiment, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler, + Settings, MetricsAgent, _trainerWhitelist, columns, runner, _logger); + var runDetails = experiment.Execute(); + + var bestRun = GetBestCrossValRun(runDetails); + var experimentResult = new CrossValidationExperimentResult(runDetails, bestRun); + return experimentResult; } - private IEnumerable> ExecuteCrossValSummary(IDataView[] trainDatasets, + private ExperimentResult ExecuteCrossValSummary(IDataView[] trainDatasets, ColumnInformation columnInfo, IDataView[] validationDatasets, IEstimator preFeaturizer, @@ -285,24 +296,26 @@ private IEnumerable> ExecuteCrossValSummary(IDataView[] trai ITransformer[] preprocessorTransforms = null; (trainDatasets, validationDatasets, preprocessorTransforms) = ApplyPreFeaturizerCrossVal(trainDatasets, validationDatasets, preFeaturizer); - var runner = new CrossValSummaryRunner(Context, trainDatasets, 
validationDatasets, _metricsAgent, preFeaturizer, - preprocessorTransforms, columnInfo.LabelColumnName, _optimizingMetricInfo, _settings.DebugLogger); + var runner = new CrossValSummaryRunner(Context, trainDatasets, validationDatasets, MetricsAgent, preFeaturizer, + preprocessorTransforms, columnInfo.LabelColumnName, OptimizingMetricInfo, _logger); var columns = DatasetColumnInfoUtil.GetDatasetColumnInfo(Context, trainDatasets[0], columnInfo); return Execute(columnInfo, columns, preFeaturizer, progressHandler, runner); } - private IEnumerable Execute(ColumnInformation columnInfo, + private ExperimentResult Execute(ColumnInformation columnInfo, DatasetColumnInfo[] columns, IEstimator preFeaturizer, - IProgress progressHandler, - IRunner runner) - where TRunDetail : RunDetail + IProgress> progressHandler, + IRunner> runner) { // Execute experiment & get all pipelines run - var experiment = new Experiment(Context, _task, _optimizingMetricInfo, progressHandler, - _settings, _metricsAgent, _trainerWhitelist, columns, runner); + var experiment = new Experiment, TMetrics>(Context, _task, OptimizingMetricInfo, progressHandler, + Settings, MetricsAgent, _trainerWhitelist, columns, runner, _logger); + var runDetails = experiment.Execute(); - return experiment.Execute(); + var bestRun = GetBestRun(runDetails); + var experimentResult = new ExperimentResult(runDetails, bestRun); + return experimentResult; } private static (IDataView[] trainDatasets, IDataView[] validDatasets, ITransformer[] preprocessorTransforms) diff --git a/src/Microsoft.ML.Auto/API/ExperimentResults/CrossValidationExperimentResult.cs b/src/Microsoft.ML.Auto/API/ExperimentResults/CrossValidationExperimentResult.cs new file mode 100644 index 0000000000..bdc13290fa --- /dev/null +++ b/src/Microsoft.ML.Auto/API/ExperimentResults/CrossValidationExperimentResult.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Auto +{ + /// + /// Result of an AutoML experiment that includes cross validation details. + /// + /// Metrics type for the experiment (like ). + public class CrossValidationExperimentResult + { + /// + /// Details of the cross validation runs in this experiment. + /// + /// + /// See for more information. + /// + public readonly IEnumerable> RunDetails; + + /// + /// Best run in this experiment. + /// + /// + /// AutoML considers the optimizing metric (like ) + /// when determining the best run. + /// + public readonly CrossValidationRunDetail BestRun; + + internal CrossValidationExperimentResult(IEnumerable> runDetails, + CrossValidationRunDetail bestRun) + { + RunDetails = runDetails; + BestRun = bestRun; + } + } +} diff --git a/src/Microsoft.ML.Auto/API/ExperimentResults/ExperimentResult.cs b/src/Microsoft.ML.Auto/API/ExperimentResults/ExperimentResult.cs new file mode 100644 index 0000000000..dd9d2718dc --- /dev/null +++ b/src/Microsoft.ML.Auto/API/ExperimentResults/ExperimentResult.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Auto +{ + /// + /// Result of an AutoML experiment. + /// + /// Metrics type for the experiment (like ). 
+ public class ExperimentResult + { + /// + /// Details of the runs in this experiment. + /// + /// + /// See for more information. + /// + public readonly IEnumerable> RunDetails; + + /// + /// Best run in this experiment. + /// + /// + /// AutoML considers the optimizing metric (like ) + /// when determining the best run. + /// + public readonly RunDetail BestRun; + + internal ExperimentResult(IEnumerable> runDetails, + RunDetail bestRun) + { + RunDetails = runDetails; + BestRun = bestRun; + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs index 891f3615e0..800b1ca27f 100644 --- a/src/Microsoft.ML.Auto/API/ExperimentSettings.cs +++ b/src/Microsoft.ML.Auto/API/ExperimentSettings.cs @@ -49,13 +49,33 @@ public abstract class ExperimentSettings public DirectoryInfo CacheDirectory { get; set; } = new DirectoryInfo(Path.Combine(Path.GetTempPath(), "Microsoft.ML.Auto")); /// - /// This setting controls whether or not an AutoML experiment will make use of ML.NET-provided caching. - /// If set to true, caching will be forced on for all pipelines. If set to false, caching will be forced off. - /// If set to (default value), AutoML will decide whether to enable caching for each model. + /// Whether AutoML should cache before ML.NET trainers. + /// See for more information on caching. /// - public bool? CacheBeforeTrainer = null; + public CacheBeforeTrainer CacheBeforeTrainer = CacheBeforeTrainer.Auto; internal int MaxModels = int.MaxValue; - internal IDebugLogger DebugLogger; + } + + /// + /// Whether AutoML should cache before ML.NET trainers. + /// See for more information on caching. + /// + public enum CacheBeforeTrainer + { + /// + /// Dynamically determine whether to cache before each trainer. + /// + Auto, + + /// + /// Always force caching on. + /// + On, + + /// + /// Always force caching off. + /// + Off, } } diff --git a/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs b/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs index bd4383d861..ea254a2c66 100644 --- a/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs +++ b/src/Microsoft.ML.Auto/API/MulticlassClassificationExperiment.cs @@ -122,7 +122,7 @@ public enum MulticlassClassificationTrainer /// /// AutoML experiment on multiclass classification datasets. /// - public sealed class MulticlassClassificationExperiment : ExperimentBase + public sealed class MulticlassClassificationExperiment : ExperimentBase { internal MulticlassClassificationExperiment(MLContext context, MulticlassExperimentSettings settings) : base(context, @@ -133,38 +133,15 @@ internal MulticlassClassificationExperiment(MLContext context, MulticlassExperim TrainerExtensionUtil.GetTrainerNames(settings.Trainers)) { } - } - /// - /// Extension methods that operate over multiclass experiment run results. - /// - public static class MulticlassExperimentResultExtensions - { - /// - /// Select the best run from an enumeration of experiment runs. - /// - /// Enumeration of AutoML experiment run results. - /// Metric to consider when selecting the best run. - /// The best experiment run. 
- public static RunDetail Best(this IEnumerable> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.MicroAccuracy) + private protected override CrossValidationRunDetail GetBestCrossValRun(IEnumerable> results) { - var metricsAgent = new MultiMetricsAgent(null, metric); - var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing; - return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing); + return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing); } - - /// - /// Select the best run from an enumeration of experiment cross validation runs. - /// - /// Enumeration of AutoML experiment cross validation run results. - /// Metric to consider when selecting the best run. - /// The best experiment run. - public static CrossValidationRunDetail Best(this IEnumerable> results, MulticlassClassificationMetric metric = MulticlassClassificationMetric.MicroAccuracy) + private protected override RunDetail GetBestRun(IEnumerable> results) { - var metricsAgent = new MultiMetricsAgent(null, metric); - var isMetricMaximizing = new OptimizingMetricInfo(metric).IsMaximizing; - return BestResultUtil.GetBestRun(results, metricsAgent, isMetricMaximizing); + return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing); } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Auto/API/RegressionExperiment.cs b/src/Microsoft.ML.Auto/API/RegressionExperiment.cs index f57fb2470f..58f2973511 100644 --- a/src/Microsoft.ML.Auto/API/RegressionExperiment.cs +++ b/src/Microsoft.ML.Auto/API/RegressionExperiment.cs @@ -108,7 +108,7 @@ public enum RegressionTrainer /// /// AutoML experiment on regression classification datasets. /// - public sealed class RegressionExperiment : ExperimentBase + public sealed class RegressionExperiment : ExperimentBase { internal RegressionExperiment(MLContext context, RegressionExperimentSettings settings) : base(context, @@ -119,6 +119,16 @@ internal RegressionExperiment(MLContext context, RegressionExperimentSettings se TrainerExtensionUtil.GetTrainerNames(settings.Trainers)) { } + + private protected override CrossValidationRunDetail GetBestCrossValRun(IEnumerable> results) + { + return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing); + } + + private protected override RunDetail GetBestRun(IEnumerable> results) + { + return BestResultUtil.GetBestRun(results, MetricsAgent, OptimizingMetricInfo.IsMaximizing); + } } /// diff --git a/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs b/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs index 3600374dea..9eeaf3e197 100644 --- a/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs +++ b/src/Microsoft.ML.Auto/API/RunDetails/RunDetail.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System; +using Microsoft.ML.Data; namespace Microsoft.ML.Auto { @@ -14,7 +15,7 @@ namespace Microsoft.ML.Auto /// This object contains information about each model evaluated during /// the AutoML experiment. /// - /// Type of the metrics for this experiment. (For instance, (cref Binary, Regression).) + /// Metrics type for the experiment (like ). public sealed class RunDetail : RunDetail { /// diff --git a/src/Microsoft.ML.Auto/DebugLogger.cs b/src/Microsoft.ML.Auto/DebugLogger.cs deleted file mode 100644 index 90ed9cfdd2..0000000000 --- a/src/Microsoft.ML.Auto/DebugLogger.cs +++ /dev/null @@ -1,17 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. 
-// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace Microsoft.ML.Auto -{ - internal interface IDebugLogger - { - void Log(LogSeverity logLevel, string message); - } - - internal enum LogSeverity - { - Error, - Debug - } -} diff --git a/src/Microsoft.ML.Auto/Experiment/Experiment.cs b/src/Microsoft.ML.Auto/Experiment/Experiment.cs index 4eb389ab79..ec167e9b17 100644 --- a/src/Microsoft.ML.Auto/Experiment/Experiment.cs +++ b/src/Microsoft.ML.Auto/Experiment/Experiment.cs @@ -23,6 +23,7 @@ internal class Experiment where TRunDetail : RunDetail private readonly DatasetColumnInfo[] _datasetColumnInfo; private readonly IRunner _runner; private readonly IList _history = new List(); + private readonly AutoMLLogger _logger; public Experiment(MLContext context, @@ -33,7 +34,8 @@ public Experiment(MLContext context, IMetricsAgent metricsAgent, IEnumerable trainerWhitelist, DatasetColumnInfo[] datasetColumnInfo, - IRunner runner) + IRunner runner, + AutoMLLogger logger) { _context = context; _optimizingMetricInfo = metricInfo; @@ -45,6 +47,7 @@ public Experiment(MLContext context, _modelDirectory = GetModelDirectory(_experimentSettings.CacheDirectory); _datasetColumnInfo = datasetColumnInfo; _runner = runner; + _logger = logger; } public IList Execute() @@ -58,7 +61,7 @@ public IList Execute() // get next pipeline var getPiplelineStopwatch = Stopwatch.StartNew(); - var pipeline = PipelineSuggester.GetNextInferredPipeline(_context, _history, _datasetColumnInfo, _task, _optimizingMetricInfo.IsMaximizing, _trainerWhitelist, _experimentSettings.CacheBeforeTrainer); + var pipeline = PipelineSuggester.GetNextInferredPipeline(_context, _history, _datasetColumnInfo, _task, _optimizingMetricInfo.IsMaximizing, _experimentSettings.CacheBeforeTrainer, _trainerWhitelist); var pipelineInferenceTimeInSeconds = getPiplelineStopwatch.Elapsed.TotalSeconds; // break if no candidates returned, means no valid pipeline available @@ -68,7 +71,7 @@ public IList Execute() } // evaluate pipeline - Log(LogSeverity.Debug, $"Evaluating pipeline {pipeline.ToString()}"); + _logger.Trace($"Evaluating pipeline {pipeline.ToString()}"); (SuggestedPipelineRunDetail suggestedPipelineRunDetail, TRunDetail runDetail) = _runner.Run(pipeline, _modelDirectory, _history.Count + 1); _history.Add(suggestedPipelineRunDetail); @@ -128,23 +131,13 @@ private void ReportProgress(TRunDetail iterationResult) } catch (Exception ex) { - Log(LogSeverity.Error, $"Progress report callback reported exception {ex}"); + _logger.Error($"Progress report callback reported exception {ex}"); } } private void WriteIterationLog(SuggestedPipeline pipeline, SuggestedPipelineRunDetail runResult, Stopwatch stopwatch) { - Log(LogSeverity.Debug, $"{_history.Count}\t{runResult.Score}\t{stopwatch.Elapsed}\t{pipeline.ToString()}"); - } - - private void Log(LogSeverity severity, string message) - { - if(_experimentSettings?.DebugLogger == null) - { - return; - } - - _experimentSettings.DebugLogger.Log(severity, message); + _logger.Trace($"{_history.Count}\t{runResult.Score}\t{stopwatch.Elapsed}\t{pipeline.ToString()}"); } } } diff --git a/src/Microsoft.ML.Auto/Experiment/Runners/CrossValRunner.cs b/src/Microsoft.ML.Auto/Experiment/Runners/CrossValRunner.cs index f211b3107f..5accecde8c 100644 --- a/src/Microsoft.ML.Auto/Experiment/Runners/CrossValRunner.cs +++ b/src/Microsoft.ML.Auto/Experiment/Runners/CrossValRunner.cs @@ -18,7 +18,7 @@ internal class CrossValRunner : IRunner 
_preFeaturizer; private readonly ITransformer[] _preprocessorTransforms; private readonly string _labelColumn; - private readonly IDebugLogger _logger; + private readonly AutoMLLogger _logger; private readonly DataViewSchema _modelInputSchema; public CrossValRunner(MLContext context, @@ -28,7 +28,7 @@ public CrossValRunner(MLContext context, IEstimator preFeaturizer, ITransformer[] preprocessorTransforms, string labelColumn, - IDebugLogger logger) + AutoMLLogger logger) { _context = context; _trainDatasets = trainDatasets; diff --git a/src/Microsoft.ML.Auto/Experiment/Runners/CrossValSummaryRunner.cs b/src/Microsoft.ML.Auto/Experiment/Runners/CrossValSummaryRunner.cs index 701baa1663..5415f32363 100644 --- a/src/Microsoft.ML.Auto/Experiment/Runners/CrossValSummaryRunner.cs +++ b/src/Microsoft.ML.Auto/Experiment/Runners/CrossValSummaryRunner.cs @@ -20,7 +20,7 @@ internal class CrossValSummaryRunner : IRunner> private readonly ITransformer[] _preprocessorTransforms; private readonly string _labelColumn; private readonly OptimizingMetricInfo _optimizingMetricInfo; - private readonly IDebugLogger _logger; + private readonly AutoMLLogger _logger; private readonly DataViewSchema _modelInputSchema; public CrossValSummaryRunner(MLContext context, @@ -31,7 +31,7 @@ public CrossValSummaryRunner(MLContext context, ITransformer[] preprocessorTransforms, string labelColumn, OptimizingMetricInfo optimizingMetricInfo, - IDebugLogger logger) + AutoMLLogger logger) { _context = context; _trainDatasets = trainDatasets; diff --git a/src/Microsoft.ML.Auto/Experiment/Runners/RunnerUtil.cs b/src/Microsoft.ML.Auto/Experiment/Runners/RunnerUtil.cs index 88575a2280..f4dae203b1 100644 --- a/src/Microsoft.ML.Auto/Experiment/Runners/RunnerUtil.cs +++ b/src/Microsoft.ML.Auto/Experiment/Runners/RunnerUtil.cs @@ -19,7 +19,7 @@ public static (ModelContainer model, TMetrics metrics, Exception exception, doub ITransformer preprocessorTransform, FileInfo modelFileInfo, DataViewSchema modelInputSchema, - IDebugLogger logger) where TMetrics : class + AutoMLLogger logger) where TMetrics : class { try { @@ -44,7 +44,7 @@ public static (ModelContainer model, TMetrics metrics, Exception exception, doub } catch (Exception ex) { - logger?.Log(LogSeverity.Error, $"Pipeline crashed: {pipeline.ToString()} . Exception: {ex}"); + logger.Error($"Pipeline crashed: {pipeline.ToString()} . 
Exception: {ex}"); return (null, null, ex, double.NaN); } } diff --git a/src/Microsoft.ML.Auto/Experiment/Runners/TrainValidateRunner.cs b/src/Microsoft.ML.Auto/Experiment/Runners/TrainValidateRunner.cs index 9226dcbeb0..baad2e2e10 100644 --- a/src/Microsoft.ML.Auto/Experiment/Runners/TrainValidateRunner.cs +++ b/src/Microsoft.ML.Auto/Experiment/Runners/TrainValidateRunner.cs @@ -16,7 +16,7 @@ internal class TrainValidateRunner : IRunner> private readonly IMetricsAgent _metricsAgent; private readonly IEstimator _preFeaturizer; private readonly ITransformer _preprocessorTransform; - private readonly IDebugLogger _logger; + private readonly AutoMLLogger _logger; private readonly DataViewSchema _modelInputSchema; public TrainValidateRunner(MLContext context, @@ -26,7 +26,7 @@ public TrainValidateRunner(MLContext context, IMetricsAgent metricsAgent, IEstimator preFeaturizer, ITransformer preprocessorTransform, - IDebugLogger logger) + AutoMLLogger logger) { _context = context; _trainData = trainData; diff --git a/src/Microsoft.ML.Auto/Experiment/SuggestedPipelineBuilder.cs b/src/Microsoft.ML.Auto/Experiment/SuggestedPipelineBuilder.cs index a3fad88e0b..b3a94d7e5b 100644 --- a/src/Microsoft.ML.Auto/Experiment/SuggestedPipelineBuilder.cs +++ b/src/Microsoft.ML.Auto/Experiment/SuggestedPipelineBuilder.cs @@ -13,11 +13,11 @@ public static SuggestedPipeline Build(MLContext context, ICollection transforms, ICollection transformsPostTrainer, SuggestedTrainer trainer, - bool? enableCaching) + CacheBeforeTrainer cacheBeforeTrainerSettings) { var trainerInfo = trainer.BuildTrainer().Info; AddNormalizationTransforms(context, trainerInfo, transforms); - var cacheBeforeTrainer = ShouldCacheBeforeTrainer(trainerInfo, enableCaching); + var cacheBeforeTrainer = ShouldCacheBeforeTrainer(trainerInfo, cacheBeforeTrainerSettings); return new SuggestedPipeline(transforms, transformsPostTrainer, trainer, context, cacheBeforeTrainer); } @@ -35,9 +35,9 @@ private static void AddNormalizationTransforms(MLContext context, transforms.Add(transform); } - private static bool ShouldCacheBeforeTrainer(TrainerInfo trainerInfo, bool? 
enableCaching) + private static bool ShouldCacheBeforeTrainer(TrainerInfo trainerInfo, CacheBeforeTrainer cacheBeforeTrainerSettings) { - return enableCaching == true || (enableCaching == null && trainerInfo.WantCaching); + return cacheBeforeTrainerSettings == CacheBeforeTrainer.On || (cacheBeforeTrainerSettings == CacheBeforeTrainer.Auto && trainerInfo.WantCaching); } } } diff --git a/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj b/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj index d97c3a819a..d07b6b91d0 100644 --- a/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj +++ b/src/Microsoft.ML.Auto/Microsoft.ML.Auto.csproj @@ -42,4 +42,10 @@ + + + + + + diff --git a/src/Microsoft.ML.Auto/PipelineSuggesters/PipelineSuggester.cs b/src/Microsoft.ML.Auto/PipelineSuggesters/PipelineSuggester.cs index ca834ce801..38e79b9877 100644 --- a/src/Microsoft.ML.Auto/PipelineSuggesters/PipelineSuggester.cs +++ b/src/Microsoft.ML.Auto/PipelineSuggesters/PipelineSuggester.cs @@ -20,7 +20,7 @@ public static Pipeline GetNextPipeline(MLContext context, bool isMaximizingMetric = true) { var inferredHistory = history.Select(r => SuggestedPipelineRunDetail.FromPipelineRunResult(context, r)); - var nextInferredPipeline = GetNextInferredPipeline(context, inferredHistory, columns, task, isMaximizingMetric); + var nextInferredPipeline = GetNextInferredPipeline(context, inferredHistory, columns, task, isMaximizingMetric, CacheBeforeTrainer.Auto); return nextInferredPipeline?.ToPipeline(); } @@ -29,8 +29,8 @@ public static SuggestedPipeline GetNextInferredPipeline(MLContext context, DatasetColumnInfo[] columns, TaskKind task, bool isMaximizingMetric, - IEnumerable trainerWhitelist = null, - bool? _enableCaching = null) + CacheBeforeTrainer cacheBeforeTrainer, + IEnumerable trainerWhitelist = null) { var availableTrainers = RecipeInference.AllowedTrainers(context, task, ColumnInformationUtil.BuildColumnInfo(columns), trainerWhitelist); @@ -40,7 +40,7 @@ public static SuggestedPipeline GetNextInferredPipeline(MLContext context, // if we haven't run all pipelines once if (history.Count() < availableTrainers.Count()) { - return GetNextFirstStagePipeline(context, history, availableTrainers, transforms, transformsPostTrainer, _enableCaching); + return GetNextFirstStagePipeline(context, history, availableTrainers, transforms, transformsPostTrainer, cacheBeforeTrainer); } // get top trainers from stage 1 runs @@ -71,7 +71,7 @@ public static SuggestedPipeline GetNextInferredPipeline(MLContext context, break; } - var suggestedPipeline = SuggestedPipelineBuilder.Build(context, transforms, transformsPostTrainer, newTrainer, _enableCaching); + var suggestedPipeline = SuggestedPipelineBuilder.Build(context, transforms, transformsPostTrainer, newTrainer, cacheBeforeTrainer); // make sure we have not seen pipeline before if (!visitedPipelines.Contains(suggestedPipeline)) @@ -119,10 +119,10 @@ private static SuggestedPipeline GetNextFirstStagePipeline(MLContext context, IEnumerable availableTrainers, ICollection transforms, ICollection transformsPostTrainer, - bool? 
_enableCaching) + CacheBeforeTrainer cacheBeforeTrainer) { var trainer = availableTrainers.ElementAt(history.Count()); - return SuggestedPipelineBuilder.Build(context, transforms, transformsPostTrainer, trainer, _enableCaching); + return SuggestedPipelineBuilder.Build(context, transforms, transformsPostTrainer, trainer, cacheBeforeTrainer); } private static IValueGenerator[] ConvertToValueGenerators(IEnumerable hps) diff --git a/src/Microsoft.ML.Auto/Utils/Logger.cs b/src/Microsoft.ML.Auto/Utils/Logger.cs new file mode 100644 index 0000000000..7c8628ed62 --- /dev/null +++ b/src/Microsoft.ML.Auto/Utils/Logger.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.ML.Runtime; + +namespace Microsoft.ML.Auto +{ + internal class AutoMLLogger + { + public const string ChannelName = "AutoML"; + + private readonly IChannel _channel; + + public AutoMLLogger(MLContext context) + { + _channel = ((IChannelProvider)context).Start(ChannelName); + } + + public void Trace(string message) + { + _channel.Trace(MessageSensitivity.None, message); + } + + public void Error(string message) + { + _channel.Error(MessageSensitivity.None, message); + } + } +} \ No newline at end of file diff --git a/src/mlnet/AutoML/AutoMLDebugLogger.cs b/src/mlnet/AutoML/AutoMLDebugLogger.cs deleted file mode 100644 index e6def1fd81..0000000000 --- a/src/mlnet/AutoML/AutoMLDebugLogger.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Auto; -using NLog; - -namespace Microsoft.ML.CLI.AutoML -{ - internal class AutoMLDebugLogger: IDebugLogger - { - public static AutoMLDebugLogger Instance = new AutoMLDebugLogger(); - - private static Logger logger = LogManager.GetCurrentClassLogger(); - - public void Log(LogSeverity severity, string message) - { - logger.Log(LogLevel.Trace, message); - } - } -} diff --git a/src/mlnet/AutoML/AutoMLEngine.cs b/src/mlnet/AutoML/AutoMLEngine.cs index 713009205f..194501a62f 100644 --- a/src/mlnet/AutoML/AutoMLEngine.cs +++ b/src/mlnet/AutoML/AutoMLEngine.cs @@ -4,7 +4,6 @@ using System.Collections.Generic; using Microsoft.ML.Auto; -using Microsoft.ML.CLI.AutoML; using Microsoft.ML.CLI.Data; using Microsoft.ML.CLI.ShellProgressBar; using Microsoft.ML.CLI.Utilities; @@ -17,14 +16,14 @@ internal class AutoMLEngine : IAutoMLEngine { private NewCommandSettings settings; private TaskKind taskKind; - private bool? 
enableCaching; + private CacheBeforeTrainer cacheBeforeTrainer; private static Logger logger = LogManager.GetCurrentClassLogger(); public AutoMLEngine(NewCommandSettings settings) { this.settings = settings; this.taskKind = Utils.GetTaskKind(settings.MlTask); - this.enableCaching = Utils.GetCacheSettings(settings.Cache); + this.cacheBeforeTrainer = Utils.GetCacheSettings(settings.Cache); } public ColumnInferenceResults InferColumns(MLContext context, ColumnInformation columnInformation) @@ -45,23 +44,22 @@ public ColumnInferenceResults InferColumns(MLContext context, ColumnInformation return columnInference; } - IEnumerable> IAutoMLEngine.ExploreBinaryClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, BinaryClassificationMetric optimizationMetric, ProgressBar progressBar) + ExperimentResult IAutoMLEngine.ExploreBinaryClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, BinaryClassificationMetric optimizationMetric, ProgressBar progressBar) { var progressReporter = new ProgressHandlers.BinaryClassificationHandler(optimizationMetric, progressBar); var result = context.Auto() .CreateBinaryClassificationExperiment(new BinaryExperimentSettings() { MaxExperimentTimeInSeconds = settings.MaxExplorationTime, - CacheBeforeTrainer = this.enableCaching, - OptimizingMetric = optimizationMetric, - DebugLogger = AutoMLDebugLogger.Instance + CacheBeforeTrainer = this.cacheBeforeTrainer, + OptimizingMetric = optimizationMetric }) .Execute(trainData, validationData, columnInformation, progressHandler: progressReporter); logger.Log(LogLevel.Trace, Strings.RetrieveBestPipeline); return result; } - IEnumerable> IAutoMLEngine.ExploreRegressionModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, RegressionMetric optimizationMetric, ProgressBar progressBar) + ExperimentResult IAutoMLEngine.ExploreRegressionModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, RegressionMetric optimizationMetric, ProgressBar progressBar) { var progressReporter = new ProgressHandlers.RegressionHandler(optimizationMetric, progressBar); var result = context.Auto() @@ -69,23 +67,21 @@ IEnumerable> IAutoMLEngine.ExploreRegressionModels( { MaxExperimentTimeInSeconds = settings.MaxExplorationTime, OptimizingMetric = optimizationMetric, - CacheBeforeTrainer = this.enableCaching, - DebugLogger = AutoMLDebugLogger.Instance + CacheBeforeTrainer = this.cacheBeforeTrainer }).Execute(trainData, validationData, columnInformation, progressHandler: progressReporter); logger.Log(LogLevel.Trace, Strings.RetrieveBestPipeline); return result; } - IEnumerable> IAutoMLEngine.ExploreMultiClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, MulticlassClassificationMetric optimizationMetric, ProgressBar progressBar) + ExperimentResult IAutoMLEngine.ExploreMultiClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, MulticlassClassificationMetric optimizationMetric, ProgressBar progressBar) { var progressReporter = new ProgressHandlers.MulticlassClassificationHandler(optimizationMetric, progressBar); var result = context.Auto() .CreateMulticlassClassificationExperiment(new MulticlassExperimentSettings() { MaxExperimentTimeInSeconds = settings.MaxExplorationTime, - 
CacheBeforeTrainer = this.enableCaching, - OptimizingMetric = optimizationMetric, - DebugLogger = AutoMLDebugLogger.Instance + CacheBeforeTrainer = this.cacheBeforeTrainer, + OptimizingMetric = optimizationMetric }).Execute(trainData, validationData, columnInformation, progressHandler: progressReporter); logger.Log(LogLevel.Trace, Strings.RetrieveBestPipeline); return result; diff --git a/src/mlnet/AutoML/IAutoMLEngine.cs b/src/mlnet/AutoML/IAutoMLEngine.cs index b7ffc57652..b4355bc0f4 100644 --- a/src/mlnet/AutoML/IAutoMLEngine.cs +++ b/src/mlnet/AutoML/IAutoMLEngine.cs @@ -13,11 +13,11 @@ internal interface IAutoMLEngine { ColumnInferenceResults InferColumns(MLContext context, ColumnInformation columnInformation); - IEnumerable> ExploreBinaryClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, BinaryClassificationMetric optimizationMetric, ProgressBar progressBar = null); + ExperimentResult ExploreBinaryClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, BinaryClassificationMetric optimizationMetric, ProgressBar progressBar = null); - IEnumerable> ExploreMultiClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, MulticlassClassificationMetric optimizationMetric, ProgressBar progressBar = null); + ExperimentResult ExploreMultiClassificationModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, MulticlassClassificationMetric optimizationMetric, ProgressBar progressBar = null); - IEnumerable> ExploreRegressionModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, RegressionMetric optimizationMetric, ProgressBar progressBar = null); + ExperimentResult ExploreRegressionModels(MLContext context, IDataView trainData, IDataView validationData, ColumnInformation columnInformation, RegressionMetric optimizationMetric, ProgressBar progressBar = null); } } diff --git a/src/mlnet/CodeGenerator/CodeGenerationHelper.cs b/src/mlnet/CodeGenerator/CodeGenerationHelper.cs index b974a38a37..552d5db127 100644 --- a/src/mlnet/CodeGenerator/CodeGenerationHelper.cs +++ b/src/mlnet/CodeGenerator/CodeGenerationHelper.cs @@ -40,6 +40,8 @@ public void GenerateCode() { Stopwatch watch = Stopwatch.StartNew(); var context = new MLContext(); + ConsumeAutoMLSDKLogs(context); + var verboseLevel = Utils.GetVerbosity(settings.Verbosity); // Infer columns @@ -78,9 +80,9 @@ public void GenerateCode() // The reason why we are doing this way of defining 3 different results is because of the AutoML API // i.e there is no common class/interface to handle all three tasks together. 
- IEnumerable> binaryRunDetails = default; - IEnumerable> multiRunDetails = default; - IEnumerable> regressionRunDetails = default; + ExperimentResult binaryExperimentResult = default; + ExperimentResult multiExperimentResult = default; + ExperimentResult regressionExperimentResult = default; if (verboseLevel > LogLevel.Trace) { Console.Write($"{Strings.ExplorePipeline}: "); @@ -116,13 +118,13 @@ public void GenerateCode() switch (taskKind) { case TaskKind.BinaryClassification: - t = new Thread(() => binaryRunDetails = automlEngine.ExploreBinaryClassificationModels(context, trainData, validationData, columnInformation, new BinaryExperimentSettings().OptimizingMetric, pbar)); + t = new Thread(() => binaryExperimentResult = automlEngine.ExploreBinaryClassificationModels(context, trainData, validationData, columnInformation, new BinaryExperimentSettings().OptimizingMetric, pbar)); break; case TaskKind.Regression: - t = new Thread(() => regressionRunDetails = automlEngine.ExploreRegressionModels(context, trainData, validationData, columnInformation, new RegressionExperimentSettings().OptimizingMetric, pbar)); + t = new Thread(() => regressionExperimentResult = automlEngine.ExploreRegressionModels(context, trainData, validationData, columnInformation, new RegressionExperimentSettings().OptimizingMetric, pbar)); break; case TaskKind.MulticlassClassification: - t = new Thread(() => multiRunDetails = automlEngine.ExploreMultiClassificationModels(context, trainData, validationData, columnInformation, new MulticlassExperimentSettings().OptimizingMetric, pbar)); + t = new Thread(() => multiExperimentResult = automlEngine.ExploreMultiClassificationModels(context, trainData, validationData, columnInformation, new MulticlassExperimentSettings().OptimizingMetric, pbar)); break; default: logger.Log(LogLevel.Error, Strings.UnsupportedMlTask); @@ -152,13 +154,13 @@ public void GenerateCode() switch (taskKind) { case TaskKind.BinaryClassification: - binaryRunDetails = automlEngine.ExploreBinaryClassificationModels(context, trainData, validationData, columnInformation, new BinaryExperimentSettings().OptimizingMetric); + binaryExperimentResult = automlEngine.ExploreBinaryClassificationModels(context, trainData, validationData, columnInformation, new BinaryExperimentSettings().OptimizingMetric); break; case TaskKind.Regression: - regressionRunDetails = automlEngine.ExploreRegressionModels(context, trainData, validationData, columnInformation, new RegressionExperimentSettings().OptimizingMetric); + regressionExperimentResult = automlEngine.ExploreRegressionModels(context, trainData, validationData, columnInformation, new RegressionExperimentSettings().OptimizingMetric); break; case TaskKind.MulticlassClassification: - multiRunDetails = automlEngine.ExploreMultiClassificationModels(context, trainData, validationData, columnInformation, new MulticlassExperimentSettings().OptimizingMetric); + multiExperimentResult = automlEngine.ExploreMultiClassificationModels(context, trainData, validationData, columnInformation, new MulticlassExperimentSettings().OptimizingMetric); break; default: logger.Log(LogLevel.Error, Strings.UnsupportedMlTask); @@ -188,25 +190,25 @@ public void GenerateCode() switch (taskKind) { case TaskKind.BinaryClassification: - var bestBinaryIteration = binaryRunDetails.Best(); + var bestBinaryIteration = binaryExperimentResult.BestRun; bestPipeline = bestBinaryIteration.Pipeline; bestModel = bestBinaryIteration.Model; - ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, 
settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), binaryRunDetails.Count()); - ConsolePrinter.PrintIterationSummary(binaryRunDetails, new BinaryExperimentSettings().OptimizingMetric, 5); + ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), binaryExperimentResult.RunDetails.Count()); + ConsolePrinter.PrintIterationSummary(binaryExperimentResult.RunDetails, new BinaryExperimentSettings().OptimizingMetric, 5); break; case TaskKind.Regression: - var bestRegressionIteration = regressionRunDetails.Best(); + var bestRegressionIteration = regressionExperimentResult.BestRun; bestPipeline = bestRegressionIteration.Pipeline; bestModel = bestRegressionIteration.Model; - ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), regressionRunDetails.Count()); - ConsolePrinter.PrintIterationSummary(regressionRunDetails, new RegressionExperimentSettings().OptimizingMetric, 5); + ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), regressionExperimentResult.RunDetails.Count()); + ConsolePrinter.PrintIterationSummary(regressionExperimentResult.RunDetails, new RegressionExperimentSettings().OptimizingMetric, 5); break; case TaskKind.MulticlassClassification: - var bestMultiIteration = multiRunDetails.Best(); + var bestMultiIteration = multiExperimentResult.BestRun; bestPipeline = bestMultiIteration.Pipeline; bestModel = bestMultiIteration.Model; - ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), multiRunDetails.Count()); - ConsolePrinter.PrintIterationSummary(multiRunDetails, new MulticlassExperimentSettings().OptimizingMetric, 5); + ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, settings.MlTask, settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), multiExperimentResult.RunDetails.Count()); + ConsolePrinter.PrintIterationSummary(multiExperimentResult.RunDetails, new MulticlassExperimentSettings().OptimizingMetric, 5); break; } } @@ -264,5 +266,17 @@ internal void GenerateProject(ColumnInferenceResults columnInference, Pipeline p return (trainData, validationData); } + + private void ConsumeAutoMLSDKLogs(MLContext context) + { + context.Log += (object sender, LoggingEventArgs loggingEventArgs) => + { + var logMessage = loggingEventArgs.Message; + if (logMessage.Contains(AutoMLLogger.ChannelName)) + { + logger.Trace(loggingEventArgs.Message); + } + }; + } } } diff --git a/src/mlnet/Utilities/Utils.cs b/src/mlnet/Utilities/Utils.cs index a8ee940e84..5759579ee2 100644 --- a/src/mlnet/Utilities/Utils.cs +++ b/src/mlnet/Utilities/Utils.cs @@ -107,13 +107,13 @@ internal static Type GetCSharpType(DataKind labelType) } } - internal static bool? 
GetCacheSettings(string input) + internal static CacheBeforeTrainer GetCacheSettings(string input) { switch (input) { - case "on": return true; - case "off": return false; - case "auto": return null; + case "on": return CacheBeforeTrainer.On; + case "off": return CacheBeforeTrainer.Off; + case "auto": return CacheBeforeTrainer.Auto; default: throw new ArgumentException($"{nameof(input)} is invalid", nameof(input)); } diff --git a/src/mlnet/mlnet.csproj b/src/mlnet/mlnet.csproj index 7a2cf20063..02be3cc03b 100644 --- a/src/mlnet/mlnet.csproj +++ b/src/mlnet/mlnet.csproj @@ -116,5 +116,11 @@ ModelBuilder.cs + + + + + + diff --git a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs index cef86d8557..0850d6019f 100644 --- a/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs @@ -19,14 +19,13 @@ public void AutoFitBinaryTest() var columnInference = context.Auto().InferColumns(dataPath, DatasetUtil.UciAdultLabel); var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(dataPath); - var results = context.Auto() + var result = context.Auto() .CreateBinaryClassificationExperiment(0) .Execute(trainData, new ColumnInformation() { LabelColumnName = DatasetUtil.UciAdultLabel }); - var best = results.Best(); - Assert.IsTrue(best.ValidationMetrics.Accuracy > 0.70); - Assert.IsNotNull(best.Estimator); - Assert.IsNotNull(best.Model); - Assert.IsNotNull(best.TrainerName); + Assert.IsTrue(result.BestRun.ValidationMetrics.Accuracy > 0.70); + Assert.IsNotNull(result.BestRun.Estimator); + Assert.IsNotNull(result.BestRun.Model); + Assert.IsNotNull(result.BestRun.TrainerName); } [TestMethod] @@ -36,12 +35,11 @@ public void AutoFitMultiTest() var columnInference = context.Auto().InferColumns(DatasetUtil.TrivialMulticlassDatasetPath, DatasetUtil.TrivialMulticlassDatasetLabel); var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions); var trainData = textLoader.Load(DatasetUtil.TrivialMulticlassDatasetPath); - var results = context.Auto() + var result = context.Auto() .CreateMulticlassClassificationExperiment(0) .Execute(trainData, 5, DatasetUtil.TrivialMulticlassDatasetLabel); - var best = results.Best(); - Assert.IsTrue(best.Results.First().ValidationMetrics.MicroAccuracy >= 0.7); - var scoredData = best.Results.First().Model.Transform(trainData); + Assert.IsTrue(result.BestRun.Results.First().ValidationMetrics.MicroAccuracy >= 0.7); + var scoredData = result.BestRun.Results.First().Model.Transform(trainData); Assert.AreEqual(NumberDataViewType.Single, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type); } @@ -55,12 +53,12 @@ public void AutoFitRegressionTest() var trainData = textLoader.Load(dataPath); var validationData = context.Data.TakeRows(trainData, 20); trainData = context.Data.SkipRows(trainData, 20); - var results = context.Auto() + var result = context.Auto() .CreateRegressionExperiment(0) .Execute(trainData, validationData, new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel }); - Assert.IsTrue(results.Max(i => i.ValidationMetrics.RSquared > 0.9)); + Assert.IsTrue(result.RunDetails.Max(i => i.ValidationMetrics.RSquared > 0.9)); } } } diff --git a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj index 95196ad9d9..dc4c3b29aa 100644 --- a/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj +++ 
b/test/Microsoft.ML.AutoML.Tests/Microsoft.ML.AutoML.Tests.csproj @@ -41,6 +41,7 @@ + diff --git a/test/Microsoft.ML.AutoML.Tests/SuggestedPipelineBuilderTests.cs b/test/Microsoft.ML.AutoML.Tests/SuggestedPipelineBuilderTests.cs index e59c3fccea..fe94f6751f 100644 --- a/test/Microsoft.ML.AutoML.Tests/SuggestedPipelineBuilderTests.cs +++ b/test/Microsoft.ML.AutoML.Tests/SuggestedPipelineBuilderTests.cs @@ -16,7 +16,7 @@ public class SuggestedPipelineBuilderTests public void TrainerWantsCaching() { TestPipelineBuilderCaching(BuildAveragedPerceptronTrainer(), - new bool?[] { true, false, null }, + new CacheBeforeTrainer[] { CacheBeforeTrainer.On, CacheBeforeTrainer.Off, CacheBeforeTrainer.Auto }, new[] { true, false, true }); } @@ -24,7 +24,7 @@ public void TrainerWantsCaching() public void TrainerDoesntWantCaching() { TestPipelineBuilderCaching(BuildLightGbmTrainer(), - new bool?[] { true, false, null }, + new CacheBeforeTrainer[] { CacheBeforeTrainer.On, CacheBeforeTrainer.Off, CacheBeforeTrainer.Auto }, new[] { true, false, false }); } @@ -45,13 +45,13 @@ public void TrainerNotNeedNormalization() private static void TestPipelineBuilderCaching( SuggestedTrainer trainer, - bool?[] enableCachingOptions, + CacheBeforeTrainer[] cacheBeforeTrainerSettings, bool[] resultShouldHaveCaching) { - for (var i = 0; i < enableCachingOptions.Length; i++) + for (var i = 0; i < cacheBeforeTrainerSettings.Length; i++) { var suggestedPipeline = BuildSuggestedPipeline(trainer, - enableCachingOptions[i]); + cacheBeforeTrainerSettings[i]); Assert.AreEqual(resultShouldHaveCaching[i], suggestedPipeline.ToPipeline().CacheBeforeTrainer); } @@ -72,12 +72,12 @@ private static SuggestedTrainer BuildLightGbmTrainer() } private static SuggestedPipeline BuildSuggestedPipeline(SuggestedTrainer trainer, - bool? enableCaching = null) + CacheBeforeTrainer cacheBeforeTrainer = CacheBeforeTrainer.Auto) { return SuggestedPipelineBuilder.Build(_context, new List(), new List(), - trainer, enableCaching); + trainer, cacheBeforeTrainer); } } }
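
Usage note (not part of the diff): a minimal sketch of how a caller consumes the reshaped API after this change, assuming a local "data.csv" file with a "Label" column, both of which are placeholders. Execute now returns an ExperimentResult with a precomputed BestRun, replacing the old Best() extension methods, and CacheBeforeTrainer is the new enum (Auto/On/Off) rather than a nullable bool.

using System;
using Microsoft.ML;
using Microsoft.ML.Auto;

internal static class ExperimentResultUsageSketch
{
    public static void Run()
    {
        var context = new MLContext();

        // Infer columns and load the data, mirroring the updated AutoFitTests.
        // "data.csv" and "Label" are illustrative placeholders.
        var columnInference = context.Auto().InferColumns("data.csv", "Label");
        var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
        var trainData = textLoader.Load("data.csv");

        // CacheBeforeTrainer is now an enum setting instead of bool?.
        var settings = new BinaryExperimentSettings
        {
            MaxExperimentTimeInSeconds = 60,
            CacheBeforeTrainer = CacheBeforeTrainer.Auto,
            OptimizingMetric = BinaryClassificationMetric.Accuracy
        };

        var result = context.Auto()
            .CreateBinaryClassificationExperiment(settings)
            .Execute(trainData, columnInference.ColumnInformation);

        // BestRun is selected inside the experiment using the optimizing metric,
        // so callers no longer invoke a Best() extension method themselves.
        Console.WriteLine($"Best trainer: {result.BestRun.TrainerName}, " +
            $"accuracy: {result.BestRun.ValidationMetrics.Accuracy:F4}");

        // RunDetails still exposes every run for per-iteration inspection.
        foreach (var run in result.RunDetails)
        {
            Console.WriteLine($"{run.TrainerName}: {run.ValidationMetrics?.Accuracy}");
        }
    }
}

The same pattern applies to the regression and multiclass experiments (RunDetails plus a precomputed BestRun), and to the cross-validation overloads through the new CrossValidationExperimentResult type.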