Skip to content

Commit 227da9d

Browse files
authored
[AutoML] CLI telemetry rev (#3789)
1 parent b1af7a6 commit 227da9d

19 files changed

+476
-177
lines changed

src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs

+18
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,24 @@ public static IEnumerable<string> GetColumnNames(ColumnInformation columnInforma
107107
return columnNames;
108108
}
109109

110+
/// <summary>
/// Tallies how many of the columns named in <paramref name="columnInformation"/> map to each purpose.
/// </summary>
/// <param name="columnInformation">Column information whose column names are enumerated via <see cref="GetColumnNames"/>.</param>
/// <returns>
/// A dictionary from purpose to the number of columns with that purpose. Columns for which
/// <c>GetColumnPurpose</c> returns null are not counted, and purposes with no columns have no entry.
/// </returns>
public static IDictionary<ColumnPurpose, int> CountColumnsByPurpose(ColumnInformation columnInformation)
{
    var countsByPurpose = new Dictionary<ColumnPurpose, int>();

    foreach (var name in GetColumnNames(columnInformation))
    {
        var maybePurpose = columnInformation.GetColumnPurpose(name);
        if (maybePurpose != null)
        {
            var key = maybePurpose.Value;

            // A purpose seen for the first time starts at 1; otherwise bump the running tally.
            countsByPurpose[key] = countsByPurpose.TryGetValue(key, out int seenSoFar)
                ? seenSoFar + 1
                : 1;
        }
    }

    return countsByPurpose;
}
127+
110128
private static void AddStringsToListIfNotNull(List<string> list, IEnumerable<string> strings)
111129
{
112130
foreach (var str in strings)

src/Microsoft.ML.AutoML/TrainerExtensions/SweepableParams.cs

+29
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@ private static IEnumerable<SweepableParam> BuildLbfgsArgsParams()
6161
};
6262
}
6363

64+
/// <summary>
/// The names of every hyperparameter swept across all trainers.
/// Populated once by the static field initializer from <see cref="GetAllSweepableParameterNames"/>.
/// </summary>
/// <remarks>
/// Declared <c>readonly</c> so the shared reference cannot be reassigned by callers.
/// </remarks>
public static readonly ISet<string> AllHyperparameterNames = GetAllSweepableParameterNames();
68+
6469
public static IEnumerable<SweepableParam> BuildAveragePerceptronParams()
6570
{
6671
return BuildAveragedLinearArgsParams().Concat(BuildOnlineLinearArgsParams());
@@ -172,5 +177,29 @@ public static IEnumerable<SweepableParam> BuildSymSgdLogisticRegressionParams()
172177
new SweepableDiscreteParam("UpdateFrequency", new object[] { "<Auto>", 5, 20 })
173178
};
174179
}
180+
181+
/// <summary>
/// Gets the name of every hyperparameter swept across all trainers.
/// </summary>
/// <returns>
/// A newly created set containing the distinct <c>Name</c> of each sweepable parameter
/// produced by the trainer parameter builders listed below.
/// </returns>
public static ISet<string> GetAllSweepableParameterNames()
{
    // Each builder is added exactly once; names shared between trainers collapse in the set below.
    var sweepableParams = new List<SweepableParam>();
    sweepableParams.AddRange(BuildAveragePerceptronParams());
    sweepableParams.AddRange(BuildFastForestParams());
    sweepableParams.AddRange(BuildFastTreeParams());
    sweepableParams.AddRange(BuildFastTreeTweedieParams());
    sweepableParams.AddRange(BuildLightGbmParamsMulticlass());
    sweepableParams.AddRange(BuildLightGbmParams());
    sweepableParams.AddRange(BuildLinearSvmParams());
    sweepableParams.AddRange(BuildLbfgsLogisticRegressionParams());
    sweepableParams.AddRange(BuildOnlineGradientDescentParams());
    sweepableParams.AddRange(BuildLbfgsPoissonRegressionParams());
    sweepableParams.AddRange(BuildSdcaParams());
    sweepableParams.AddRange(BuildOlsParams());
    sweepableParams.AddRange(BuildSgdParams());
    sweepableParams.AddRange(BuildSymSgdLogisticRegressionParams());
    return new HashSet<string>(sweepableParams.Select(p => p.Name));
}
175204
}
176205
}

src/mlnet/CodeGenerator/CodeGenerationHelper.cs

+9
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
using Microsoft.ML.CLI.CodeGenerator.CSharp;
1313
using Microsoft.ML.CLI.Data;
1414
using Microsoft.ML.CLI.ShellProgressBar;
15+
using Microsoft.ML.CLI.Telemetry.Events;
1516
using Microsoft.ML.CLI.Utilities;
1617
using Microsoft.ML.Data;
1718
using NLog;
@@ -51,7 +52,9 @@ public void GenerateCode()
5152
{
5253
inputColumnInformation.IgnoredColumnNames.Add(value);
5354
}
55+
var inferColumnsStopwatch = Stopwatch.StartNew();
5456
columnInference = _automlEngine.InferColumns(context, inputColumnInformation);
57+
InferColumnsEvent.TrackEvent(columnInference.ColumnInformation, inferColumnsStopwatch.Elapsed);
5558
}
5659
catch (Exception)
5760
{
@@ -74,6 +77,9 @@ public void GenerateCode()
7477
// The reason why we are doing this way of defining 3 different results is because of the AutoML API
7578
// i.e there is no common class/interface to handle all three tasks together.
7679

80+
// Start a timer for the experiment
81+
var stopwatch = Stopwatch.StartNew();
82+
7783
List<RunDetail<BinaryClassificationMetrics>> completedBinaryRuns = new List<RunDetail<BinaryClassificationMetrics>>();
7884
List<RunDetail<MulticlassClassificationMetrics>> completedMulticlassRuns = new List<RunDetail<MulticlassClassificationMetrics>>();
7985
List<RunDetail<RegressionMetrics>> completedRegressionRuns = new List<RunDetail<RegressionMetrics>>();
@@ -236,6 +242,7 @@ public void GenerateCode()
236242
{
237243
var binaryMetric = new BinaryExperimentSettings().OptimizingMetric;
238244
var bestBinaryIteration = BestResultUtil.GetBestRun(completedBinaryRuns, binaryMetric);
245+
ExperimentCompletedEvent.TrackEvent(bestBinaryIteration, completedBinaryRuns, TaskKind.BinaryClassification, stopwatch.Elapsed);
239246
bestPipeline = bestBinaryIteration.Pipeline;
240247
bestModel = bestBinaryIteration.Model;
241248
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, _settings.MlTask, _settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedBinaryRuns.Count());
@@ -253,6 +260,7 @@ public void GenerateCode()
253260
{
254261
var regressionMetric = new RegressionExperimentSettings().OptimizingMetric;
255262
var bestRegressionIteration = BestResultUtil.GetBestRun(completedRegressionRuns, regressionMetric);
263+
ExperimentCompletedEvent.TrackEvent(bestRegressionIteration, completedRegressionRuns, TaskKind.Regression, stopwatch.Elapsed);
256264
bestPipeline = bestRegressionIteration.Pipeline;
257265
bestModel = bestRegressionIteration.Model;
258266
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, _settings.MlTask, _settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedRegressionRuns.Count());
@@ -270,6 +278,7 @@ public void GenerateCode()
270278
{
271279
var muliclassMetric = new MulticlassExperimentSettings().OptimizingMetric;
272280
var bestMulticlassIteration = BestResultUtil.GetBestRun(completedMulticlassRuns, muliclassMetric);
281+
ExperimentCompletedEvent.TrackEvent(bestMulticlassIteration, completedMulticlassRuns, TaskKind.MulticlassClassification, stopwatch.Elapsed);
273282
bestPipeline = bestMulticlassIteration.Pipeline;
274283
bestModel = bestMulticlassIteration.Model;
275284
ConsolePrinter.ExperimentResultsHeader(LogLevel.Info, _settings.MlTask, _settings.Dataset.Name, columnInformation.LabelColumnName, elapsedTime.ToString("F2"), completedMulticlassRuns.Count());

src/mlnet/Commands/CommandDefinitions.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ namespace Microsoft.ML.CLI.Commands
1414
{
1515
internal static class CommandDefinitions
1616
{
17+
public const string AutoTrainCommandName = "auto-train";
18+
1719
internal static System.CommandLine.Command AutoTrain(ICommandHandler handler)
1820
{
19-
var newCommand = new System.CommandLine.Command("auto-train", "Create a new .NET project using ML.NET to train and run a model", handler: handler)
21+
var newCommand = new System.CommandLine.Command(AutoTrainCommandName, "Create a new .NET project using ML.NET to train and run a model", handler: handler)
2022
{
2123
MlTask(),
2224
Dataset(),

src/mlnet/Commands/New/NewCommandHandler.cs

+1-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using Microsoft.DotNet.Cli.Telemetry;
65
using Microsoft.ML.CLI.CodeGenerator;
76
using Microsoft.ML.CLI.Data;
87

@@ -11,18 +10,14 @@ namespace Microsoft.ML.CLI.Commands.New
1110
internal class NewCommand : ICommand
1211
{
1312
private readonly NewCommandSettings _settings;
14-
private readonly MlTelemetry _telemetry;
1513

16-
internal NewCommand(NewCommandSettings settings, MlTelemetry telemetry)
14+
internal NewCommand(NewCommandSettings settings)
1715
{
1816
_settings = settings;
19-
_telemetry = telemetry;
2017
}
2118

2219
public void Execute()
2320
{
24-
_telemetry.LogAutoTrainMlCommand(_settings.Dataset.Name, _settings.MlTask.ToString(), _settings.Dataset.Length);
25-
2621
CodeGenerationHelper codeGenerationHelper = new CodeGenerationHelper(new AutoMLEngine(_settings), _settings); // Needs to be improved.
2722
codeGenerationHelper.GenerateCode();
2823
}

src/mlnet/Program.cs

+26-7
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
using System;
66
using System.CommandLine.Builder;
77
using System.CommandLine.Invocation;
8+
using System.Diagnostics;
89
using System.IO;
910
using System.Linq;
10-
using Microsoft.DotNet.Cli.Telemetry;
1111
using Microsoft.ML.CLI.Commands;
1212
using Microsoft.ML.CLI.Commands.New;
1313
using Microsoft.ML.CLI.Data;
14+
using Microsoft.ML.CLI.Telemetry.Events;
1415
using Microsoft.ML.CLI.Utilities;
1516
using NLog;
1617
using NLog.Targets;
@@ -20,24 +21,33 @@ namespace Microsoft.ML.CLI
2021
public class Program
2122
{
2223
private static Logger _logger = LogManager.GetCurrentClassLogger();
24+
2325
public static void Main(string[] args)
2426
{
25-
var telemetry = new MlTelemetry();
27+
Telemetry.Telemetry.Initialize();
2628
int exitCode = 1;
29+
Exception ex = null;
30+
var stopwatch = Stopwatch.StartNew();
31+
32+
var mlNetCommandEvent = new MLNetCommandEvent();
33+
2734
// Create handler outside so that commandline and the handler is decoupled and testable.
2835
var handler = CommandHandler.Create<NewCommandSettings>(
2936
(options) =>
3037
{
3138
try
3239
{
40+
// Send telemetry event for command issued
41+
mlNetCommandEvent.AutoTrainCommandSettings = options;
42+
mlNetCommandEvent.TrackEvent();
43+
3344
// Map the verbosity to internal levels
3445
var verbosity = Utils.GetVerbosity(options.Verbosity);
3546

3647
// Build the output path
3748
string outputBaseDir = string.Empty;
3849
if (options.Name == null)
3950
{
40-
4151
options.Name = "Sample" + Utils.GetTaskKind(options.MlTask).ToString();
4252
outputBaseDir = Path.Combine(options.OutputPath.FullName, options.Name);
4353
}
@@ -50,7 +60,7 @@ public static void Main(string[] args)
5060
options.OutputPath = new DirectoryInfo(outputBaseDir);
5161

5262
// Instantiate the command
53-
var command = new NewCommand(options, telemetry);
63+
var command = new NewCommand(options);
5464

5565
// Override the Logger Configuration
5666
var logconsole = LogManager.Configuration.FindTargetByName("logconsole");
@@ -67,6 +77,7 @@ public static void Main(string[] args)
6777
}
6878
catch (Exception e)
6979
{
80+
ex = e;
7081
_logger.Log(LogLevel.Error, e.Message);
7182
_logger.Log(LogLevel.Debug, e.ToString());
7283
_logger.Log(LogLevel.Info, Strings.LookIntoLogFile);
@@ -82,7 +93,8 @@ public static void Main(string[] args)
8293

8394
var parseResult = parser.Parse(args);
8495

85-
if (parseResult.Errors.Count == 0)
96+
var commandParseSucceeded = !parseResult.Errors.Any();
97+
if (commandParseSucceeded)
8698
{
8799
if (parseResult.RootCommandResult.Children.Count > 0)
88100
{
@@ -95,13 +107,20 @@ public static void Main(string[] args)
95107

96108
var explicitlySpecifiedOptions = options.Where(opt => !opt.IsImplicit).Select(opt => opt.Name);
97109

98-
telemetry.SetCommandAndParameters(command.Name, explicitlySpecifiedOptions);
110+
mlNetCommandEvent.CommandLineParametersUsed = explicitlySpecifiedOptions;
99111
}
100112
}
101113
}
102114

115+
// Send system info telemetry
116+
SystemInfoEvent.TrackEvent();
117+
103118
parser.InvokeAsync(parseResult).Wait();
119+
// Send exit telemetry
120+
ApplicationExitEvent.TrackEvent(exitCode, commandParseSucceeded, stopwatch.Elapsed, ex);
121+
// Flush pending telemetry logs
122+
Telemetry.Telemetry.Flush(TimeSpan.FromSeconds(3));
104123
Environment.Exit(exitCode);
105124
}
106125
}
107-
}
126+
}

src/mlnet/Telemetry/DotNetAppInsights/FirstTimeUseNoticeSentinel.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
// Licensed under the MIT license. See LICENSE file in the project root for full license information.
33

44
using System.IO;
5-
using Microsoft.DotNet.AutoML;
65
using Microsoft.Extensions.EnvironmentAbstractions;
6+
using Microsoft.ML.CLI.Telemetry;
77

88
namespace Microsoft.DotNet.Configurer
99
{

src/mlnet/Telemetry/DotNetAppInsights/TelemetryCommonProperties.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
using System;
55
using System.Collections.Generic;
66
using System.IO;
7-
using Microsoft.DotNet.AutoML;
87
using Microsoft.DotNet.Configurer;
8+
using Microsoft.ML.CLI.Telemetry;
99
using RuntimeEnvironment = Microsoft.DotNet.PlatformAbstractions.RuntimeEnvironment;
1010
using RuntimeInformation = System.Runtime.InteropServices.RuntimeInformation;
1111

src/mlnet/Telemetry/DotNetAppInsights/UserLevelCacheWriter.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33

44
using System;
55
using System.IO;
6-
using Microsoft.DotNet.AutoML;
76
using Microsoft.Extensions.EnvironmentAbstractions;
7+
using Microsoft.ML.CLI.Telemetry;
88

99
namespace Microsoft.DotNet.Configurer
1010
{
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Diagnostics;
8+
9+
namespace Microsoft.ML.CLI.Telemetry.Events
10+
{
11+
/// <summary>
/// Telemetry event for CLI application exit.
/// </summary>
internal class ApplicationExitEvent
{
    /// <summary>
    /// Sends the "application-exit" telemetry event.
    /// </summary>
    /// <param name="exitCode">Value reported under the "ExitCode" property.</param>
    /// <param name="commandParseSucceeded">Value reported under the "CommandParseSucceeded" property.</param>
    /// <param name="duration">Duration forwarded to <c>Telemetry.TrackEvent</c>.</param>
    /// <param name="ex">Exception forwarded to <c>Telemetry.TrackEvent</c>.</param>
    public static void TrackEvent(int exitCode, bool commandParseSucceeded, TimeSpan duration, Exception ex)
    {
        // Process is IDisposable; release it as soon as the peak working set has been read.
        string peakMemory;
        using (var currentProcess = Process.GetCurrentProcess())
        {
            peakMemory = currentProcess.PeakWorkingSet64.ToString();
        }

        Telemetry.TrackEvent("application-exit",
            new Dictionary<string, string>
            {
                { "CommandParseSucceeded", commandParseSucceeded.ToString() },
                { "ExitCode", exitCode.ToString() },
                { "PeakMemory", peakMemory },
            },
            duration, ex);
    }
}
28+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Diagnostics;
8+
using System.Linq;
9+
using Microsoft.ML.AutoML;
10+
11+
namespace Microsoft.ML.CLI.Telemetry.Events
12+
{
13+
/// <summary>
/// Telemetry event for AutoML experiment completion.
/// </summary>
internal static class ExperimentCompletedEvent
{
    /// <summary>
    /// Sends the "experiment-completed" telemetry event describing the best run of an experiment.
    /// </summary>
    /// <typeparam name="TMetrics">Metrics type carried by the run details.</typeparam>
    /// <param name="bestRun">Run whose pipeline and trainer name are reported.</param>
    /// <param name="allRuns">
    /// All runs of the experiment. "BestIterationNum" is the 1-based position of
    /// <paramref name="bestRun"/> in this list (0 when the list does not contain it).
    /// </param>
    /// <param name="machineLearningTask">Task kind reported under "MachineLearningTask".</param>
    /// <param name="duration">Duration forwarded to <c>Telemetry.TrackEvent</c>.</param>
    public static void TrackEvent<TMetrics>(RunDetail<TMetrics> bestRun,
        List<RunDetail<TMetrics>> allRuns,
        TaskKind machineLearningTask,
        TimeSpan duration)
    {
        // Process is IDisposable; release it as soon as the peak working set has been read.
        string peakMemory;
        using (var currentProcess = Process.GetCurrentProcess())
        {
            peakMemory = currentProcess.PeakWorkingSet64.ToString();
        }

        Telemetry.TrackEvent("experiment-completed",
            new Dictionary<string, string>()
            {
                { "BestIterationNum", (allRuns.IndexOf(bestRun) + 1).ToString() },
                { "BestPipeline", Telemetry.GetSanitizedPipelineStr(bestRun.Pipeline) },
                { "BestTrainer", bestRun.TrainerName },
                { "MachineLearningTask", machineLearningTask.ToString() },
                // List<T>.Count property instead of the LINQ Count() extension.
                { "NumIterations", allRuns.Count.ToString() },
                { "PeakMemory", peakMemory },
            },
            duration);
    }
}
36+
}

0 commit comments

Comments
 (0)