Skip to content

Commit 2a638cc

Browse files
authored
Misc fixes (dotnet#39)
* misc fixes -- fix bug where SMAC was returning already-seen values; fix param encoding return bug in pipeline object model; nit clean-up AutoFit; return in pipeline suggester when sweeper has no next proposal; null ref fix in public object model pipeline suggester * fix in BuildPipelineNodePropsLightGbm test, fix / use correct 'newTrainer' variable in PipelineSuggester * SMAC perf improvement
1 parent 390e9d7 commit 2a638cc

File tree

8 files changed

+128
-40
lines changed

8 files changed

+128
-40
lines changed

src/AutoML/AutoFitter/AutoFitter.cs

+2-6
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,6 @@ public AutoFitter(MLContext context, OptimizingMetricInfo metricInfo, AutoFitSet
4141
}
4242

4343
public InferredPipelineRunResult[] Fit()
44-
{
45-
IteratePipelinesAndFit();
46-
return _history.ToArray();
47-
}
48-
49-
private void IteratePipelinesAndFit()
5044
{
5145
var stopwatch = Stopwatch.StartNew();
5246
var columns = AutoMlUtils.GetColumnInfoTuples(_context, _trainData, _label, _purposeOverrides);
@@ -68,6 +62,8 @@ private void IteratePipelinesAndFit()
6862

6963
} while (_history.Count < _settings.StoppingCriteria.MaxIterations &&
7064
stopwatch.Elapsed.TotalMinutes < _settings.StoppingCriteria.TimeOutInMinutes);
65+
66+
return _history.ToArray();
7167
}
7268

7369
private void ProcessPipeline(InferredPipeline pipeline)

src/AutoML/PipelineSuggesters/PipelineSuggester.cs

+27-7
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public static Pipeline GetNextPipeline(IEnumerable<PipelineRunResult> history,
2121
{
2222
var inferredHistory = history.Select(r => InferredPipelineRunResult.FromPipelineRunResult(r));
2323
var nextInferredPipeline = GetNextInferredPipeline(inferredHistory, columns, task, iterationsRemaining, isMaximizingMetric);
24-
return nextInferredPipeline.ToPipeline();
24+
return nextInferredPipeline?.ToPipeline();
2525
}
2626

2727
public static InferredPipeline GetNextInferredPipeline(IEnumerable<InferredPipelineRunResult> history,
@@ -47,21 +47,31 @@ public static InferredPipeline GetNextInferredPipeline(IEnumerable<InferredPipel
4747
// sort top trainers by # of times they've been run, from lowest to highest
4848
var orderedTopTrainers = OrderTrainersByNumTrials(history, topTrainers);
4949

50+
// keep as hashset of previously visited pipelines
51+
var visitedPipelines = new HashSet<InferredPipeline>(history.Select(h => h.Pipeline));
52+
5053
// iterate over top trainers (from least run to most run),
5154
// to find next pipeline
52-
foreach(var trainer in orderedTopTrainers)
55+
foreach (var trainer in orderedTopTrainers)
5356
{
5457
var newTrainer = trainer.Clone();
5558

56-
// make sure we have not seen pipeline before.
5759
// repeat until passes or runs out of chances
58-
var visitedPipelines = new HashSet<InferredPipeline>(history.Select(h => h.Pipeline));
5960
const int maxNumberAttempts = 10;
6061
var count = 0;
6162
do
6263
{
63-
SampleHyperparameters(newTrainer, history, isMaximizingMetric);
64+
// sample new hyperparameters for the learner
65+
if (!SampleHyperparameters(newTrainer, history, isMaximizingMetric))
66+
{
67+
// if unable to sample new hyperparameters for the learner
68+
// (ie SMAC returned 0 suggestions), break
69+
break;
70+
}
71+
6472
var pipeline = new InferredPipeline(transforms, newTrainer);
73+
74+
// make sure we have not seen pipeline before
6575
if (!visitedPipelines.Contains(pipeline))
6676
{
6777
return pipeline;
@@ -169,7 +179,11 @@ private static IValueGenerator[] ConvertToValueGenerators(IEnumerable<SweepableP
169179
return results;
170180
}
171181

172-
private static void SampleHyperparameters(SuggestedTrainer trainer, IEnumerable<InferredPipelineRunResult> history, bool isMaximizingMetric)
182+
/// <summary>
183+
/// Samples new hyperparameters for the trainer, and sets them.
184+
/// Returns true if success (new hyperparams were suggested and set). Else, returns false.
185+
/// </summary>
186+
private static bool SampleHyperparameters(SuggestedTrainer trainer, IEnumerable<InferredPipelineRunResult> history, bool isMaximizingMetric)
173187
{
174188
var sps = ConvertToValueGenerators(trainer.SweepParams);
175189
var sweeper = new SmacSweeper(
@@ -179,14 +193,20 @@ private static void SampleHyperparameters(SuggestedTrainer trainer, IEnumerable<
179193
});
180194

181195
IEnumerable<InferredPipelineRunResult> historyToUse = history
182-
.Where(r => r.RunSucceded && r.Pipeline.Trainer.TrainerName == trainer.TrainerName && r.Pipeline.Trainer.HyperParamSet != null);
196+
.Where(r => r.RunSucceded && r.Pipeline.Trainer.TrainerName == trainer.TrainerName && r.Pipeline.Trainer.HyperParamSet != null && r.Pipeline.Trainer.HyperParamSet.Any());
183197

184198
// get new set of hyperparameter values
185199
var proposedParamSet = sweeper.ProposeSweeps(1, historyToUse.Select(h => h.ToRunResult(isMaximizingMetric))).First();
200+
if(!proposedParamSet.Any())
201+
{
202+
return false;
203+
}
186204

187205
// associate proposed param set with trainer, so that smart hyperparam
188206
// sweepers (like KDO) can map them back.
189207
trainer.SetHyperparamValues(proposedParamSet);
208+
209+
return true;
190210
}
191211

192212
private static IEnumerable<SuggestedTransform> CalculateTransforms(MLContext context,

src/AutoML/Sweepers/SmacSweeper.cs

+8-4
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,17 @@ private ParameterSet[] GreedyPlusRandomSearch(ParameterSet[] parents, FastForest
190190
for (int i = 0; i < randomConfigs.Length; i++)
191191
configurations.Add(new Tuple<double, ParameterSet>(randomEIs[i], randomConfigs[i]));
192192

193-
HashSet<ParameterSet> retainedConfigs = new HashSet<ParameterSet>();
194193
IOrderedEnumerable<Tuple<double, ParameterSet>> bestConfigurations = configurations.OrderByDescending(x => x.Item1);
195194

196-
foreach (Tuple<double, ParameterSet> t in bestConfigurations.Take(numOfCandidates))
197-
retainedConfigs.Add(t.Item2);
195+
var retainedConfigs = new HashSet<ParameterSet>(bestConfigurations.Select(x => x.Item2));
198196

199-
return retainedConfigs.ToArray();
197+
// remove configurations matching previous run
198+
foreach(var previousRun in previousRuns)
199+
{
200+
retainedConfigs.Remove(previousRun.ParameterSet);
201+
}
202+
203+
return retainedConfigs.Take(numOfCandidates).ToArray();
200204
}
201205

202206
/// <summary>

src/AutoML/Sweepers/SweeperBase.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ protected SweeperBase(ArgumentsBase args, IValueGenerator[] sweepParameters, str
4444

4545
public virtual ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResult> previousRuns = null)
4646
{
47-
var prevParamSets = previousRuns?.Select(r => r.ParameterSet).ToList() ?? new List<ParameterSet>();
47+
var prevParamSets = new HashSet<ParameterSet>(previousRuns?.Select(r => r.ParameterSet).ToList() ?? new List<ParameterSet>());
4848
var result = new HashSet<ParameterSet>();
4949
for (int i = 0; i < maxSweeps; i++)
5050
{
@@ -66,9 +66,9 @@ public virtual ParameterSet[] ProposeSweeps(int maxSweeps, IEnumerable<IRunResul
6666

6767
protected abstract ParameterSet CreateParamSet();
6868

69-
protected static bool AlreadyGenerated(ParameterSet paramSet, IEnumerable<ParameterSet> previousRuns)
69+
protected static bool AlreadyGenerated(ParameterSet paramSet, ISet<ParameterSet> previousRuns)
7070
{
71-
return previousRuns.Any(previousRun => previousRun.Equals(paramSet));
71+
return previousRuns.Contains(paramSet);
7272
}
7373
}
7474
}

src/AutoML/TrainerExtensions/TrainerExtensionUtil.cs

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,18 +83,18 @@ public static IDictionary<string, object> BuildPipelineNodeProps(TrainerName tra
8383
return BuildLightGbmPipelineNodeProps(sweepParams);
8484
}
8585

86-
return sweepParams.ToDictionary(p => p.Name, p => (object)p.RawValue);
86+
return sweepParams.ToDictionary(p => p.Name, p => (object)p.ProcessedValue());
8787
}
8888

8989
private static IDictionary<string, object> BuildLightGbmPipelineNodeProps(IEnumerable<SweepableParam> sweepParams)
9090
{
9191
var treeBoosterParams = sweepParams.Where(p => _lightGbmTreeBoosterParamNames.Contains(p.Name));
9292
var parentArgParams = sweepParams.Except(treeBoosterParams);
9393

94-
var treeBoosterProps = treeBoosterParams.ToDictionary(p => p.Name, p => (object)p.RawValue);
94+
var treeBoosterProps = treeBoosterParams.ToDictionary(p => p.Name, p => (object)p.ProcessedValue());
9595
var treeBoosterCustomProp = new CustomProperty("Microsoft.ML.LightGBM.TreeBooster", treeBoosterProps);
9696

97-
var props = parentArgParams.ToDictionary(p => p.Name, p => (object)p.RawValue);
97+
var props = parentArgParams.ToDictionary(p => p.Name, p => (object)p.ProcessedValue());
9898
props[LightGbmTreeBoosterPropName] = treeBoosterCustomProp;
9999

100100
return props;

src/Test/GetNextPipelineTests.cs

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ public void GetNextPipelineMock()
5050
{
5151
// get next pipeline
5252
var pipeline = PipelineSuggester.GetNextPipeline(history, columns, TaskKind.BinaryClassification, maxIterations - i);
53+
if(pipeline == null)
54+
{
55+
break;
56+
}
5357

5458
var result = new PipelineRunResult(pipeline, AutoMlUtils.Random.NextDouble(), true);
5559
history.Add(result);

src/Test/InferredPipelineTests.cs

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using Microsoft.VisualStudio.TestTools.UnitTesting;
8+
9+
namespace Microsoft.ML.Auto.Test
10+
{
11+
[TestClass]
12+
public class InferredPipelineTests
13+
{
14+
[TestMethod]
15+
public void InferredPipelinesHashTest()
16+
{
17+
var context = new MLContext();
18+
19+
// test same learners with no hyperparams have the same hash code
20+
var trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension());
21+
var trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension());
22+
var transforms1 = new List<SuggestedTransform>();
23+
var transforms2 = new List<SuggestedTransform>();
24+
var inferredPipeline1 = new InferredPipeline(transforms1, trainer1);
25+
var inferredPipeline2 = new InferredPipeline(transforms2, trainer2);
26+
Assert.AreEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
27+
28+
// test same learners with hyperparams set vs empty hyperparams have different hash codes
29+
var hyperparams1 = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumLeaves", 2) });
30+
trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), hyperparams1);
31+
trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension());
32+
inferredPipeline1 = new InferredPipeline(transforms1, trainer1);
33+
inferredPipeline2 = new InferredPipeline(transforms2, trainer2);
34+
Assert.AreNotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
35+
36+
// same learners with different hyperparams
37+
hyperparams1 = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumLeaves", 2) });
38+
var hyperparams2 = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumLeaves", 6) });
39+
trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), hyperparams1);
40+
trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), hyperparams2);
41+
inferredPipeline1 = new InferredPipeline(transforms1, trainer1);
42+
inferredPipeline2 = new InferredPipeline(transforms2, trainer2);
43+
Assert.AreNotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
44+
45+
// same learners with same transforms
46+
trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension());
47+
trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension());
48+
transforms1 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
49+
transforms2 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
50+
inferredPipeline1 = new InferredPipeline(transforms1, trainer1);
51+
inferredPipeline2 = new InferredPipeline(transforms2, trainer2);
52+
Assert.AreEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
53+
54+
// same transforms with different learners
55+
trainer1 = new SuggestedTrainer(context, new SdcaBinaryExtension());
56+
trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension());
57+
transforms1 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
58+
transforms2 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
59+
inferredPipeline1 = new InferredPipeline(transforms1, trainer1);
60+
inferredPipeline2 = new InferredPipeline(transforms2, trainer2);
61+
Assert.AreNotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
62+
}
63+
}
64+
}

src/Test/TrainerExtensionsTests.cs

+17-17
Original file line numberDiff line numberDiff line change
@@ -63,22 +63,22 @@ public void BuildPipelineNodePropsLightGbm()
6363

6464
var expectedJson = @"
6565
{
66-
""NumBoostRound"": 1,
66+
""NumBoostRound"": 20,
6767
""LearningRate"": 1,
6868
""NumLeaves"": 1,
69-
""MinDataPerLeaf"": 1,
70-
""UseSoftmax"": 1,
71-
""UseCat"": 1,
72-
""UseMissing"": 1,
73-
""MinDataPerGroup"": 1,
74-
""MaxCatThreshold"": 1,
75-
""CatSmooth"": 1,
76-
""CatL2"": 1,
69+
""MinDataPerLeaf"": 10,
70+
""UseSoftmax"": false,
71+
""UseCat"": false,
72+
""UseMissing"": false,
73+
""MinDataPerGroup"": 50,
74+
""MaxCatThreshold"": 16,
75+
""CatSmooth"": 10,
76+
""CatL2"": 0.5,
7777
""TreeBooster"": {
7878
""Name"": ""Microsoft.ML.LightGBM.TreeBooster"",
7979
""Properties"": {
80-
""RegLambda"": 1,
81-
""RegAlpha"": 1
80+
""RegLambda"": 0.5,
81+
""RegAlpha"": 0.5
8282
}
8383
}
8484
}";
@@ -99,12 +99,12 @@ public void BuildPipelineNodePropsSdca()
9999
var sdcaBinaryProps = TrainerExtensionUtil.BuildPipelineNodeProps(TrainerName.SdcaBinary, sweepParams);
100100
var expectedJson = @"
101101
{
102-
""L2Const"": 1,
103-
""L1Threshold"": 1,
104-
""ConvergenceTolerance"": 1,
105-
""MaxIterations"": 1,
106-
""Shuffle"": 1,
107-
""BiasLearningRate"": 1
102+
""L2Const"": 1E-07,
103+
""L1Threshold"": 0.0,
104+
""ConvergenceTolerance"": 0.01,
105+
""MaxIterations"": 10,
106+
""Shuffle"": true,
107+
""BiasLearningRate"": 0.01
108108
}";
109109
Util.AssertObjectMatchesJson(expectedJson, sdcaBinaryProps);
110110
}

0 commit comments

Comments
 (0)