Skip to content

Random seed and concurrency for tests #277

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 31, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions src/Microsoft.ML/LearningPipeline.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,18 @@ public ScorerPipelineStep(Var<IDataView> data, Var<ITransformModel> model)
public class LearningPipeline : ICollection<ILearningPipelineItem>
{
private List<ILearningPipelineItem> Items { get; } = new List<ILearningPipelineItem>();
private readonly int? _seed;
private readonly int _conc;

/// <summary>
/// Construct an empty <see cref="LearningPipeline"/> object.
/// Construct an empty <see cref="LearningPipeline"/> object.
/// </summary>
public LearningPipeline()
/// <param name="seed">Specify seed for random generator</param>
/// <param name="conc">Specify concurrency factor (default value - autoselection)</param>
public LearningPipeline(int? seed=null, int conc=0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seed=null, int conc=0) [](start = 37, length = 22)

Since it's for the tests for now, until we settle on a correct solution, maybe consider making this constructor internal in the meantime.

So we'd put something like:
[assembly: InternalsVisibleTo(assemblyName: "Microsoft.ML.TestFramework")] into an AssemblyInfo.cs. (Or, if not TestFramework, whatever other test project is appropriate.) We'd retain the original constructor.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely, but for now I just want to run 10-20 builds to make sure the problem is gone.


In reply to: 192223871 [](ancestors = 192223871)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, all right. Maybe during one of those 10 or 20 builds, while we're waiting out the minutes, we can introduce it, so that once the runs are done we can just sign off.


In reply to: 192224636 [](ancestors = 192224636,192223871)

{
_seed = seed;
_conc = conc;
}

/// <summary>
Expand Down Expand Up @@ -137,8 +143,7 @@ public PredictionModel<TInput, TOutput> Train<TInput, TOutput>()
where TInput : class
where TOutput : class, new()
{

using (var environment = new TlcEnvironment())
using (var environment = new TlcEnvironment(seed:_seed, conc:_conc))
{
Experiment experiment = environment.CreateExperiment();
ILearningPipelineStep step = null;
Expand Down
41 changes: 24 additions & 17 deletions test/Microsoft.ML.Tests/LearningPipelineTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime.Api;
using Microsoft.ML.Runtime.Data;
Expand Down Expand Up @@ -34,9 +33,9 @@ public void CanAddAndRemoveFromPipeline()
{
var pipeline = new LearningPipeline()
{
new Transforms.CategoricalOneHotVectorizer("String1", "String2"),
new Transforms.ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"),
new Trainers.StochasticDualCoordinateAscentRegressor()
new CategoricalOneHotVectorizer("String1", "String2"),
new ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"),
new StochasticDualCoordinateAscentRegressor()
};
Assert.NotNull(pipeline);
Assert.Equal(3, pipeline.Count);
Expand Down Expand Up @@ -66,7 +65,7 @@ private class TransformedData
public void TransformOnlyPipeline()
{
const string _dataPath = @"..\..\Data\breast-cancer.txt";
var pipeline = new LearningPipeline();
var pipeline = new LearningPipeline(seed: 1, conc: 1);
pipeline.Add(new ML.Data.TextLoader(_dataPath).CreateFrom<InputData>(useHeader: false));
pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag });
var model = pipeline.Train<InputData, TransformedData>();
Expand Down Expand Up @@ -103,9 +102,11 @@ public class Prediction
public void NoTransformPipeline()
{
var data = new Data[1];
data[0] = new Data();
data[0].Features = new float[] { 0.0f, 1.0f };
data[0].Label = 0f;
data[0] = new Data
{
Features = new float[] { 0.0f, 1.0f },
Label = 0f
};
var pipeline = new LearningPipeline();
pipeline.Add(CollectionDataSource.Create(data));
pipeline.Add(new FastForestBinaryClassifier());
Expand All @@ -126,9 +127,11 @@ public class BooleanLabelData
public void BooleanLabelPipeline()
{
var data = new BooleanLabelData[1];
data[0] = new BooleanLabelData();
data[0].Features = new float[] { 0.0f, 1.0f };
data[0].Label = false;
data[0] = new BooleanLabelData
{
Features = new float[] { 0.0f, 1.0f },
Label = false
};
var pipeline = new LearningPipeline();
pipeline.Add(CollectionDataSource.Create(data));
pipeline.Add(new FastForestBinaryClassifier());
Expand All @@ -149,12 +152,16 @@ public class NullableBooleanLabelData
public void NullableBooleanLabelPipeline()
{
var data = new NullableBooleanLabelData[2];
data[0] = new NullableBooleanLabelData();
data[0].Features = new float[] { 0.0f, 1.0f };
data[0].Label = null;
data[1] = new NullableBooleanLabelData();
data[1].Features = new float[] { 1.0f, 0.0f };
data[1].Label = false;
data[0] = new NullableBooleanLabelData
{
Features = new float[] { 0.0f, 1.0f },
Label = null
};
data[1] = new NullableBooleanLabelData
{
Features = new float[] { 1.0f, 0.0f },
Label = false
};
var pipeline = new LearningPipeline();
pipeline.Add(CollectionDataSource.Create(data));
pipeline.Add(new FastForestBinaryClassifier());
Expand Down
6 changes: 3 additions & 3 deletions test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ public void PredictNewsCluster()
{
string dataPath = GetDataPath(@"external/20newsgroups.txt");

var pipeline = new LearningPipeline();
pipeline.Add(new TextLoader(dataPath).CreateFrom<NewsData>(useHeader: false, allowQuotedStrings:true, supportSparse:false));
var pipeline = new LearningPipeline(seed: 1, conc: 1);
pipeline.Add(new TextLoader(dataPath).CreateFrom<NewsData>(useHeader: false, allowQuotedStrings: true, supportSparse: false));
pipeline.Add(new ColumnConcatenator("AllText", "Subject", "Content"));
pipeline.Add(new TextFeaturizer("Features", "AllText")
{
Expand Down Expand Up @@ -104,7 +104,7 @@ public void PredictClusters()
}
};
}
var pipeline = new LearningPipeline();
var pipeline = new LearningPipeline(seed: 1, conc: 1);
pipeline.Add(CollectionDataSource.Create(data));
pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
var model = pipeline.Train<ClusteringData, ClusteringPrediction>();
Expand Down