Skip to content

Commit 2617e2b

Browse files
Ivanidzo4ka authored and eerhardt committed
Random seed and concurrency for tests (dotnet#277)
* first attempt
* add comments
* specify seed for random. make constructor internal.
1 parent c9e7251 commit 2617e2b

File tree

5 files changed

+55
-26
lines changed

5 files changed

+55
-26
lines changed

src/Microsoft.ML.Sweeper/Algorithms/Grid.cs

-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
65
using System.Collections.Generic;
76
using System.Linq;
87
using Microsoft.ML.Runtime;

src/Microsoft.ML/LearningPipeline.cs

+16-2
Original file line number | Diff line number | Diff line change
@@ -49,12 +49,27 @@ public ScorerPipelineStep(Var<IDataView> data, Var<ITransformModel> model)
4949
public class LearningPipeline : ICollection<ILearningPipelineItem>
5050
{
5151
private List<ILearningPipelineItem> Items { get; } = new List<ILearningPipelineItem>();
52+
private readonly int? _seed;
53+
private readonly int _conc;
5254

5355
/// <summary>
5456
/// Construct an empty <see cref="LearningPipeline"/> object.
5557
/// </summary>
5658
public LearningPipeline()
5759
{
60+
_seed = null;
61+
_conc = 0;
62+
}
63+
64+
/// <summary>
65+
/// Construct an empty <see cref="LearningPipeline"/> object.
66+
/// </summary>
67+
/// <param name="seed">Specify seed for random generator</param>
68+
/// <param name="conc">Specify concurrency factor (default value - autoselection)</param>
69+
internal LearningPipeline(int? seed=null, int conc=0)
70+
{
71+
_seed = seed;
72+
_conc = conc;
5873
}
5974

6075
/// <summary>
@@ -137,8 +152,7 @@ public PredictionModel<TInput, TOutput> Train<TInput, TOutput>()
137152
where TInput : class
138153
where TOutput : class, new()
139154
{
140-
141-
using (var environment = new TlcEnvironment())
155+
using (var environment = new TlcEnvironment(seed:_seed, conc:_conc))
142156
{
143157
Experiment experiment = environment.CreateExperiment();
144158
ILearningPipelineStep step = null;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Reflection;
6+
using System.Runtime.CompilerServices;
7+
using System.Runtime.InteropServices;
8+
9+
[assembly: InternalsVisibleTo("Microsoft.ML.Tests, PublicKey=002400000480000094000000060200000024000052534131000400000100010015c01ae1f50e8cc09ba9eac9147cf8fd9fce2cfe9f8dce4f7301c4132ca9fb50ce8cbf1df4dc18dd4d210e4345c744ecb3365ed327efdbc52603faa5e21daa11234c8c4a73e51f03bf192544581ebe107adee3a34928e39d04e524a9ce729d5090bfd7dad9d10c722c0def9ccc08ff0a03790e48bcd1f9b6c476063e1966a1c4")]

test/Microsoft.ML.Tests/LearningPipelineTests.cs

+24-17
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using Microsoft.ML;
65
using Microsoft.ML.Data;
76
using Microsoft.ML.Runtime.Api;
87
using Microsoft.ML.Runtime.Data;
@@ -34,9 +33,9 @@ public void CanAddAndRemoveFromPipeline()
3433
{
3534
var pipeline = new LearningPipeline()
3635
{
37-
new Transforms.CategoricalOneHotVectorizer("String1", "String2"),
38-
new Transforms.ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"),
39-
new Trainers.StochasticDualCoordinateAscentRegressor()
36+
new CategoricalOneHotVectorizer("String1", "String2"),
37+
new ColumnConcatenator(outputColumn: "Features", "String1", "String2", "Number1", "Number2"),
38+
new StochasticDualCoordinateAscentRegressor()
4039
};
4140
Assert.NotNull(pipeline);
4241
Assert.Equal(3, pipeline.Count);
@@ -66,7 +65,7 @@ private class TransformedData
6665
public void TransformOnlyPipeline()
6766
{
6867
const string _dataPath = @"..\..\Data\breast-cancer.txt";
69-
var pipeline = new LearningPipeline();
68+
var pipeline = new LearningPipeline(seed: 1, conc: 1);
7069
pipeline.Add(new ML.Data.TextLoader(_dataPath).CreateFrom<InputData>(useHeader: false));
7170
pipeline.Add(new CategoricalHashOneHotVectorizer("F1") { HashBits = 10, Seed = 314489979, OutputKind = CategoricalTransformOutputKind.Bag });
7271
var model = pipeline.Train<InputData, TransformedData>();
@@ -103,9 +102,11 @@ public class Prediction
103102
public void NoTransformPipeline()
104103
{
105104
var data = new Data[1];
106-
data[0] = new Data();
107-
data[0].Features = new float[] { 0.0f, 1.0f };
108-
data[0].Label = 0f;
105+
data[0] = new Data
106+
{
107+
Features = new float[] { 0.0f, 1.0f },
108+
Label = 0f
109+
};
109110
var pipeline = new LearningPipeline();
110111
pipeline.Add(CollectionDataSource.Create(data));
111112
pipeline.Add(new FastForestBinaryClassifier());
@@ -126,9 +127,11 @@ public class BooleanLabelData
126127
public void BooleanLabelPipeline()
127128
{
128129
var data = new BooleanLabelData[1];
129-
data[0] = new BooleanLabelData();
130-
data[0].Features = new float[] { 0.0f, 1.0f };
131-
data[0].Label = false;
130+
data[0] = new BooleanLabelData
131+
{
132+
Features = new float[] { 0.0f, 1.0f },
133+
Label = false
134+
};
132135
var pipeline = new LearningPipeline();
133136
pipeline.Add(CollectionDataSource.Create(data));
134137
pipeline.Add(new FastForestBinaryClassifier());
@@ -149,12 +152,16 @@ public class NullableBooleanLabelData
149152
public void NullableBooleanLabelPipeline()
150153
{
151154
var data = new NullableBooleanLabelData[2];
152-
data[0] = new NullableBooleanLabelData();
153-
data[0].Features = new float[] { 0.0f, 1.0f };
154-
data[0].Label = null;
155-
data[1] = new NullableBooleanLabelData();
156-
data[1].Features = new float[] { 1.0f, 0.0f };
157-
data[1].Label = false;
155+
data[0] = new NullableBooleanLabelData
156+
{
157+
Features = new float[] { 0.0f, 1.0f },
158+
Label = null
159+
};
160+
data[1] = new NullableBooleanLabelData
161+
{
162+
Features = new float[] { 1.0f, 0.0f },
163+
Label = false
164+
};
158165
var pipeline = new LearningPipeline();
159166
pipeline.Add(CollectionDataSource.Create(data));
160167
pipeline.Add(new FastForestBinaryClassifier());

test/Microsoft.ML.Tests/Scenarios/ClusteringTests.cs

+6-6
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,8 @@ public void PredictNewsCluster()
1616
{
1717
string dataPath = GetDataPath(@"external/20newsgroups.txt");
1818

19-
var pipeline = new LearningPipeline();
20-
pipeline.Add(new TextLoader(dataPath).CreateFrom<NewsData>(useHeader: false, allowQuotedStrings:true, supportSparse:false));
19+
var pipeline = new LearningPipeline(seed: 1, conc: 1);
20+
pipeline.Add(new TextLoader(dataPath).CreateFrom<NewsData>(useHeader: false, allowQuotedStrings: true, supportSparse: false));
2121
pipeline.Add(new ColumnConcatenator("AllText", "Subject", "Content"));
2222
pipeline.Add(new TextFeaturizer("Features", "AllText")
2323
{
@@ -81,8 +81,8 @@ public class ClusteringData
8181
public void PredictClusters()
8282
{
8383
int n = 1000;
84-
int k = 5;
85-
var rand = new Random();
84+
int k = 4;
85+
var rand = new Random(1);
8686
var clusters = new ClusteringData[k];
8787
var data = new ClusteringData[n];
8888
for (int i = 0; i < k; i++)
@@ -94,7 +94,7 @@ public void PredictClusters()
9494
for (int i = 0; i < n; i++)
9595
{
9696
var index = rand.Next(0, k);
97-
var shift = (rand.NextDouble() - 0.5) / k;
97+
var shift = (rand.NextDouble() - 0.5) / 10;
9898
data[i] = new ClusteringData
9999
{
100100
Points = new float[2]
@@ -104,7 +104,7 @@ public void PredictClusters()
104104
}
105105
};
106106
}
107-
var pipeline = new LearningPipeline();
107+
var pipeline = new LearningPipeline(seed: 1, conc: 1);
108108
pipeline.Add(CollectionDataSource.Create(data));
109109
pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
110110
var model = pipeline.Train<ClusteringData, ClusteringPrediction>();

0 commit comments

Comments
 (0)