Skip to content

Commit bf4d605

Browse files
authored
Prior trainer should accept label column type of boolean ONLY. (#3291)
1 parent 9c03a1c commit bf4d605

11 files changed

+15
-16
lines changed

src/Microsoft.ML.StandardTrainers/Standard/Simple/SimpleTrainers.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ private PriorModelParameters Train(TrainContext context)
230230
data.CheckBinaryLabel();
231231
_host.CheckParam(data.Schema.Label.HasValue, nameof(data), "Missing Label column");
232232
var labelCol = data.Schema.Label.Value;
233-
_host.CheckParam(labelCol.Type == NumberDataViewType.Single, nameof(data), "Invalid type for Label column");
233+
_host.CheckParam(labelCol.Type == BooleanDataViewType.Instance, nameof(data), "Invalid type for Label column");
234234

235235
double pos = 0;
236236
double neg = 0;
@@ -243,9 +243,9 @@ private PriorModelParameters Train(TrainContext context)
243243

244244
using (var cursor = data.Data.GetRowCursor(cols))
245245
{
246-
var getLab = cursor.GetLabelFloatGetter(data);
246+
var getLab = cursor.GetGetter<bool>(data.Schema.Label.Value);
247247
var getWeight = colWeight >= 0 ? cursor.GetGetter<float>(data.Schema.Weight.Value) : null;
248-
float lab = default;
248+
bool lab = default;
249249
float weight = 1;
250250
while (cursor.MoveNext())
251251
{
@@ -258,9 +258,9 @@ private PriorModelParameters Train(TrainContext context)
258258
}
259259

260260
// The label is boolean, so there is no NaN case to skip: each row counts as either positive or negative.
261-
if (lab > 0)
261+
if (lab)
262262
pos += weight;
263-
else if (lab <= 0)
263+
else
264264
neg += weight;
265265
}
266266
}

test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1
1+
maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1
22
Not adding a normalizer.
33
Not training a calibrator because it is not needed.
44
Not adding a normalizer.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
PriorPredictor
22
AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings
3-
0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1
3+
0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1
44

test/BaselineOutput/SingleDebug/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1
1+
maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1
22
Not adding a normalizer.
33
Not training a calibrator because it is not needed.
44
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0))
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
PriorPredictor
22
AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings
3-
0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1
3+
0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1
44

test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-CV-breast-cancer-out.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1
1+
maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1
22
Not adding a normalizer.
33
Not training a calibrator because it is not needed.
44
Not adding a normalizer.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
PriorPredictor
22
AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings
3-
0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% data=%Data% seed=1
3+
0.5 0.656163 0 0 0.656163 1 0.935104 -0.00959 NaN 0.418968 PriorPredictor %Data% %Output% 99 0 0 maml.exe CV tr=PriorPredictor threads=- dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% seed=1
44

test/BaselineOutput/SingleRelease/PriorPredictor/BinaryPrior-TrainTest-breast-cancer-out.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1
1+
maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1
22
Not adding a normalizer.
33
Not training a calibrator because it is not needed.
44
TEST POSITIVE RATIO: 0.3448 (241.0/(241.0+458.0))
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
PriorPredictor
22
AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings
3-
0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% data=%Data% out=%Output% seed=1
3+
0.5 0.655222 0 0 0.655222 1 0.929318 0 NaN 0.415719 PriorPredictor %Data% %Data% %Output% 99 0 0 maml.exe TrainTest test=%Data% tr=PriorPredictor dout=%Output% loader=Text{col=Label:BL:0 col=Features:~} data=%Data% out=%Output% seed=1
44

test/Microsoft.ML.Predictor.Tests/TestPredictors.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,7 @@ public void BinaryPriorTest()
120120
{
121121
var predictors = new[] {
122122
TestLearners.binaryPrior};
123-
var datasets = GetDatasetsForBinaryClassifierBaseTest();
124-
RunAllTests(predictors, datasets);
123+
RunAllTests(predictors, new[] { TestDatasets.breastCancerBoolLabel });
125124
Done();
126125
}
127126

test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ private IDataView GetBreastCancerDataviewWithTextColumns()
1818
HasHeader = true,
1919
Columns = new[]
2020
{
21-
new TextLoader.Column("Label", DataKind.Single, 0),
21+
new TextLoader.Column("Label", DataKind.Boolean, 0),
2222
new TextLoader.Column("F1", DataKind.String, 1),
2323
new TextLoader.Column("F2", DataKind.Int32, 2),
2424
new TextLoader.Column("Rest", DataKind.Single, new [] { new TextLoader.Range(3, 9) })

0 commit comments

Comments
 (0)