Skip to content

Fix ResultProcessor bug, LogisticRegression bug and missing value conversion bug #1236

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 23 commits (branch names lost in extraction) on
Oct 20, 2018
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/Data/Conversion.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ private bool IsStdMissing(ref ReadOnlySpan<char> span)
public bool TryParseKey(ref TX src, U8 min, U8 max, out U8 dst)
{
var span = src.Span;
Contracts.Check(!IsStdMissing(ref span), "Missing text value cannot be converted to unsigned integer type.");
Contracts.Check(span.IsEmpty || !IsStdMissing(ref span), "Missing text value cannot be converted to unsigned integer type.");
Copy link
Contributor

@TomFinley TomFinley Oct 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

span.IsEmpty [](start = 28, length = 12)

Heh heh. Whoops! #Resolved

Contracts.Assert(min <= max);

// This simply ensures we don't have min == 0 and max == U8.MaxValue. This is illegal since
Expand Down Expand Up @@ -1530,7 +1530,7 @@ public bool TryParse(ref TX src, out BL dst)
{
var span = src.Span;

Contracts.Check(!IsStdMissing(ref span), "Missing text values cannot be converted to bool value.");
Contracts.Check(span.IsEmpty || !IsStdMissing(ref span), "Missing text value cannot be converted to bool type.");

char ch;
switch (src.Length)
Expand Down
9 changes: 6 additions & 3 deletions src/Microsoft.ML.ResultProcessor/ResultProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1063,10 +1063,10 @@ private static Experiment CreateVisualizationExperiment(ExperimentItemResult res
var experiment = new ML.Runtime.ExperimentVisualization.Experiment
{
Key = index.ToString(),
CompareGroup = string.IsNullOrEmpty(result.CustomizedTag) ? result.Trainer.Kind : result.CustomizedTag,
CompareGroup = string.IsNullOrEmpty(result.CustomizedTag) ? result.TrainerKind : result.CustomizedTag,
Trainer = new ML.Runtime.ExperimentVisualization.Trainer
{
Name = result.Trainer.Kind,
Name = result.TrainerKind,
ParameterSets = new List<ML.Runtime.ExperimentVisualization.Item>()
},
DataSet = new ML.Runtime.ExperimentVisualization.DataSet { File = result.Datafile },
Expand Down Expand Up @@ -1152,7 +1152,10 @@ private static object Load(Stream stream)

public static int Main(string[] args)
{
return Main(new ConsoleEnvironment(42), args);
string currentDirectory = Path.GetDirectoryName(typeof(ResultProcessor).Module.FullyQualifiedName);
using (var env = new ConsoleEnvironment(42))
using (AssemblyLoadingUtils.CreateAssemblyRegistrar(env, currentDirectory))
return Main(env, args);
}

public static int Main(IHostEnvironment env, string[] args)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ protected override ParameterMixingCalibratedPredictor CreatePredictor()
CurrentWeights.GetItemOrDefault(0, ref bias);
CurrentWeights.CopyTo(ref weights, 1, CurrentWeights.Length - 1);
return new ParameterMixingCalibratedPredictor(Host,
new LinearBinaryPredictor(Host, ref weights, bias),
new LinearBinaryPredictor(Host, ref weights, bias, _stats),
new PlattCalibrator(Host, -1, 0));
}

Expand Down
5 changes: 4 additions & 1 deletion src/Microsoft.ML.Sweeper/ConfigRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ public virtual void Finish()
if (Exe == null || Exe.EndsWith("maml", StringComparison.OrdinalIgnoreCase) ||
Exe.EndsWith("maml.exe", StringComparison.OrdinalIgnoreCase))
{
string currentDirectory = Path.GetDirectoryName(typeof(ExeConfigRunnerBase).Module.FullyQualifiedName);

using (var ch = Host.Start("Finish"))
using (AssemblyLoadingUtils.CreateAssemblyRegistrar(Host, currentDirectory))
{
var runs = RunNums.ToArray();
var args = Utils.BuildArray(RunNums.Count + 2,
Expand All @@ -120,7 +123,7 @@ public virtual void Finish()
return string.Format("{{{0}}}", GetFilePath(runs[i], "out"));
});

ResultProcessorInternal.ResultProcessor.Main (args);
ResultProcessorInternal.ResultProcessor.Main(args);

ch.Info(@"The summary of the run results has been saved to the file {0}\{1}.summary.txt", OutputFolder, Prefix);
}
Expand Down
4 changes: 4 additions & 0 deletions src/Microsoft.ML.Sweeper/Microsoft.ML.Sweeper.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@

</ItemGroup>

<ItemGroup>
<Compile Include="..\Common\AssemblyLoadingUtils.cs" Link="Common\AssemblyLoadingUtils.cs" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Saving predictor summary
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
maml.exe Train tr=MultiClassLogisticRegression{maxiter=100 t=- stat=+} loader=TextLoader{col=Label:TX:4 col=Features:R4:0-3 sep=,} data=%Data% out=%Output% seed=1 xf=Term{col=Label}
Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off.
Beginning optimization
num vars: 15
improvement criterion: Mean Improvement
L1 regularization selected 11 of 15 weights.
Model trained with 150 training examples.
Residual Deviance: 132.0122
Null Deviance: 329.5837
AIC: 154.0122
Not training a calibrator because it is not needed.
Physical memory usage(MB): %Number%
Virtual memory usage(MB): %Number%
%DateTime% Time elapsed(s): %Number%

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
MulticlassLogisticRegression bias and non-zero weights
Iris-setosa+(Bias) 2.265129
Iris-versicolor+(Bias) 0.7695086
Iris-virginica+(Bias) -3.034663
Iris-setosa+f3 -3.180634
Iris-setosa+f2 -2.88663
Iris-setosa+f1 0.5392878
Iris-setosa+f0 -0.03958065
Iris-versicolor+f1 -0.7073272
Iris-virginica+f3 3.158146
Iris-virginica+f2 1.907791
Iris-virginica+f0 0.01793481

*** MODEL STATISTICS SUMMARY ***
Count of training examples: 150
Residual Deviance: 132.0122
Null Deviance: 329.5837
AIC: 154.0122
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Saving predictor summary
Copy link
Contributor

@Ivanidzo4ka Ivanidzo4ka Oct 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

summary [](start = 17, length = 7)

with latest Eric changes (#1193) you can just use Common folder if files for Debug and Release are same. #Closed

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
maml.exe Train feat=Num lab=Lab tr=lr{t=- stat=+} loader=text{header+ sep=comma col=Lab:14 col=Num:0,2,4,10-12} data=%Data% out=%Output%
Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off.
Beginning optimization
num vars: 7
improvement criterion: Mean Improvement
L1 regularization selected 7 of 7 weights.
Model trained with 32561 training examples.
Residual Deviance: 26705.74 (on 32554 degrees of freedom)
Null Deviance: 35948.08 (on 32560 degrees of freedom)
AIC: 26719.74
Not training a calibrator because it is not needed.
Physical memory usage(MB): %Number%
Virtual memory usage(MB): %Number%
%DateTime% Time elapsed(s): %Number%

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Linear Binary Classification Predictor non-zero weights

(Bias) -8.228298
capital-gain 18.58347
education-num 5.066041
hours-per-week 3.946534
age 3.86064
capital-loss 2.81616
fnlwgt 0.7489593

*** MODEL STATISTICS SUMMARY ***
Count of training examples: 32561
Residual Deviance: 26705.74
Null Deviance: 35948.08
AIC: 26719.74
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
maml.exe CV tr=FastRank{nl=5 mil=5 lr=0.25 iter=20} threads=- dout=%Output% loader=Text{col=Name:TX:0 col=Label:Num:1 col=Features:Num:~} data=%Data% seed=1 xf=Expr{col=Name expr={x=>right(x, 1)}}
Physical memory usage(MB): %Number%
Virtual memory usage(MB): %Number%
%DateTime% Time elapsed(s): %Number%

Could not find file '%Data%
Error log has been saved to '%Temp%\%ErrorLog%'. Please refer to https://aka.ms/MLNetIssue if you need assistance.
--- Progress log ---
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
maml.exe CV tr=FastRank{nl=5 mil=5 lr=0.25 iter=20} threads=- dout=%Output% data=%Data% seed=1
Not adding a normalizer.
Making per-feature arrays
Changing data from row-wise to column-wise
Warning: Skipped 8 instances with missing features during training
Processed 329 instances
Binning and forming Feature objects
Reserved memory for tree learner: 3852 bytes
Starting to train ...
Not training a calibrator because it is not needed.
Not adding a normalizer.
Making per-feature arrays
Changing data from row-wise to column-wise
Warning: Skipped 8 instances with missing features during training
Processed 354 instances
Binning and forming Feature objects
Reserved memory for tree learner: 3816 bytes
Starting to train ...
Not training a calibrator because it is not needed.
TEST POSITIVE RATIO: 0.3702 (134.0/(134.0+228.0))
Confusion table
||======================
PREDICTED || positive | negative | Recall
TRUTH ||======================
positive || 131 | 3 | 0.9776
negative || 10 | 218 | 0.9561
||======================
Precision || 0.9291 | 0.9864 |
OVERALL 0/1 ACCURACY: 0.964088
LOG LOSS/instance: 0.211336
Test-set entropy (prior Log-Loss/instance): 0.950799
LOG-LOSS REDUCTION (RIG): 77.772765
AUC: 0.983225
TEST POSITIVE RATIO: 0.3175 (107.0/(107.0+230.0))
Confusion table
||======================
PREDICTED || positive | negative | Recall
TRUTH ||======================
positive || 98 | 9 | 0.9159
negative || 5 | 225 | 0.9783
||======================
Precision || 0.9515 | 0.9615 |
OVERALL 0/1 ACCURACY: 0.958457
LOG LOSS/instance: 0.137700
Test-set entropy (prior Log-Loss/instance): 0.901650
LOG-LOSS REDUCTION (RIG): 84.727964
AUC: 0.993681

OVERALL RESULTS
---------------------------------------
AUC: 0.988453 (0.0052)
Accuracy: 0.961273 (0.0028)
Positive precision: 0.940267 (0.0112)
Positive recall: 0.946750 (0.0309)
Negative precision: 0.973982 (0.0124)
Negative recall: 0.967201 (0.0111)
Log-loss: 0.174518 (0.0368)
Log-loss reduction: 81.250364 (3.4776)
F1 Score: 0.943030 (0.0097)
AUPRC: 0.962986 (0.0211)

---------------------------------------
Physical memory usage(MB): %Number%
Virtual memory usage(MB): %Number%
%DateTime% Time elapsed(s): %Number%

--- Progress log ---
[1] 'FastTree data preparation' started.
[1] 'FastTree data preparation' finished in %Time%.
[2] 'FastTree in-memory bins initialization' started.
[2] 'FastTree in-memory bins initialization' finished in %Time%.
[3] 'FastTree feature conversion' started.
[3] 'FastTree feature conversion' finished in %Time%.
[4] 'FastTree training' started.
[4] 'FastTree training' finished in %Time%.
[5] 'FastTree data preparation #2' started.
[5] 'FastTree data preparation #2' finished in %Time%.
[6] 'FastTree in-memory bins initialization #2' started.
[6] 'FastTree in-memory bins initialization #2' finished in %Time%.
[7] 'FastTree feature conversion #2' started.
[7] 'FastTree feature conversion #2' finished in %Time%.
[8] 'FastTree training #2' started.
[8] 'FastTree training #2' finished in %Time%.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FastRank
AUC Accuracy Positive precision Positive recall Negative precision Negative recall Log-loss Log-loss reduction F1 Score AUPRC /lr /nl /mil /iter Learner Name Train Dataset Test Dataset Results File Run Time Physical Memory Virtual Memory Command Line Settings
0.988453 0.961273 0.940267 0.94675 0.973982 0.967201 0.174518 81.25037 0.94303 0.962986 0.25 5 5 20 FastRank %Data% %Output% 99 0 0 maml.exe CV tr=FastRank{nl=5 mil=5 lr=0.25 iter=20} threads=- dout=%Output% data=%Data% seed=1 /lr:0.25;/nl:5;/mil:5;/iter:20

Loading