Skip to content

Commit 5bfa668

Browse files
committed
Fixes data invariant format problems
The tests do not pass on machines whose regional formatting differs from that of the English language. The error happens because the results are written in a format different from the expected one. 1. The main fix is to apply the en-US culture to the test thread so that results are output in a format that is comparable with the test format. 2. A secondary fix is to make comparisons between culture-sensitive data type representations invariant when they do not have human-readable dimensions. In the case of OptimizationMonitor.cs, the cast between a culture-sensitive floating-point value and a string causes errors of orders of magnitude in the output results. The intention of this patch is not to offer a robust solution that removes all future issues. There is room for refactoring where, for instance, locale information would be applied to input and output, and logging/tracing would be clearly separated from other kinds of locale-sensitive handling. This way, culture-sensitive parts would be separated, and particular output formats could be tested as separate cases if so desired. Fixes #74
1 parent 3780923 commit 5bfa668

File tree

14 files changed

+70
-55
lines changed

14 files changed

+70
-55
lines changed

src/Microsoft.ML.Core/Environment/TlcEnvironment.cs

+12-11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System;
88
using System.Collections.Concurrent;
99
using System.Collections.Generic;
10+
using System.Globalization;
1011
using System.IO;
1112
using System.Linq;
1213
using System.Threading;
@@ -212,7 +213,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
212213
PrintOperationStop(_out, ev);
213214
break;
214215
case ProgressReporting.ProgressEvent.EventKind.Progress:
215-
_out.Write("[{0}] ", ev.Index);
216+
_out.Write(string.Format(CultureInfo.InvariantCulture, "[{0}] ", ev.Index));
216217
PrintProgressLine(_out, ev);
217218
break;
218219
}
@@ -225,7 +226,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
225226

226227
if (PrintDot())
227228
{
228-
// We need to print an extended status line. At this point, every event should be
229+
// We need to print an extended status line. At this point, every event should be
229230
// a non-checkpoint progress event.
230231
bool needPrepend = entries.Count > 1;
231232
foreach (var ev in entries)
@@ -236,7 +237,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
236237
{
237238
EnsureNewLine();
238239
WriteAndReturnLinePrefix(MessageSensitivity.None, _out);
239-
_out.Write("[{0}] ", ev.Index);
240+
_out.Write(string.Format(CultureInfo.InvariantCulture, "[{0}] ", ev.Index));
240241
}
241242
else
242243
{
@@ -252,24 +253,24 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
252253

253254
private static void PrintOperationStart(TextWriter writer, ProgressReporting.ProgressEvent ev)
254255
{
255-
writer.WriteLine("[{0}] '{1}' started.", ev.Index, ev.Name);
256+
writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "[{0}] '{1}' started.", ev.Index, ev.Name));
256257
}
257258

258259
private static void PrintOperationStop(TextWriter writer, ProgressReporting.ProgressEvent ev)
259260
{
260-
writer.WriteLine("[{0}] '{1}' finished in {2}.", ev.Index, ev.Name, ev.EventTime - ev.StartTime);
261+
writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "[{0}] '{1}' finished in {2}.", ev.Index, ev.Name, ev.EventTime - ev.StartTime));
261262
}
262263

263264
private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEvent ev)
264265
{
265266
// Elapsed time.
266267
var elapsed = ev.EventTime - ev.StartTime;
267268
if (elapsed.TotalMinutes < 1)
268-
writer.Write("(00:{0:00.00})", elapsed.TotalSeconds);
269+
writer.Write(string.Format(CultureInfo.InvariantCulture, "(00:{0:00.00})", elapsed.TotalSeconds));
269270
else if (elapsed.TotalHours < 1)
270-
writer.Write("({0:00}:{1:00.0})", elapsed.Minutes, elapsed.TotalSeconds - 60 * elapsed.Minutes);
271+
writer.Write(string.Format(CultureInfo.InvariantCulture, "({0:00}:{1:00.0})", elapsed.Minutes, elapsed.TotalSeconds - 60 * elapsed.Minutes));
271272
else
272-
writer.Write("({0:00}:{1:00}:{2:00})", elapsed.Hours, elapsed.Minutes, elapsed.Seconds);
273+
writer.Write(string.Format(CultureInfo.InvariantCulture, "({0:00}:{1:00}:{2:00})", elapsed.Hours, elapsed.Minutes, elapsed.Seconds));
273274

274275
// Progress units.
275276
bool first = true;
@@ -281,7 +282,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven
281282
first = false;
282283
writer.Write("{0}", ev.ProgressEntry.Progress[i]);
283284
if (ev.ProgressEntry.ProgressLim[i] != null)
284-
writer.Write("/{0}", ev.ProgressEntry.ProgressLim[i].Value);
285+
writer.Write("/{0}", ev.ProgressEntry.ProgressLim[i].Value.ToString(CultureInfo.InvariantCulture));
285286
writer.Write(" {0}", ev.ProgressEntry.Header.UnitNames[i]);
286287
}
287288

@@ -291,7 +292,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven
291292
if (ev.ProgressEntry.Metrics[i] == null)
292293
continue;
293294
// REVIEW: print metrics prettier.
294-
writer.Write("\t{0}: {1}", ev.ProgressEntry.Header.MetricNames[i], ev.ProgressEntry.Metrics[i].Value);
295+
writer.Write("\t{0}: {1}", ev.ProgressEntry.Header.MetricNames[i], ev.ProgressEntry.Metrics[i].Value.ToString(CultureInfo.InvariantCulture));
295296
}
296297

297298
writer.WriteLine();
@@ -306,7 +307,7 @@ private void EnsureNewLine(bool isError = false)
306307
return;
307308

308309
// If _err and _out is the same writer, we need to print new line as well.
309-
// If _out and _err writes to Console.Out and Console.Error respectively,
310+
// If _out and _err writes to Console.Out and Console.Error respectively,
310311
// in the general user scenario they ends up with writing to the same underlying stream,.
311312
// so write a new line to the stream anyways.
312313
if (isError && _err != _out && (_out != Console.Out || _err != Console.Error))

src/Microsoft.ML.Data/Utilities/TimerScope.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Globalization;
67
using Microsoft.ML.Runtime;
78
using Microsoft.ML.Runtime.Data;
89

@@ -46,7 +47,7 @@ public void Dispose()
4647

4748
// REVIEW: This is \n\n is to prevent changes across bunch of baseline files.
4849
// Ideally we should change our comparison method to ignore empty lines.
49-
_ch.Info("{0}\t Time elapsed(s): {1}\n\n", DateTime.Now, elapsedSeconds);
50+
_ch.Info("{0}\t Time elapsed(s): {1}\n\n", DateTime.Now.ToString(CultureInfo.InvariantCulture), elapsedSeconds.ToString(CultureInfo.InvariantCulture));
5051

5152
using (var pipe = _host.StartPipe<TelemetryMessage>("TelemetryPipe"))
5253
{

src/Microsoft.ML.FastTree/Training/EnsembleCompression/LassoBasedEnsembleCompressor.cs

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78

89
namespace Microsoft.ML.Runtime.FastTree.Internal
910
{

src/Microsoft.ML.FastTree/Training/Test.cs

+4-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78
using System.Linq;
89
using System.Threading;
910
using System.Threading.Tasks;
@@ -191,7 +192,7 @@ public virtual string FormatInfoString()
191192
var sb = new System.Text.StringBuilder();
192193
foreach (var r in ComputeTests())
193194
{
194-
sb.AppendFormat("{0}.{1}={2}\n", ScoreTracker.DatasetName, r.LossFunctionName, r.FinalValue);
195+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}.{1}={2}\n", ScoreTracker.DatasetName, r.LossFunctionName, r.FinalValue);
195196
}
196197
return sb.ToString();
197198
}
@@ -377,7 +378,7 @@ public override string FormatInfoString()
377378
{
378379
if (i > 1)
379380
sb.Append("\t");
380-
sb.AppendFormat("@{0}:{1:00.00}", i++, 100.0 * t.FinalValue);
381+
sb.AppendFormat(CultureInfo.InvariantCulture, "@{0}:{1:00.00}", i++, 100.0 * t.FinalValue);
381382
}
382383
sb.AppendLine();
383384
return sb.ToString();
@@ -512,7 +513,7 @@ public override string FormatInfoString()
512513
{
513514
if (i > 1)
514515
sb.Append("\t");
515-
sb.AppendFormat("{0}:{1:00.00}", t.LossFunctionName, t.FinalValue);
516+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}:{1:00.00}", t.LossFunctionName, t.FinalValue);
516517
i++;
517518
}
518519
sb.AppendLine();

src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs

+10-9
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78
using System.IO;
89
using System.Linq;
910
using System.Text;
@@ -128,13 +129,13 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
128129

129130
numNodes += evaluatorCounter;
130131

131-
sb.AppendFormat("[TreeEnsemble]\nInputs={0}\nEvaluators={1}\n", featureToID.Count, evaluatorCounter + 1);
132+
sb.AppendFormat(CultureInfo.InvariantCulture, "[TreeEnsemble]\nInputs={0}\nEvaluators={1}\n", featureToID.Count, evaluatorCounter + 1);
132133

133134
sb.Append(sbInput);
134135
sb.Append(sbEvaluator);
135136

136137
// Append the final aggregator
137-
sb.AppendFormat("\n[Evaluator:{0}]\nEvaluatorType=Aggregator\nNumNodes={1}\nNodes=", evaluatorCounter + 1, numNodes);
138+
sb.AppendFormat(CultureInfo.InvariantCulture, "\n[Evaluator:{0}]\nEvaluatorType=Aggregator\nNumNodes={1}\nNodes=", evaluatorCounter + 1, numNodes);
138139

139140
// Nodes
140141
if (_firstInputInitializationContent != null)
@@ -163,7 +164,7 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
163164
{
164165
if (_firstInputInitializationContent != null)
165166
sb.Append("\t");
166-
sb.AppendFormat("{0}", _trees[0].Weight);
167+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}", _trees[0].Weight);
167168
}
168169

169170
for (int w = 1; w < NumTrees; ++w)
@@ -172,7 +173,7 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
172173
{
173174
sb.Append("\t");
174175
}
175-
sb.Append(_trees[w].Weight);
176+
sb.Append(_trees[w].Weight.ToString(CultureInfo.InvariantCulture));
176177
}
177178

178179
sb.AppendFormat("\nBias={0}", Bias);
@@ -193,15 +194,15 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
193194

194195
protected int AppendComments(StringBuilder sb, string trainingParams)
195196
{
196-
sb.AppendFormat("\n\n[Comments]\nC:0=Regression Tree Ensemble\nC:1=Generated using FastTree\nC:2=Created on {0}\n", DateTime.Now);
197+
sb.AppendFormat("\n\n[Comments]\nC:0=Regression Tree Ensemble\nC:1=Generated using FastTree\nC:2=Created on {0}\n", DateTime.Now.ToString(CultureInfo.InvariantCulture));
197198

198199
string[] trainingParamsList = trainingParams.Split(new char[] { '\n' });
199200
int i = 0;
200201
for (; i < trainingParamsList.Length; ++i)
201202
{
202203
if (trainingParamsList[i].Length > 0)
203204
{
204-
sb.AppendFormat("C:{0}=PARAM:{1}\n", i + 3, trainingParamsList[i]);
205+
sb.AppendFormat(CultureInfo.InvariantCulture, "C:{0}=PARAM:{1}\n", i + 3, trainingParamsList[i]);
205206
}
206207
}
207208
return i + 3;
@@ -328,15 +329,15 @@ public string ToGainSummary(FeaturesToContentMap fmap, Dictionary<int, int> feat
328329
foreach (var pair in sortedByGain)
329330
{
330331
int outputInputId = featureToID.ContainsKey(pair.Key) ? featureToID[pair.Key] : 0;
331-
output.Append(string.Format("C:{0}=FG:I{1}:{2}:{3}\n", startingCommentNumber++, outputInputId,
332-
fmap.GetName(pair.Key), Math.Pow(pair.Value, power) / normalizingFactor));
332+
output.AppendFormat(CultureInfo.InvariantCulture, "C:{0}=FG:I{1}:{2}:{3}\n", startingCommentNumber++, outputInputId,
333+
fmap.GetName(pair.Key), Math.Pow(pair.Value, power) / normalizingFactor);
333334
}
334335
return output.ToString();
335336
}
336337

337338
/// <summary>
338339
/// Returns a vector of feature contributions for a given example.
339-
/// <paramref name="builder"/> is used as a buffer to accumulate the contributions across trees.
340+
/// <paramref name="builder"/> is used as a buffer to accumulate the contributions across trees.
340341
/// If <paramref name="builder"/> is null, it will be created, otherwise it will be reused.
341342
/// </summary>
342343
internal void GetFeatureContributions(ref VBuffer<float> features, ref VBuffer<float> contribs, ref BufferBuilder<float> builder)

src/Microsoft.ML.FastTree/TreeEnsemble/RegressionTree.cs

+5-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
using Microsoft.ML.Runtime.Model.Pfa;
2020
using Microsoft.ML.Runtime.Internal.Internallearn;
2121
using Newtonsoft.Json.Linq;
22+
using System.Globalization;
2223

2324
namespace Microsoft.ML.Runtime.FastTree.Internal
2425
{
@@ -1184,7 +1185,7 @@ public void ToTreeEnsembleFormat(StringBuilder sbEvaluator, StringBuilder sbInpu
11841185
private void ToTreeEnsembleFormatForCategoricalSplit(StringBuilder sbEvaluator, StringBuilder sbInput, FeaturesToContentMap featureContents,
11851186
ref int evaluatorCounter, Dictionary<int, int> featureToId, Dictionary<int, int> categoricalSplitNodeToId)
11861187
{
1187-
//REVIEW: Can all these conditions even be true?
1188+
//REVIEW: Can all these conditions even be true?
11881189
if (CategoricalSplitFeatures == null ||
11891190
CategoricalSplitFeatures.Length == 0 ||
11901191
CategoricalSplitFeatures.All(val => val == null))
@@ -1234,7 +1235,7 @@ private void ToTreeEnsembleFormatForCategoricalSplit(StringBuilder sbEvaluator,
12341235
sbLteChild.Append((n + 1) + toAppend);
12351236
sbGtChild.Append(~n + toAppend);
12361237
sbOutput.Append("1\t");
1237-
sbThreshold.Append(((double)0.5).ToString("R") + toAppend);
1238+
sbThreshold.Append(((double)0.5).ToString("R", CultureInfo.InvariantCulture) + toAppend);
12381239
}
12391240

12401241
sbOutput.Append("0");
@@ -1266,12 +1267,12 @@ public string ToOldIni(FeatureNameCollection featureNames)
12661267
if (gtChildCorrected < 0)
12671268
gtChildCorrected = numNonLeaves + (~gtChildCorrected);
12681269

1269-
output.AppendFormat("\nNodeType:{0}=Branch\nNodeDecision:{0}={1}\nNodeThreshold:{0}={2}\nNodeLTE:{0}={3}\nNodeGT:{0}={4}\n", n, name, currentThreshold, lteChildCorrected, gtChildCorrected);
1270+
output.AppendFormat(CultureInfo.InvariantCulture, "\nNodeType:{0}=Branch\nNodeDecision:{0}={1}\nNodeThreshold:{0}={2}\nNodeLTE:{0}={3}\nNodeGT:{0}={4}\n", n, name, currentThreshold, lteChildCorrected, gtChildCorrected);
12701271
}
12711272

12721273
for (int n = 0; n < NumLeaves; ++n)
12731274
{
1274-
output.AppendFormat("\nNodeType:{0}=Value\nNodeValue:{0}={1}\n", numNonLeaves + n, LeafValues[n]);
1275+
output.AppendFormat(CultureInfo.InvariantCulture, "\nNodeType:{0}=Value\nNodeValue:{0}={1}\n", numNonLeaves + n, LeafValues[n]);
12751276
}
12761277

12771278
return output.ToString();

src/Microsoft.ML.FastTree/Utils/Timer.cs

+4-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Globalization;
67
using System.Text;
78
using System.Threading;
89

@@ -156,7 +157,7 @@ public override string ToString()
156157

157158
string padded = "Name".PadRight(MaxEventNameLen);
158159

159-
sb.AppendFormat("{0} {1,10}{2,10}{3,8}{4,11}\n", padded, "Time", "%", "#Calls", "Time/Call");
160+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0} {1,10}{2,10}{3,8}{4,11}\n", padded, "Time", "%", "#Calls", "Time/Call");
160161
foreach (TimerEvent n in Enum.GetValues(typeof(TimerEvent)))
161162
{
162163
double time = (double)TickTotals[(int)n] / Stopwatch.Frequency;
@@ -167,7 +168,7 @@ public override string ToString()
167168

168169
padded = n.ToString().PadRight(MaxEventNameLen);
169170

170-
sb.AppendFormat("{0} {1,10:0.000}{2,9:00.00}%{3,8}{4,11:0.000}\n", padded, time, perc, numCalls, timePerCall);
171+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0} {1,10:0.000}{2,9:00.00}%{3,8}{4,11:0.000}\n", padded, time, perc, numCalls, timePerCall);
171172
}
172173
sb.AppendFormat("Count Statistics:\n");
173174
padded = "Name".PadRight(MaxEventNameLen);
@@ -178,7 +179,7 @@ public override string ToString()
178179

179180
padded = n.ToString().PadRight(MaxEventNameLen);
180181

181-
sb.AppendFormat("{0} {1,10}\n", padded, count);
182+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0} {1,10}\n", padded, count);
182183
}
183184
return sb.ToString();
184185
}

src/Microsoft.ML.FastTree/Utils/VectorUtils.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Globalization;
67
using System.Text;
78

89
namespace Microsoft.ML.Runtime.FastTree.Internal
@@ -338,7 +339,7 @@ public static string ToString(double[] vector)
338339
{
339340
sb.Append(", ");
340341
}
341-
sb.Append(vector[f]);
342+
sb.Append(vector[f].ToString(CultureInfo.InvariantCulture));
342343
}
343344
return sb.ToString();
344345
}

src/Microsoft.ML.ResultProcessor/ResultProcessor.cs

+10-9
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ private static bool ValidateMamlOutput(string filename, string[] rawLines, out L
584584
Results = runResults,
585585
PerFoldResults = foldResults,
586586
Time = 0,
587-
ExecutionDate = DateTime.Now.ToString()
587+
ExecutionDate = DateTime.Now.ToString(CultureInfo.InvariantCulture)
588588
};
589589
}
590590

@@ -1069,7 +1069,7 @@ private static Experiment CreateVisualizationExperiment(ExperimentItemResult res
10691069
Results = new List<ML.Runtime.ExperimentVisualization.ExperimentResult>()
10701070
};
10711071

1072-
// Propagate metrics to the report.
1072+
// Propagate metrics to the report.
10731073
ML.Runtime.ExperimentVisualization.ExperimentResult metrics = new ML.Runtime.ExperimentVisualization.ExperimentResult
10741074
{
10751075
Metrics = new List<ML.Runtime.ExperimentVisualization.MetricValue>(),
@@ -1103,7 +1103,7 @@ private static Experiment CreateVisualizationExperiment(ExperimentItemResult res
11031103
Value = result.VirtualMemory
11041104
});
11051105

1106-
// Propagate experiment arguments to the report.
1106+
// Propagate experiment arguments to the report.
11071107
foreach (KeyValuePair<string, string> setting in result.Settings)
11081108
{
11091109
string val;
@@ -1134,7 +1134,7 @@ private static Experiment CreateVisualizationExperiment(ExperimentItemResult res
11341134

11351135
/// <summary>
11361136
/// Deserialize a predictor, returning as an object
1137-
/// </summary>
1137+
/// </summary>
11381138
private static object Load(Stream stream)
11391139
{
11401140
BinaryFormatter bf = new BinaryFormatter();
@@ -1262,11 +1262,10 @@ protected static void Run(string[] args)
12621262
foreach (ExperimentItemResult result in predictor.PredictorList)
12631263
{
12641264
//print the result metrices
1265-
foreach (string name in predictor.ResultHeaderNames)
1265+
foreach(string name in predictor.ResultHeaderNames)
12661266
{
1267-
ResultMetric val;
1268-
if (result.Results.TryGetValue(name, out val))
1269-
outStream.Write(val.MetricValue);
1267+
if (result.Results.TryGetValue(name, out ResultMetric val))
1268+
outStream.Write(val.MetricValue.ToString(CultureInfo.InvariantCulture));
12701269
outStream.Write("\t");
12711270
}
12721271

@@ -1286,8 +1285,10 @@ protected static void Run(string[] args)
12861285
foreach (var kvp in result.PerFoldResults)
12871286
{
12881287
if (Float.IsNaN(kvp.Value.MetricValue) && kvp.Value.AllValues != null)
1288+
{
12891289
outStream.Write("\t" + kvp.Key + ":"
1290-
+ string.Join(cmd.PerFoldResultSeparator, new List<string>(new List<Float>(kvp.Value.AllValues).Select(d => "" + d))));
1290+
+ string.Join(cmd.PerFoldResultSeparator, kvp.Value.AllValues.Select(d => d.ToString(CultureInfo.InvariantCulture))));
1291+
}
12911292
}
12921293
}
12931294

0 commit comments

Comments
 (0)