Skip to content

Commit 9e88532

Browse files
committed
Fixes culture-invariant formatting problems
The tests fail on machines whose regional settings use formatting conventions other than English ones. The failure occurs because results are written using the machine's current culture instead of the expected culture-invariant format. Fixes #74
1 parent ff5fb14 commit 9e88532

File tree

14 files changed

+68
-58
lines changed

14 files changed

+68
-58
lines changed

src/Microsoft.ML.Core/Environment/TlcEnvironment.cs

+12-11
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System;
88
using System.Collections.Concurrent;
99
using System.Collections.Generic;
10+
using System.Globalization;
1011
using System.IO;
1112
using System.Linq;
1213
using System.Threading;
@@ -212,7 +213,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
212213
PrintOperationStop(_out, ev);
213214
break;
214215
case ProgressReporting.ProgressEvent.EventKind.Progress:
215-
_out.Write("[{0}] ", ev.Index);
216+
_out.Write(string.Format(CultureInfo.InvariantCulture, "[{0}] ", ev.Index));
216217
PrintProgressLine(_out, ev);
217218
break;
218219
}
@@ -225,7 +226,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
225226

226227
if (PrintDot())
227228
{
228-
// We need to print an extended status line. At this point, every event should be
229+
// We need to print an extended status line. At this point, every event should be
229230
// a non-checkpoint progress event.
230231
bool needPrepend = entries.Count > 1;
231232
foreach (var ev in entries)
@@ -236,7 +237,7 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
236237
{
237238
EnsureNewLine();
238239
WriteAndReturnLinePrefix(MessageSensitivity.None, _out);
239-
_out.Write("[{0}] ", ev.Index);
240+
_out.Write(string.Format(CultureInfo.InvariantCulture, "[{0}] ", ev.Index));
240241
}
241242
else
242243
{
@@ -252,24 +253,24 @@ public void GetAndPrintAllProgress(ProgressReporting.ProgressTracker progressTra
252253

253254
private static void PrintOperationStart(TextWriter writer, ProgressReporting.ProgressEvent ev)
254255
{
255-
writer.WriteLine("[{0}] '{1}' started.", ev.Index, ev.Name);
256+
writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "[{0}] '{1}' started.", ev.Index, ev.Name));
256257
}
257258

258259
private static void PrintOperationStop(TextWriter writer, ProgressReporting.ProgressEvent ev)
259260
{
260-
writer.WriteLine("[{0}] '{1}' finished in {2}.", ev.Index, ev.Name, ev.EventTime - ev.StartTime);
261+
writer.WriteLine(string.Format(CultureInfo.InvariantCulture, "[{0}] '{1}' finished in {2}.", ev.Index, ev.Name, ev.EventTime - ev.StartTime));
261262
}
262263

263264
private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEvent ev)
264265
{
265266
// Elapsed time.
266267
var elapsed = ev.EventTime - ev.StartTime;
267268
if (elapsed.TotalMinutes < 1)
268-
writer.Write("(00:{0:00.00})", elapsed.TotalSeconds);
269+
writer.Write(string.Format(CultureInfo.InvariantCulture, "(00:{0:00.00})", elapsed.TotalSeconds));
269270
else if (elapsed.TotalHours < 1)
270-
writer.Write("({0:00}:{1:00.0})", elapsed.Minutes, elapsed.TotalSeconds - 60 * elapsed.Minutes);
271+
writer.Write(string.Format(CultureInfo.InvariantCulture, "({0:00}:{1:00.0})", elapsed.Minutes, elapsed.TotalSeconds - 60 * elapsed.Minutes));
271272
else
272-
writer.Write("({0:00}:{1:00}:{2:00})", elapsed.Hours, elapsed.Minutes, elapsed.Seconds);
273+
writer.Write(string.Format(CultureInfo.InvariantCulture, "({0:00}:{1:00}:{2:00})", elapsed.Hours, elapsed.Minutes, elapsed.Seconds));
273274

274275
// Progress units.
275276
bool first = true;
@@ -281,7 +282,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven
281282
first = false;
282283
writer.Write("{0}", ev.ProgressEntry.Progress[i]);
283284
if (ev.ProgressEntry.ProgressLim[i] != null)
284-
writer.Write("/{0}", ev.ProgressEntry.ProgressLim[i].Value);
285+
writer.Write("/{0}", ev.ProgressEntry.ProgressLim[i].Value.ToString(CultureInfo.InvariantCulture));
285286
writer.Write(" {0}", ev.ProgressEntry.Header.UnitNames[i]);
286287
}
287288

@@ -291,7 +292,7 @@ private void PrintProgressLine(TextWriter writer, ProgressReporting.ProgressEven
291292
if (ev.ProgressEntry.Metrics[i] == null)
292293
continue;
293294
// REVIEW: print metrics prettier.
294-
writer.Write("\t{0}: {1}", ev.ProgressEntry.Header.MetricNames[i], ev.ProgressEntry.Metrics[i].Value);
295+
writer.Write("\t{0}: {1}", ev.ProgressEntry.Header.MetricNames[i], ev.ProgressEntry.Metrics[i].Value.ToString(CultureInfo.InvariantCulture));
295296
}
296297

297298
writer.WriteLine();
@@ -306,7 +307,7 @@ private void EnsureNewLine(bool isError = false)
306307
return;
307308

308309
// If _err and _out is the same writer, we need to print new line as well.
309-
// If _out and _err writes to Console.Out and Console.Error respectively,
310+
// If _out and _err writes to Console.Out and Console.Error respectively,
310311
// in the general user scenario they end up writing to the same underlying stream,
311312
// so write a new line to the stream anyways.
312313
if (isError && _err != _out && (_out != Console.Out || _err != Console.Error))

src/Microsoft.ML.Data/Evaluators/EvaluatorUtils.cs

+11-11
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ public static IDataView AddKeyColumn<TSrc>(IHostEnvironment env, IDataView input
383383

384384
/// <summary>
385385
/// This method takes an array of data views and a specified input vector column, and adds a new output column to each of the data views.
386-
/// First, we find the union set of the slot names in the different data views. Next we define a new vector column for each
386+
/// First, we find the union set of the slot names in the different data views. Next we define a new vector column for each
387387
/// data view, indexed by the union of the slot names. For each data view, every slot value is the value in the slot corresponding
388388
/// to its slot name in the original column. If a reconciled slot name does not exist in an input column, the value in the output
389389
/// column is def.
@@ -530,7 +530,7 @@ private static int[][] MapKeys(ISchema[] schemas, string columnName, bool isVec,
530530

531531
/// <summary>
532532
/// This method takes an array of data views and a specified input key column, and adds a new output column to each of the data views.
533-
/// First, we find the union set of the key values in the different data views. Next we define a new key column for each
533+
/// First, we find the union set of the key values in the different data views. Next we define a new key column for each
534534
/// data view, with the union of the key values as the new key values. For each data view, the value in the output column is the value
535535
/// corresponding to the key value in the original column.
536536
/// </summary>
@@ -801,8 +801,8 @@ public static string GetPerFoldResults(IHostEnvironment env, IDataView fold, out
801801
}
802802

803803
// This method returns a string representation of a set of metrics. If there are stratification columns, it looks for columns named
804-
// StratCol and StratVal, and outputs the metrics in the rows with NA in the StratCol column. If weighted is true, it looks
805-
// for a DvBool column named "IsWeighted" and outputs the metrics in the rows with a value of true in that column.
804+
// StratCol and StratVal, and outputs the metrics in the rows with NA in the StratCol column. If weighted is true, it looks
805+
// for a DvBool column named "IsWeighted" and outputs the metrics in the rows with a value of true in that column.
806806
// If nonAveragedCols is non-null, it computes the average and standard deviation over all the relevant rows and populates
807807
// nonAveragedCols with columns that are either hidden, or are not of a type that we can display (i.e., either a numeric column,
808808
// or a known length vector of doubles).
@@ -926,8 +926,8 @@ private static string GetMetricsAsString(IHostEnvironment env, IDataView data, b
926926
if (average)
927927
{
928928
Contracts.AssertValue(sumSqMetrics);
929-
sb.AppendLine(string.Format(" ({0:N4})", numResults == 1 ? 0 :
930-
Math.Sqrt(sumSqMetrics[i] / numResults - avgMetrics[i] * avgMetrics[i])));
929+
sb.AppendFormat(CultureInfo.InvariantCulture, " ({0:N4})", numResults == 1 ? 0 :
930+
Math.Sqrt(sumSqMetrics[i] / numResults - avgMetrics[i] * avgMetrics[i])).AppendLine();
931931
}
932932
else
933933
sb.AppendLine();
@@ -1026,7 +1026,7 @@ private static List<string> GetMetricNames(IChannel ch, ISchema schema, IRow row
10261026
Contracts.Assert(Utils.Size(vBufferGetters) == schema.ColumnCount);
10271027

10281028
// Get the names of the metrics. For R8 valued columns the metric name is the column name. For R8 vector valued columns
1029-
// the names of the metrics are the column name, followed by the slot name if it exists, or "Label_i" if it doesn't.
1029+
// the names of the metrics are the column name, followed by the slot name if it exists, or "Label_i" if it doesn't.
10301030
VBuffer<DvText> names = default(VBuffer<DvText>);
10311031
int metricCount = 0;
10321032
var metricNames = new List<string>();
@@ -1121,7 +1121,7 @@ private static string GetConfusionTableAsString(double[][] confusionTable, doubl
11211121
var numFalseNeg = confusionTable[0][1];
11221122
var numTrueNeg = confusionTable[1][1];
11231123
var numFalsePos = confusionTable[1][0];
1124-
sb.AppendFormat("{0}TEST {1} RATIO:\t{2:N4} ({3:F1}/({3:F1}+{4:F1}))", prefix, positiveCaps,
1124+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}TEST {1} RATIO:\t{2:N4} ({3:F1}/({3:F1}+{4:F1}))", prefix, positiveCaps,
11251125
1.0 * (numTruePos + numFalseNeg) / (numTruePos + numTrueNeg + numFalseNeg + numFalsePos),
11261126
numTruePos + numFalseNeg, numFalsePos + numTrueNeg);
11271127
}
@@ -1154,9 +1154,9 @@ private static string GetConfusionTableAsString(double[][] confusionTable, doubl
11541154
{
11551155
sb.AppendFormat(rowLabelFormat, i, predictedLabelNames[i]);
11561156
for (int j = 0; j < numLabels; j++)
1157-
sb.AppendFormat(format2, confusionTable[i][j]);
1157+
sb.AppendFormat(CultureInfo.InvariantCulture, format2, confusionTable[i][j]);
11581158
Double recall = rowSums[i] > 0 ? confusionTable[i][i] / rowSums[i] : 0;
1159-
sb.AppendFormat(" {0,5:F4}", recall);
1159+
sb.AppendFormat(CultureInfo.InvariantCulture, " {0,5:F4}", recall);
11601160
sb.AppendLine();
11611161
}
11621162
sb.AppendFormat(" {0}||", pad);
@@ -1168,7 +1168,7 @@ private static string GetConfusionTableAsString(double[][] confusionTable, doubl
11681168
for (int i = 0; i < numLabels; i++)
11691169
{
11701170
Double precision = columnSums[i] > 0 ? confusionTable[i][i] / columnSums[i] : 0;
1171-
sb.AppendFormat(format, precision);
1171+
sb.AppendFormat(CultureInfo.InvariantCulture, format, precision);
11721172
}
11731173
sb.AppendLine();
11741174
return sb.ToString();

src/Microsoft.ML.Data/Utilities/TimerScope.cs

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System;
6+
using System.Globalization;
67
using Microsoft.ML.Runtime;
78
using Microsoft.ML.Runtime.Data;
89

@@ -46,7 +47,7 @@ public void Dispose()
4647

4748
// REVIEW: This \n\n is to prevent changes across a bunch of baseline files.
4849
// Ideally we should change our comparison method to ignore empty lines.
49-
_ch.Info("{0}\t Time elapsed(s): {1}\n\n", DateTime.Now, elapsedSeconds);
50+
_ch.Info("{0}\t Time elapsed(s): {1}\n\n", DateTime.Now.ToString(CultureInfo.InvariantCulture), elapsedSeconds.ToString(CultureInfo.InvariantCulture));
5051

5152
using (var pipe = _host.StartPipe<TelemetryMessage>("TelemetryPipe"))
5253
{

src/Microsoft.ML.FastTree/Training/EnsembleCompression/LassoBasedEnsembleCompressor.cs

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78

89
namespace Microsoft.ML.Runtime.FastTree.Internal
910
{

src/Microsoft.ML.FastTree/Training/Test.cs

+4-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78
using System.Linq;
89
using System.Threading;
910
using System.Threading.Tasks;
@@ -191,7 +192,7 @@ public virtual string FormatInfoString()
191192
var sb = new System.Text.StringBuilder();
192193
foreach (var r in ComputeTests())
193194
{
194-
sb.AppendFormat("{0}.{1}={2}\n", ScoreTracker.DatasetName, r.LossFunctionName, r.FinalValue);
195+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}.{1}={2}\n", ScoreTracker.DatasetName, r.LossFunctionName, r.FinalValue);
195196
}
196197
return sb.ToString();
197198
}
@@ -377,7 +378,7 @@ public override string FormatInfoString()
377378
{
378379
if (i > 1)
379380
sb.Append("\t");
380-
sb.AppendFormat("@{0}:{1:00.00}", i++, 100.0 * t.FinalValue);
381+
sb.AppendFormat(CultureInfo.InvariantCulture, "@{0}:{1:00.00}", i++, 100.0 * t.FinalValue);
381382
}
382383
sb.AppendLine();
383384
return sb.ToString();
@@ -512,7 +513,7 @@ public override string FormatInfoString()
512513
{
513514
if (i > 1)
514515
sb.Append("\t");
515-
sb.AppendFormat("{0}:{1:00.00}", t.LossFunctionName, t.FinalValue);
516+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}:{1:00.00}", t.LossFunctionName, t.FinalValue);
516517
i++;
517518
}
518519
sb.AppendLine();

src/Microsoft.ML.FastTree/TreeEnsemble/Ensemble.cs

+10-9
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Collections.Generic;
7+
using System.Globalization;
78
using System.IO;
89
using System.Linq;
910
using System.Text;
@@ -128,13 +129,13 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
128129

129130
numNodes += evaluatorCounter;
130131

131-
sb.AppendFormat("[TreeEnsemble]\nInputs={0}\nEvaluators={1}\n", featureToID.Count, evaluatorCounter + 1);
132+
sb.AppendFormat(CultureInfo.InvariantCulture, "[TreeEnsemble]\nInputs={0}\nEvaluators={1}\n", featureToID.Count, evaluatorCounter + 1);
132133

133134
sb.Append(sbInput);
134135
sb.Append(sbEvaluator);
135136

136137
// Append the final aggregator
137-
sb.AppendFormat("\n[Evaluator:{0}]\nEvaluatorType=Aggregator\nNumNodes={1}\nNodes=", evaluatorCounter + 1, numNodes);
138+
sb.AppendFormat(CultureInfo.InvariantCulture, "\n[Evaluator:{0}]\nEvaluatorType=Aggregator\nNumNodes={1}\nNodes=", evaluatorCounter + 1, numNodes);
138139

139140
// Nodes
140141
if (_firstInputInitializationContent != null)
@@ -163,7 +164,7 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
163164
{
164165
if (_firstInputInitializationContent != null)
165166
sb.Append("\t");
166-
sb.AppendFormat("{0}", _trees[0].Weight);
167+
sb.AppendFormat(CultureInfo.InvariantCulture, "{0}", _trees[0].Weight);
167168
}
168169

169170
for (int w = 1; w < NumTrees; ++w)
@@ -172,7 +173,7 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
172173
{
173174
sb.Append("\t");
174175
}
175-
sb.Append(_trees[w].Weight);
176+
sb.Append(_trees[w].Weight.ToString(CultureInfo.InvariantCulture));
176177
}
177178

178179
sb.AppendFormat("\nBias={0}", Bias);
@@ -193,15 +194,15 @@ public string ToTreeEnsembleIni(FeaturesToContentMap fmap,
193194

194195
protected int AppendComments(StringBuilder sb, string trainingParams)
195196
{
196-
sb.AppendFormat("\n\n[Comments]\nC:0=Regression Tree Ensemble\nC:1=Generated using FastTree\nC:2=Created on {0}\n", DateTime.Now);
197+
sb.AppendFormat("\n\n[Comments]\nC:0=Regression Tree Ensemble\nC:1=Generated using FastTree\nC:2=Created on {0}\n", DateTime.Now.ToString(CultureInfo.InvariantCulture));
197198

198199
string[] trainingParamsList = trainingParams.Split(new char[] { '\n' });
199200
int i = 0;
200201
for (; i < trainingParamsList.Length; ++i)
201202
{
202203
if (trainingParamsList[i].Length > 0)
203204
{
204-
sb.AppendFormat("C:{0}=PARAM:{1}\n", i + 3, trainingParamsList[i]);
205+
sb.AppendFormat(CultureInfo.InvariantCulture, "C:{0}=PARAM:{1}\n", i + 3, trainingParamsList[i]);
205206
}
206207
}
207208
return i + 3;
@@ -328,15 +329,15 @@ public string ToGainSummary(FeaturesToContentMap fmap, Dictionary<int, int> feat
328329
foreach (var pair in sortedByGain)
329330
{
330331
int outputInputId = featureToID.ContainsKey(pair.Key) ? featureToID[pair.Key] : 0;
331-
output.Append(string.Format("C:{0}=FG:I{1}:{2}:{3}\n", startingCommentNumber++, outputInputId,
332-
fmap.GetName(pair.Key), Math.Pow(pair.Value, power) / normalizingFactor));
332+
output.AppendFormat(CultureInfo.InvariantCulture, "C:{0}=FG:I{1}:{2}:{3}\n", startingCommentNumber++, outputInputId,
333+
fmap.GetName(pair.Key), Math.Pow(pair.Value, power) / normalizingFactor);
333334
}
334335
return output.ToString();
335336
}
336337

337338
/// <summary>
338339
/// Returns a vector of feature contributions for a given example.
339-
/// <paramref name="builder"/> is used as a buffer to accumulate the contributions across trees.
340+
/// <paramref name="builder"/> is used as a buffer to accumulate the contributions across trees.
340341
/// If <paramref name="builder"/> is null, it will be created, otherwise it will be reused.
341342
/// </summary>
342343
internal void GetFeatureContributions(ref VBuffer<float> features, ref VBuffer<float> contribs, ref BufferBuilder<float> builder)

0 commit comments

Comments
 (0)