Skip to content

Commit 425fa60

Browse files
daholste authored and Dmitry-A committed
Fix: during type inferencing, parse whitespace strings as NaN (dotnet#271)
1 parent 75813e9 commit 425fa60

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

src/Microsoft.ML.Auto/Utils/MLNetUtils/Conversions.cs

+7 -1
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,13 @@ internal static class Conversions
1919
public static bool TryParse(in TX src, out R4 dst)
2020
{
2121
var span = src.Span;
22-
if (float.TryParse(span.ToString(), out dst))
22+
var str = span.ToString();
23+
if (string.IsNullOrWhiteSpace(str))
24+
{
25+
dst = R4.NaN;
26+
return true;
27+
}
28+
if (float.TryParse(str, out dst))
2329
{
2430
return true;
2531
}

src/Test/ColumnInferenceTests.cs

+2 -2
Original file line number | Diff line number | Diff line change
@@ -63,9 +63,9 @@ public void IdentifyLabelColumnThroughIndexWithoutHeader()
6363
[TestMethod]
6464
public void DatasetWithEmptyColumn()
6565
{
66-
var result = new MLContext().Auto().InferColumns(Path.Combine("TestData", "DatasetWithEmptyColumn.txt"), DefaultColumnNames.Label);
66+
var result = new MLContext().Auto().InferColumns(Path.Combine("TestData", "DatasetWithEmptyColumn.txt"), DefaultColumnNames.Label, groupColumns: false);
6767
var emptyColumn = result.TextLoaderOptions.Columns.First(c => c.Name == "Empty");
68-
Assert.AreEqual(DataKind.String, emptyColumn.DataKind);
68+
Assert.AreEqual(DataKind.Single, emptyColumn.DataKind);
6969
}
7070

7171
[TestMethod]

src/Test/ConversionTests.cs

+2 -1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,8 @@ public void ConvertFloatMissingValues()
1515
{
1616
var missingValues = new string[]
1717
{
18-
"?",
18+
"",
19+
"?", " ",
1920
"na", "n/a", "nan",
2021
"NA", "N/A", "NaN", "NAN"
2122
};

0 commit comments

Comments
 (0)