Skip to content

Commit 1ed4195

Browse files
authored
Fail gracefully if unable to instantiate data view with swept parameters (dotnet#125)
* gracefully fail if unable to parse a data view
* rev
1 parent 48de6a7 commit 1ed4195

File tree

1 file changed

+31
-21
lines changed

1 file changed

+31
-21
lines changed

src/Microsoft.ML.Auto/ColumnInference/TextFileContents.cs

+31-21
Original file line number | Diff line number | Diff line change
@@ -81,35 +81,45 @@ from _sep in separatorCandidates
8181
private static bool TryParseFile(TextLoader.Arguments args, IMultiStreamSource source, out ColumnSplitResult result)
8282
{
8383
result = null;
84-
var textLoader = new TextLoader(new MLContext(), args, source);
85-
var idv = textLoader.Read(source).Take(1000);
86-
var columnCounts = new List<int>();
87-
var column = idv.Schema["C"];
88-
var columnIndex = column.Index;
89-
90-
using (var cursor = idv.GetRowCursor(new[] { idv.Schema[columnIndex] }))
84+
// try to instantiate data view with swept arguments
85+
try
9186
{
92-
var getter = cursor.GetGetter<VBuffer<ReadOnlyMemory<char>>>(columnIndex);
9387

94-
VBuffer<ReadOnlyMemory<char>> line = default;
95-
while (cursor.MoveNext())
88+
var textLoader = new TextLoader(new MLContext(), args, source);
89+
var idv = textLoader.Read(source).Take(1000);
90+
var columnCounts = new List<int>();
91+
var column = idv.Schema["C"];
92+
var columnIndex = column.Index;
93+
94+
using (var cursor = idv.GetRowCursor(new[] { idv.Schema[columnIndex] }))
95+
{
96+
var getter = cursor.GetGetter<VBuffer<ReadOnlyMemory<char>>>(columnIndex);
97+
98+
VBuffer<ReadOnlyMemory<char>> line = default;
99+
while (cursor.MoveNext())
100+
{
101+
getter(ref line);
102+
columnCounts.Add(line.Length);
103+
}
104+
}
105+
106+
var mostCommon = columnCounts.GroupBy(x => x).OrderByDescending(x => x.Count()).First();
107+
if (mostCommon.Count() < UniformColumnCountThreshold * columnCounts.Count)
96108
{
97-
getter(ref line);
98-
columnCounts.Add(line.Length);
109+
return false;
99110
}
111+
112+
// disallow single-column case
113+
if (mostCommon.Key <= 1) { return false; }
114+
115+
result = new ColumnSplitResult(true, args.Separators.First(), args.AllowQuoting, args.AllowSparse, mostCommon.Key);
116+
return true;
100117
}
101-
102-
var mostCommon = columnCounts.GroupBy(x => x).OrderByDescending(x => x.Count()).First();
103-
if (mostCommon.Count() < UniformColumnCountThreshold * columnCounts.Count)
118+
// fail gracefully if unable to instantiate data view with swept arguments
119+
catch(Exception)
104120
{
105121
return false;
106122
}
107-
108-
// disallow single-column case
109-
if (mostCommon.Key <= 1) { return false; }
110-
111-
result = new ColumnSplitResult(true, args.Separators.First(), args.AllowQuoting, args.AllowSparse, mostCommon.Key);
112-
return true;
113123
}
114124
}
115125
}

0 commit comments

Comments
 (0)