Skip to content

Commit a25763e

Browse files
authored
Textloader internalizations, documentation, and Arguments refactoring (#2417)
1 parent 4a71e50 commit a25763e

File tree

7 files changed

+243
-126
lines changed

7 files changed

+243
-126
lines changed

src/Microsoft.ML.Data/DataLoadSave/DataReaderExtensions.cs

+6-8
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@ namespace Microsoft.ML
99
{
1010
public static class DataReaderExtensions
1111
{
12-
public static IDataView Read(this IDataReader<IMultiStreamSource> reader, string path)
13-
{
14-
return reader.Read(new MultiFileSource(path));
15-
}
16-
12+
/// <summary>
13+
/// Reads data from one or more file paths given by <paramref name="path"/> into an <see cref="IDataView"/>.
14+
/// </summary>
15+
/// <param name="reader">The reader to use.</param>
16+
/// <param name="path">One or more paths from which to load data.</param>
1717
public static IDataView Read(this IDataReader<IMultiStreamSource> reader, params string[] path)
18-
{
19-
return reader.Read(new MultiFileSource(path));
20-
}
18+
=> reader.Read(new MultiFileSource(path));
2119
}
2220
}

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

+175-56
Large diffs are not rendered by default.

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

+16-16
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ public static class TextLoaderSaverCatalog
2121
/// <param name="dataSample">The optional location of a data sample. The sample can be used to infer column names and number of slots in each column.</param>
2222
public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
2323
TextLoader.Column[] columns,
24-
bool hasHeader = TextLoader.DefaultArguments.HasHeader,
25-
char separatorChar = TextLoader.DefaultArguments.Separator,
24+
bool hasHeader = TextLoader.Defaults.HasHeader,
25+
char separatorChar = TextLoader.Defaults.Separator,
2626
IMultiStreamSource dataSample = null)
2727
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);
2828

@@ -53,11 +53,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
5353
/// except for 3rd and 5th columns which have values 6 and 3</param>
5454
/// <param name="trimWhitespace">Whether to remove trailing whitespace from lines.</param>
5555
public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog catalog,
56-
bool hasHeader = TextLoader.DefaultArguments.HasHeader,
57-
char separatorChar = TextLoader.DefaultArguments.Separator,
58-
bool allowQuotedStrings = TextLoader.DefaultArguments.AllowQuoting,
59-
bool supportSparse = TextLoader.DefaultArguments.AllowSparse,
60-
bool trimWhitespace = TextLoader.DefaultArguments.TrimWhitespace)
56+
bool hasHeader = TextLoader.Defaults.HasHeader,
57+
char separatorChar = TextLoader.Defaults.Separator,
58+
bool allowQuotedStrings = TextLoader.Defaults.AllowQuoting,
59+
bool supportSparse = TextLoader.Defaults.AllowSparse,
60+
bool trimWhitespace = TextLoader.Defaults.TrimWhitespace)
6161
=> TextLoader.CreateTextReader<TInput>(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace);
6262

6363
/// <summary>
@@ -72,8 +72,8 @@ public static TextLoader CreateTextLoader<TInput>(this DataOperationsCatalog cat
7272
public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
7373
string path,
7474
TextLoader.Column[] columns,
75-
bool hasHeader = TextLoader.DefaultArguments.HasHeader,
76-
char separatorChar = TextLoader.DefaultArguments.Separator)
75+
bool hasHeader = TextLoader.Defaults.HasHeader,
76+
char separatorChar = TextLoader.Defaults.Separator)
7777
{
7878
Contracts.CheckNonEmpty(path, nameof(path));
7979

@@ -104,11 +104,11 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
104104
/// <returns>The data view.</returns>
105105
public static IDataView ReadFromTextFile<TInput>(this DataOperationsCatalog catalog,
106106
string path,
107-
bool hasHeader = TextLoader.DefaultArguments.HasHeader,
108-
char separatorChar = TextLoader.DefaultArguments.Separator,
109-
bool allowQuotedStrings = TextLoader.DefaultArguments.AllowQuoting,
110-
bool supportSparse = TextLoader.DefaultArguments.AllowSparse,
111-
bool trimWhitespace = TextLoader.DefaultArguments.TrimWhitespace)
107+
bool hasHeader = TextLoader.Defaults.HasHeader,
108+
char separatorChar = TextLoader.Defaults.Separator,
109+
bool allowQuotedStrings = TextLoader.Defaults.AllowQuoting,
110+
bool supportSparse = TextLoader.Defaults.AllowSparse,
111+
bool trimWhitespace = TextLoader.Defaults.TrimWhitespace)
112112
{
113113
Contracts.CheckNonEmpty(path, nameof(path));
114114

@@ -147,8 +147,8 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, str
147147
public static void SaveAsText(this DataOperationsCatalog catalog,
148148
IDataView data,
149149
Stream stream,
150-
char separatorChar = TextLoader.DefaultArguments.Separator,
151-
bool headerRow = TextLoader.DefaultArguments.HasHeader,
150+
char separatorChar = TextLoader.Defaults.Separator,
151+
bool headerRow = TextLoader.Defaults.HasHeader,
152152
bool schema = true,
153153
bool keepHidden = false)
154154
{

src/Microsoft.ML.StaticPipe/LocalPathReader.cs

+7-7
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,20 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System.Data;
56
using Microsoft.ML.Data;
67

78
namespace Microsoft.ML.StaticPipe
89
{
910
public static class LocalPathReader
1011
{
11-
public static DataView<TShape> Read<TShape>(this DataReader<IMultiStreamSource, TShape> reader, string path)
12-
{
13-
return reader.Read(new MultiFileSource(path));
14-
}
1512

13+
/// <summary>
14+
/// Reads data from one or more file paths given by <paramref name="path"/> into a <see cref="DataView{TShape}"/>.
15+
/// </summary>
16+
/// <param name="reader">The reader to use.</param>
17+
/// <param name="path">One or more paths from which to load data.</param>
1618
public static DataView<TShape> Read<TShape>(this DataReader<IMultiStreamSource, TShape> reader, params string[] path)
17-
{
18-
return reader.Read(new MultiFileSource(path));
19-
}
19+
=> reader.Read(new MultiFileSource(path));
2020
}
2121
}

test/BaselineOutput/Common/EntryPoints/core_manifest.json

+36-36
Original file line numberDiff line numberDiff line change
@@ -316,42 +316,6 @@
316316
"IsNullable": false,
317317
"Default": null
318318
},
319-
{
320-
"Name": "UseThreads",
321-
"Type": "Bool",
322-
"Desc": "Use separate parsing threads?",
323-
"Aliases": [
324-
"threads"
325-
],
326-
"Required": false,
327-
"SortOrder": 150.0,
328-
"IsNullable": false,
329-
"Default": true
330-
},
331-
{
332-
"Name": "HeaderFile",
333-
"Type": "String",
334-
"Desc": "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.",
335-
"Aliases": [
336-
"hf"
337-
],
338-
"Required": false,
339-
"SortOrder": 150.0,
340-
"IsNullable": false,
341-
"Default": null
342-
},
343-
{
344-
"Name": "MaxRows",
345-
"Type": "Int",
346-
"Desc": "Maximum number of rows to produce",
347-
"Aliases": [
348-
"rows"
349-
],
350-
"Required": false,
351-
"SortOrder": 150.0,
352-
"IsNullable": true,
353-
"Default": null
354-
},
355319
{
356320
"Name": "AllowQuoting",
357321
"Type": "Bool",
@@ -428,6 +392,42 @@
428392
"SortOrder": 150.0,
429393
"IsNullable": false,
430394
"Default": false
395+
},
396+
{
397+
"Name": "UseThreads",
398+
"Type": "Bool",
399+
"Desc": "Use separate parsing threads?",
400+
"Aliases": [
401+
"threads"
402+
],
403+
"Required": false,
404+
"SortOrder": 150.0,
405+
"IsNullable": false,
406+
"Default": true
407+
},
408+
{
409+
"Name": "HeaderFile",
410+
"Type": "String",
411+
"Desc": "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.",
412+
"Aliases": [
413+
"hf"
414+
],
415+
"Required": false,
416+
"SortOrder": 150.0,
417+
"IsNullable": false,
418+
"Default": null
419+
},
420+
{
421+
"Name": "MaxRows",
422+
"Type": "Int",
423+
"Desc": "Maximum number of rows to produce",
424+
"Aliases": [
425+
"rows"
426+
],
427+
"Required": false,
428+
"SortOrder": 150.0,
429+
"IsNullable": true,
430+
"Default": null
431431
}
432432
]
433433
},

test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ public void EntryPointCreateEnsemble()
463463
MetadataUtils.Const.ScoreValueKind.Score)
464464
).Transform(individualScores[i]);
465465

466-
individualScores[i] = ColumnSelectingTransformer.CreateDrop(Env, individualScores[i], MetadataUtils.Const.ScoreValueKind.Score);
466+
individualScores[i] = new ColumnSelectingTransformer(Env, null, new[] { MetadataUtils.Const.ScoreValueKind.Score }).Transform(individualScores[i]);
467467
}
468468

469469
var avgEnsembleInput = new EnsembleCreator.ClassifierInput { Models = predictorModels, ModelCombiner = EnsembleCreator.ClassifierCombiner.Average };

test/Microsoft.ML.Predictor.Tests/TestPredictors.cs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1806,7 +1806,7 @@ public void CompareSvmPredictorResultsToLibSvm()
18061806
{
18071807
using (var env = new LocalEnvironment(1, conc: 1))
18081808
{
1809-
IDataView trainView = new TextLoader(env, new TextLoader.Arguments(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.trainFilename)));
1809+
IDataView trainView = new TextLoader(env, new TextLoader.Options(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.trainFilename)));
18101810
trainView =
18111811
NormalizeTransform.Create(env,
18121812
new NormalizeTransform.MinMaxArguments()
@@ -1815,7 +1815,7 @@ public void CompareSvmPredictorResultsToLibSvm()
18151815
},
18161816
trainView);
18171817
var trainData = new RoleMappedData(trainView, "Label", "Features");
1818-
IDataView testView = new TextLoader(env, new TextLoader.Arguments(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.testFilename)));
1818+
IDataView testView = new TextLoader(env, new TextLoader.Options(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.testFilename)));
18191819
ApplyTransformUtils.ApplyAllTransformsToData(env, trainView, testView);
18201820
var testData = new RoleMappedData(testView, "Label", "Features");
18211821

0 commit comments

Comments
 (0)