Skip to content

Commit f6a1a33

Browse files
committed
review comments
1 parent 76cf6c4 commit f6a1a33

File tree

8 files changed

+47
-64
lines changed

8 files changed

+47
-64
lines changed

docs/code/MlNetCookBook.md

Lines changed: 33 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
115115
var mlContext = new MLContext();
116116

117117
// Create the reader: define the data columns and where to find them in the text file.
118-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
119-
{
120-
Column = new[] {
118+
var reader = mlContext.Data.TextReader(new[] {
121119
// A boolean column depicting the 'label'.
122120
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
123121
// Three text columns.
@@ -126,8 +124,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
126124
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
127125
},
128126
// First line of the file is a header, not a data row.
129-
HasHeader = true
130-
});
127+
hasHeader: true
128+
);
131129

132130
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
133131
var data = reader.Read(dataPath);
@@ -175,19 +173,17 @@ The code is very similar using the dynamic API:
175173
var mlContext = new MLContext();
176174

177175
// Create the reader: define the data columns and where to find them in the text file.
178-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
179-
{
180-
Column = new[] {
176+
var reader = mlContext.Data.TextReader(new[] {
181177
// A boolean column depicting the 'label'.
182-
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
178+
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
183179
// Three text columns.
184180
new TextLoader.Column("Workclass", DataKind.TX, 1),
185181
new TextLoader.Column("Education", DataKind.TX, 2),
186182
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
187183
},
188184
// First line of the file is a header, not a data row.
189-
HasHeader = true
190-
});
185+
hasHeader: true
186+
);
191187

192188
var data = reader.Read(exampleFile1, exampleFile2);
193189
```
@@ -365,19 +361,17 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
365361
var mlContext = new MLContext();
366362

367363
// Create the reader: define the data columns and where to find them in the text file.
368-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
369-
{
370-
Column = new[] {
364+
var reader = mlContext.Data.TextReader(new[] {
371365
// A boolean column depicting the 'label'.
372-
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
366+
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
373367
// Three text columns.
374368
new TextLoader.Column("Workclass", DataKind.TX, 1),
375369
new TextLoader.Column("Education", DataKind.TX, 2),
376370
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
377371
},
378372
// First line of the file is a header, not a data row.
379-
HasHeader = true
380-
});
373+
hasHeader: true
374+
);
381375

382376
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
383377
// together into one.
@@ -468,20 +462,18 @@ var mlContext = new MLContext();
468462

469463
// Step one: read the data as an IDataView.
470464
// First, we define the reader: specify the data columns and where to find them in the text file.
471-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
472-
{
473-
Column = new[] {
465+
var reader = mlContext.Data.TextReader(new[] {
474466
// We read the first 11 values as a single float vector.
475467
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
476468

477469
// Separately, read the target variable.
478470
new TextLoader.Column("Target", DataKind.R4, 11),
479471
},
480472
// First line of the file is a header, not a data row.
481-
HasHeader = true,
473+
hasHeader: true,
482474
// Default separator is tab, but we need a semicolon.
483-
Separator = ";"
484-
});
475+
separatorChar: ';'
476+
);
485477

486478
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
487479
var trainData = reader.Read(trainDataPath);
@@ -617,9 +609,7 @@ var mlContext = new MLContext();
617609

618610
// Step one: read the data as an IDataView.
619611
// First, we define the reader: specify the data columns and where to find them in the text file.
620-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
621-
{
622-
Column = new[] {
612+
var reader = mlContext.Data.TextReader(new[] {
623613
new TextLoader.Column("SepalLength", DataKind.R4, 0),
624614
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
625615
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -628,8 +618,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
628618
new TextLoader.Column("Label", DataKind.TX, 4),
629619
},
630620
// Default separator is tab, but the dataset has comma.
631-
Separator = ","
632-
});
621+
separatorChar: ','
622+
);
633623

634624
// Retrieve the training data.
635625
var trainData = reader.Read(irisDataPath);
@@ -910,17 +900,15 @@ You can achieve the same results using the dynamic API.
910900
var mlContext = new MLContext();
911901

912902
// Define the reader: specify the data columns and where to find them in the text file.
913-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
914-
{
915-
Column = new[] {
903+
var reader = mlContext.Data.TextReader(new[] {
916904
// The four features of the Iris dataset will be grouped together as one Features column.
917905
new TextLoader.Column("Features", DataKind.R4, 0, 3),
918906
// Label: kind of iris.
919907
new TextLoader.Column("Label", DataKind.TX, 4),
920908
},
921909
// Default separator is tab, but the dataset has comma.
922-
Separator = ","
923-
});
910+
separatorChar: ','
911+
);
924912

925913
// Read the training data.
926914
var trainData = reader.Read(dataPath);
@@ -1027,9 +1015,8 @@ You can achieve the same results using the dynamic API.
10271015
var mlContext = new MLContext();
10281016

10291017
// Define the reader: specify the data columns and where to find them in the text file.
1030-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1031-
{
1032-
Column = new[] {
1018+
var reader = mlContext.Data.TextReader(new[]
1019+
{
10331020
new TextLoader.Column("Label", DataKind.BL, 0),
10341021
// We will load all the categorical features into one vector column of size 8.
10351022
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
@@ -1038,8 +1025,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
10381025
// Let's also separately load the 'Workclass' column.
10391026
new TextLoader.Column("Workclass", DataKind.TX, 1),
10401027
},
1041-
HasHeader = true
1042-
});
1028+
hasHeader: true
1029+
);
10431030

10441031
// Read the data.
10451032
var data = reader.Read(dataPath);
@@ -1154,14 +1141,13 @@ You can achieve the same results using the dynamic API.
11541141
var mlContext = new MLContext();
11551142

11561143
// Define the reader: specify the data columns and where to find them in the text file.
1157-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1158-
{
1159-
Column = new[] {
1144+
var reader = mlContext.Data.TextReader(new[]
1145+
{
11601146
new TextLoader.Column("IsToxic", DataKind.BL, 0),
11611147
new TextLoader.Column("Message", DataKind.TX, 1),
11621148
},
1163-
HasHeader = true
1164-
});
1149+
hasHeader: true
1150+
);
11651151

11661152
// Read the data.
11671153
var data = reader.Read(dataPath);
@@ -1274,9 +1260,8 @@ var mlContext = new MLContext();
12741260

12751261
// Step one: read the data as an IDataView.
12761262
// First, we define the reader: specify the data columns and where to find them in the text file.
1277-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1278-
{
1279-
Column = new[] {
1263+
var reader = mlContext.Data.TextReader(new[]
1264+
{
12801265
// Read each feature as a separate float column.
12811266
new TextLoader.Column("SepalLength", DataKind.R4, 0),
12821267
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
@@ -1286,8 +1271,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
12861271
new TextLoader.Column("Label", DataKind.TX, 4),
12871272
},
12881273
// Default separator is tab, but the dataset has comma.
1289-
Separator = ","
1290-
});
1274+
separatorChar: ','
1275+
);
12911276

12921277
// Read the data.
12931278
var data = reader.Read(dataPath);

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,11 +1033,12 @@ private static Arguments MakeArgs(Column[] columns, bool hasHeader, char[] separ
10331033
/// <param name="env">The environment to use.</param>
10341034
/// <param name="args">Defines the settings of the load operation. If <c>null</c>, default settings are used.</param>
10351035
/// <param name="dataSample">Allows exposing items that can be used for reading.</param>
1036-
public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource dataSample = null)
1036+
public TextLoader(IHostEnvironment env, Arguments args = null, IMultiStreamSource dataSample = null)
10371037
{
1038+
args = args ?? new Arguments();
1039+
10381040
Contracts.CheckValue(env, nameof(env));
10391041
_host = env.Register(RegistrationName);
1040-
10411042
_host.CheckValue(args, nameof(args));
10421043
_host.CheckValueOrNull(dataSample);
10431044

@@ -1332,7 +1333,7 @@ public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSo
13321333
/// <param name="env">The environment to use.</param>
13331334
/// <param name="fileSource">Specifies a file from which to read.</param>
13341335
/// <param name="args">Defines the settings of the load operation. If <c>null</c>, default settings are used.</param>
1335-
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args)
1336+
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args = null)
13361337
=> new TextLoader(env, args, fileSource).Read(fileSource);
13371338

13381339
public void Save(ModelSaveContext ctx)

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static class TextLoaderSaverCatalog
1919
/// <param name="catalog">The catalog.</param>
2020
/// <param name="columns">The columns of the schema.</param>
2121
/// <param name="hasHeader">Whether the file has a header.</param>
22-
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
22+
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
2323
/// <param name="dataSample">The optional location of a data sample.</param>
2424
public static TextLoader TextReader(this DataOperations catalog,
2525
Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
@@ -40,7 +40,7 @@ public static TextLoader TextReader(this DataOperations catalog, Arguments args,
4040
/// <param name="catalog">The catalog.</param>
4141
/// <param name="columns">The columns of the schema.</param>
4242
/// <param name="hasHeader">Whether the file has a header.</param>
43-
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
43+
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
4444
/// <param name="path">The path to the file.</param>
4545
/// <returns>The data view.</returns>
4646
public static IDataView ReadFromTextFile(this DataOperations catalog,

src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
283283
{
284284
// REVIEW: Should really validate the schema here, and consider
285285
// ignoring this stream if it isn't as expected.
286-
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile), new TextLoader.Arguments());
286+
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
287287

288288
using (var cursor = loader.GetRowCursor(c => true))
289289
{

src/Microsoft.ML.Transforms/TermLookupTransformer.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,9 +361,6 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
361361
ulong max = ulong.MinValue;
362362
try
363363
{
364-
var txtArgs = new TextLoader.Arguments();
365-
bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs);
366-
host.Assert(parsed);
367364
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
368365
{
369366
new TextLoader.Column("Term", DataKind.TX, 0),

test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void TrainSentiment()
6464
{
6565
var env = new MLContext(seed: 1);
6666
// Pipeline
67-
var arguemnts = new TextLoader.Arguments()
67+
var arguments = new TextLoader.Arguments()
6868
{
6969
Column = new TextLoader.Column[]
7070
{
@@ -86,7 +86,7 @@ public void TrainSentiment()
8686
AllowQuoting = false,
8787
AllowSparse = false
8888
};
89-
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguemnts);
89+
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguments);
9090

9191
var text = TextFeaturizingEstimator.Create(env,
9292
new TextFeaturizingEstimator.Arguments()

test/Microsoft.ML.Predictor.Tests/TestPredictors.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ public void RankingLightGBMTest()
595595
public void TestTreeEnsembleCombiner()
596596
{
597597
var dataPath = GetDataPath("breast-cancer.txt");
598-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
598+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
599599

600600
var fastTrees = new IPredictorModel[3];
601601
for (int i = 0; i < 3; i++)
@@ -617,7 +617,7 @@ public void TestTreeEnsembleCombiner()
617617
public void TestTreeEnsembleCombinerWithCategoricalSplits()
618618
{
619619
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
620-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
620+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
621621

622622
var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
623623
var fastTrees = new IPredictorModel[3];
@@ -718,7 +718,7 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
718718
public void TestEnsembleCombiner()
719719
{
720720
var dataPath = GetDataPath("breast-cancer.txt");
721-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
721+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
722722

723723
var predictors = new IPredictorModel[]
724724
{
@@ -764,7 +764,7 @@ public void TestEnsembleCombiner()
764764
public void TestMultiClassEnsembleCombiner()
765765
{
766766
var dataPath = GetDataPath("breast-cancer.txt");
767-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
767+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
768768

769769
var predictors = new IPredictorModel[]
770770
{

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,
438438

439439
// Note that we don't pass in "args", so the loader falls back to default args and we test
440440
// the auto-schema parsing.
441-
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData), new TextLoader.Arguments());
441+
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData));
442442
if (!CheckMetadataTypes(loadedData.Schema))
443443
Failed();
444444

0 commit comments

Comments
 (0)