Skip to content

Commit d90c116

Browse files
committed
review comments
1 parent f04cd68 commit d90c116

File tree

8 files changed

+47
-64
lines changed

8 files changed

+47
-64
lines changed

docs/code/MlNetCookBook.md

Lines changed: 33 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
115115
var mlContext = new MLContext();
116116

117117
// Create the reader: define the data columns and where to find them in the text file.
118-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
119-
{
120-
Column = new[] {
118+
var reader = mlContext.Data.TextReader(new[] {
121119
// A boolean column depicting the 'label'.
122120
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
123121
// Three text columns.
@@ -126,8 +124,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
126124
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
127125
},
128126
// First line of the file is a header, not a data row.
129-
HasHeader = true
130-
});
127+
hasHeader: true
128+
);
131129

132130
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
133131
var data = reader.Read(dataPath);
@@ -175,19 +173,17 @@ The code is very similar using the dynamic API:
175173
var mlContext = new MLContext();
176174

177175
// Create the reader: define the data columns and where to find them in the text file.
178-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
179-
{
180-
Column = new[] {
176+
var reader = mlContext.Data.TextReader(new[] {
181177
// A boolean column depicting the 'label'.
182-
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
178+
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
183179
// Three text columns.
184180
new TextLoader.Column("Workclass", DataKind.TX, 1),
185181
new TextLoader.Column("Education", DataKind.TX, 2),
186182
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
187183
},
188184
// First line of the file is a header, not a data row.
189-
HasHeader = true
190-
});
185+
hasHeader: true
186+
);
191187

192188
var data = reader.Read(exampleFile1, exampleFile2);
193189
```
@@ -365,19 +361,17 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
365361
var mlContext = new MLContext();
366362

367363
// Create the reader: define the data columns and where to find them in the text file.
368-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
369-
{
370-
Column = new[] {
364+
var reader = mlContext.Data.TextReader(new[] {
371365
// A boolean column depicting the 'label'.
372-
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
366+
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
373367
// Three text columns.
374368
new TextLoader.Column("Workclass", DataKind.TX, 1),
375369
new TextLoader.Column("Education", DataKind.TX, 2),
376370
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
377371
},
378372
// First line of the file is a header, not a data row.
379-
HasHeader = true
380-
});
373+
hasHeader: true
374+
);
381375

382376
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
383377
// together into one.
@@ -482,20 +476,18 @@ var mlContext = new MLContext();
482476

483477
// Step one: read the data as an IDataView.
484478
// First, we define the reader: specify the data columns and where to find them in the text file.
485-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
486-
{
487-
Column = new[] {
479+
var reader = mlContext.Data.TextReader(new[] {
488480
// We read the first 11 values as a single float vector.
489481
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
490482

491483
// Separately, read the target variable.
492484
new TextLoader.Column("Target", DataKind.R4, 11),
493485
},
494486
// First line of the file is a header, not a data row.
495-
HasHeader = true,
487+
hasHeader: true,
496488
// Default separator is tab, but we need a semicolon.
497-
Separator = ";"
498-
});
489+
separatorChar: ';'
490+
);
499491

500492
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
501493
var trainData = reader.Read(trainDataPath);
@@ -653,9 +645,7 @@ var mlContext = new MLContext();
653645

654646
// Step one: read the data as an IDataView.
655647
// First, we define the reader: specify the data columns and where to find them in the text file.
656-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
657-
{
658-
Column = new[] {
648+
var reader = mlContext.Data.TextReader(new[] {
659649
new TextLoader.Column("SepalLength", DataKind.R4, 0),
660650
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
661651
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -664,8 +654,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
664654
new TextLoader.Column("Label", DataKind.TX, 4),
665655
},
666656
// The default separator is a tab, but this dataset is comma-separated.
667-
Separator = ","
668-
});
657+
separatorChar: ','
658+
);
669659

670660
// Retrieve the training data.
671661
var trainData = reader.Read(irisDataPath);
@@ -952,17 +942,15 @@ You can achieve the same results using the dynamic API.
952942
var mlContext = new MLContext();
953943

954944
// Define the reader: specify the data columns and where to find them in the text file.
955-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
956-
{
957-
Column = new[] {
945+
var reader = mlContext.Data.TextReader(new[] {
958946
// The four features of the Iris dataset will be grouped together as one Features column.
959947
new TextLoader.Column("Features", DataKind.R4, 0, 3),
960948
// Label: kind of iris.
961949
new TextLoader.Column("Label", DataKind.TX, 4),
962950
},
963951
// The default separator is a tab, but this dataset is comma-separated.
964-
Separator = ","
965-
});
952+
separatorChar: ','
953+
);
966954

967955
// Read the training data.
968956
var trainData = reader.Read(dataPath);
@@ -1073,9 +1061,8 @@ You can achieve the same results using the dynamic API.
10731061
var mlContext = new MLContext();
10741062

10751063
// Define the reader: specify the data columns and where to find them in the text file.
1076-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1077-
{
1078-
Column = new[] {
1064+
var reader = mlContext.Data.TextReader(new[]
1065+
{
10791066
new TextLoader.Column("Label", DataKind.BL, 0),
10801067
// We will load all the categorical features into one vector column of size 8.
10811068
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
@@ -1084,8 +1071,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
10841071
// Let's also separately load the 'Workclass' column.
10851072
new TextLoader.Column("Workclass", DataKind.TX, 1),
10861073
},
1087-
HasHeader = true
1088-
});
1074+
hasHeader: true
1075+
);
10891076

10901077
// Read the data.
10911078
var data = reader.Read(dataPath);
@@ -1207,14 +1194,13 @@ You can achieve the same results using the dynamic API.
12071194
var mlContext = new MLContext();
12081195

12091196
// Define the reader: specify the data columns and where to find them in the text file.
1210-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1211-
{
1212-
Column = new[] {
1197+
var reader = mlContext.Data.TextReader(new[]
1198+
{
12131199
new TextLoader.Column("IsToxic", DataKind.BL, 0),
12141200
new TextLoader.Column("Message", DataKind.TX, 1),
12151201
},
1216-
HasHeader = true
1217-
});
1202+
hasHeader: true
1203+
);
12181204

12191205
// Read the data.
12201206
var data = reader.Read(dataPath);
@@ -1330,9 +1316,8 @@ var mlContext = new MLContext();
13301316

13311317
// Step one: read the data as an IDataView.
13321318
// First, we define the reader: specify the data columns and where to find them in the text file.
1333-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1334-
{
1335-
Column = new[] {
1319+
var reader = mlContext.Data.TextReader(new[]
1320+
{
13361321
// We read each numeric feature as its own float column.
13371322
new TextLoader.Column("SepalLength", DataKind.R4, 0),
13381323
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
@@ -1342,8 +1327,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
13421327
new TextLoader.Column("Label", DataKind.TX, 4),
13431328
},
13441329
// The default separator is a tab, but this dataset is comma-separated.
1345-
Separator = ","
1346-
});
1330+
separatorChar: ','
1331+
);
13471332

13481333
// Read the data.
13491334
var data = reader.Read(dataPath);

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,11 +1033,12 @@ private static Arguments MakeArgs(Column[] columns, bool hasHeader, char[] separ
10331033
/// <param name="env">The environment to use.</param>
10341034
/// <param name="args">Defines the settings of the load operation. If null, a default <see cref="Arguments"/> instance is used.</param>
10351035
/// <param name="dataSample">Optional data sample that exposes items that can be used for reading.</param>
1036-
public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource dataSample = null)
1036+
public TextLoader(IHostEnvironment env, Arguments args = null, IMultiStreamSource dataSample = null)
10371037
{
1038+
args = args ?? new Arguments();
1039+
10381040
Contracts.CheckValue(env, nameof(env));
10391041
_host = env.Register(RegistrationName);
1040-
10411042
_host.CheckValue(args, nameof(args));
10421043
_host.CheckValueOrNull(dataSample);
10431044

@@ -1332,7 +1333,7 @@ public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSo
13321333
/// <param name="env">The environment to use.</param>
13331334
/// <param name="fileSource">Specifies a file from which to read.</param>
13341335
/// <param name="args">Defines the settings of the load operation. If null, a default <see cref="Arguments"/> instance is used.</param>
1335-
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args)
1336+
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args = null)
13361337
=> new TextLoader(env, args, fileSource).Read(fileSource);
13371338

13381339
public void Save(ModelSaveContext ctx)

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static class TextLoaderSaverCatalog
1919
/// <param name="catalog">The catalog.</param>
2020
/// <param name="columns">The columns of the schema.</param>
2121
/// <param name="hasHeader">Whether the file has a header.</param>
22-
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
22+
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
2323
/// <param name="dataSample">The optional location of a data sample.</param>
2424
public static TextLoader TextReader(this DataOperations catalog,
2525
Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
@@ -40,7 +40,7 @@ public static TextLoader TextReader(this DataOperations catalog, Arguments args,
4040
/// <param name="catalog">The catalog.</param>
4141
/// <param name="columns">The columns of the schema.</param>
4242
/// <param name="hasHeader">Whether the file has a header.</param>
43-
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
43+
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
4444
/// <param name="path">The path to the file.</param>
4545
/// <returns>The data view.</returns>
4646
public static IDataView ReadFromTextFile(this DataOperations catalog,

src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
283283
{
284284
// REVIEW: Should really validate the schema here, and consider
285285
// ignoring this stream if it isn't as expected.
286-
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile), new TextLoader.Arguments());
286+
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
287287

288288
using (var cursor = loader.GetRowCursor(c => true))
289289
{

src/Microsoft.ML.Transforms/TermLookupTransformer.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,9 +361,6 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
361361
ulong max = ulong.MinValue;
362362
try
363363
{
364-
var txtArgs = new TextLoader.Arguments();
365-
bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs);
366-
host.Assert(parsed);
367364
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
368365
{
369366
new TextLoader.Column("Term", DataKind.TX, 0),

test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void TrainSentiment()
6464
{
6565
var env = new MLContext(seed: 1);
6666
// Pipeline
67-
var arguemnts = new TextLoader.Arguments()
67+
var arguments = new TextLoader.Arguments()
6868
{
6969
Column = new TextLoader.Column[]
7070
{
@@ -86,7 +86,7 @@ public void TrainSentiment()
8686
AllowQuoting = false,
8787
AllowSparse = false
8888
};
89-
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguemnts);
89+
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguments);
9090

9191
var text = TextFeaturizingEstimator.Create(env,
9292
new TextFeaturizingEstimator.Arguments()

test/Microsoft.ML.Predictor.Tests/TestPredictors.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,7 @@ public void RankingLightGBMTest()
606606
public void TestTreeEnsembleCombiner()
607607
{
608608
var dataPath = GetDataPath("breast-cancer.txt");
609-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
609+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
610610

611611
var fastTrees = new IPredictorModel[3];
612612
for (int i = 0; i < 3; i++)
@@ -628,7 +628,7 @@ public void TestTreeEnsembleCombiner()
628628
public void TestTreeEnsembleCombinerWithCategoricalSplits()
629629
{
630630
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
631-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
631+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
632632

633633
var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
634634
var fastTrees = new IPredictorModel[3];
@@ -729,7 +729,7 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
729729
public void TestEnsembleCombiner()
730730
{
731731
var dataPath = GetDataPath("breast-cancer.txt");
732-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
732+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
733733

734734
var predictors = new IPredictorModel[]
735735
{
@@ -775,7 +775,7 @@ public void TestEnsembleCombiner()
775775
public void TestMultiClassEnsembleCombiner()
776776
{
777777
var dataPath = GetDataPath("breast-cancer.txt");
778-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
778+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
779779

780780
var predictors = new IPredictorModel[]
781781
{

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,
438438

439439
// Note that we don't pass in "args", so the loader falls back to default args and we test
440440
// the auto-schema parsing.
441-
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData), new TextLoader.Arguments());
441+
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData));
442442
if (!CheckMetadataTypes(loadedData.Schema))
443443
Failed();
444444

0 commit comments

Comments
 (0)