Skip to content

Commit 7a156c8

Browse files
committed
Revert "review comments"
This reverts commit 490e03f.
1 parent 360162a commit 7a156c8

File tree

8 files changed

+64
-47
lines changed

8 files changed

+64
-47
lines changed

docs/code/MlNetCookBook.md

Lines changed: 48 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,9 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
115115
var mlContext = new MLContext();
116116

117117
// Create the reader: define the data columns and where to find them in the text file.
118-
var reader = mlContext.Data.TextReader(new[] {
118+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
119+
{
120+
Column = new[] {
119121
// A boolean column depicting the 'label'.
120122
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
121123
// Three text columns.
@@ -124,8 +126,8 @@ var reader = mlContext.Data.TextReader(new[] {
124126
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
125127
},
126128
// First line of the file is a header, not a data row.
127-
hasHeader: true
128-
);
129+
HasHeader = true
130+
});
129131

130132
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
131133
var data = reader.Read(dataPath);
@@ -173,17 +175,19 @@ The code is very similar using the dynamic API:
173175
var mlContext = new MLContext();
174176

175177
// Create the reader: define the data columns and where to find them in the text file.
176-
var reader = mlContext.Data.TextReader(new[] {
178+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
179+
{
180+
Column = new[] {
177181
// A boolean column depicting the 'label'.
178-
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
182+
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
179183
// Three text columns.
180184
new TextLoader.Column("Workclass", DataKind.TX, 1),
181185
new TextLoader.Column("Education", DataKind.TX, 2),
182186
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
183187
},
184188
// First line of the file is a header, not a data row.
185-
hasHeader: true
186-
);
189+
HasHeader = true
190+
});
187191

188192
var data = reader.Read(exampleFile1, exampleFile2);
189193
```
@@ -361,17 +365,19 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
361365
var mlContext = new MLContext();
362366

363367
// Create the reader: define the data columns and where to find them in the text file.
364-
var reader = mlContext.Data.TextReader(new[] {
368+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
369+
{
370+
Column = new[] {
365371
// A boolean column depicting the 'label'.
366-
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
372+
new TextLoader.Column("IsOver50k", DataKind.BL, 0),
367373
// Three text columns.
368374
new TextLoader.Column("Workclass", DataKind.TX, 1),
369375
new TextLoader.Column("Education", DataKind.TX, 2),
370376
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
371377
},
372378
// First line of the file is a header, not a data row.
373-
hasHeader: true
374-
);
379+
HasHeader = true
380+
});
375381

376382
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
377383
// together into one.
@@ -462,18 +468,20 @@ var mlContext = new MLContext();
462468

463469
// Step one: read the data as an IDataView.
464470
// First, we define the reader: specify the data columns and where to find them in the text file.
465-
var reader = mlContext.Data.TextReader(new[] {
471+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
472+
{
473+
Column = new[] {
466474
// We read the first 11 values as a single float vector.
467475
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
468476

469477
// Separately, read the target variable.
470478
new TextLoader.Column("Target", DataKind.R4, 11),
471479
},
472480
// First line of the file is a header, not a data row.
473-
hasHeader: true,
481+
HasHeader = true,
474482
// Default separator is tab, but we need a semicolon.
475-
separatorChar: ';'
476-
);
483+
Separator = ";"
484+
});
477485

478486
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
479487
var trainData = reader.Read(trainDataPath);
@@ -609,7 +617,9 @@ var mlContext = new MLContext();
609617

610618
// Step one: read the data as an IDataView.
611619
// First, we define the reader: specify the data columns and where to find them in the text file.
612-
var reader = mlContext.Data.TextReader(new[] {
620+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
621+
{
622+
Column = new[] {
613623
new TextLoader.Column("SepalLength", DataKind.R4, 0),
614624
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
615625
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -618,8 +628,8 @@ var reader = mlContext.Data.TextReader(new[] {
618628
new TextLoader.Column("Label", DataKind.TX, 4),
619629
},
620630
// Default separator is tab, but the dataset has comma.
621-
separatorChar: ','
622-
);
631+
Separator = ","
632+
});
623633

624634
// Retrieve the training data.
625635
var trainData = reader.Read(irisDataPath);
@@ -900,15 +910,17 @@ You can achieve the same results using the dynamic API.
900910
var mlContext = new MLContext();
901911

902912
// Define the reader: specify the data columns and where to find them in the text file.
903-
var reader = mlContext.Data.TextReader(new[] {
913+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
914+
{
915+
Column = new[] {
904916
// The four features of the Iris dataset will be grouped together as one Features column.
905917
new TextLoader.Column("Features", DataKind.R4, 0, 3),
906918
// Label: kind of iris.
907919
new TextLoader.Column("Label", DataKind.TX, 4),
908920
},
909921
// Default separator is tab, but the dataset has comma.
910-
separatorChar: ','
911-
);
922+
Separator = ","
923+
});
912924

913925
// Read the training data.
914926
var trainData = reader.Read(dataPath);
@@ -1015,8 +1027,9 @@ You can achieve the same results using the dynamic API.
10151027
var mlContext = new MLContext();
10161028

10171029
// Define the reader: specify the data columns and where to find them in the text file.
1018-
var reader = mlContext.Data.TextReader(new[]
1019-
{
1030+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1031+
{
1032+
Column = new[] {
10201033
new TextLoader.Column("Label", DataKind.BL, 0),
10211034
// We will load all the categorical features into one vector column of size 8.
10221035
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
@@ -1025,8 +1038,8 @@ var reader = mlContext.Data.TextReader(new[]
10251038
// Let's also separately load the 'Workclass' column.
10261039
new TextLoader.Column("Workclass", DataKind.TX, 1),
10271040
},
1028-
hasHeader: true
1029-
);
1041+
HasHeader = true
1042+
});
10301043

10311044
// Read the data.
10321045
var data = reader.Read(dataPath);
@@ -1141,13 +1154,14 @@ You can achieve the same results using the dynamic API.
11411154
var mlContext = new MLContext();
11421155

11431156
// Define the reader: specify the data columns and where to find them in the text file.
1144-
var reader = mlContext.Data.TextReader(new[]
1145-
{
1157+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1158+
{
1159+
Column = new[] {
11461160
new TextLoader.Column("IsToxic", DataKind.BL, 0),
11471161
new TextLoader.Column("Message", DataKind.TX, 1),
11481162
},
1149-
hasHeader: true
1150-
);
1163+
HasHeader = true
1164+
});
11511165

11521166
// Read the data.
11531167
var data = reader.Read(dataPath);
@@ -1260,8 +1274,9 @@ var mlContext = new MLContext();
12601274

12611275
// Step one: read the data as an IDataView.
12621276
// First, we define the reader: specify the data columns and where to find them in the text file.
1263-
var reader = mlContext.Data.TextReader(new[]
1264-
{
1277+
var reader = mlContext.Data.TextReader(new TextLoader.Arguments
1278+
{
1279+
Column = new[] {
12651280
// Read each feature as its own float column.
12661281
new TextLoader.Column("SepalLength", DataKind.R4, 0),
12671282
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
@@ -1271,8 +1286,8 @@ var reader = mlContext.Data.TextReader(new[]
12711286
new TextLoader.Column("Label", DataKind.TX, 4),
12721287
},
12731288
// Default separator is tab, but the dataset has comma.
1274-
separatorChar: ','
1275-
);
1289+
Separator = ","
1290+
});
12761291

12771292
// Read the data.
12781293
var data = reader.Read(dataPath);

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,12 +1033,11 @@ private static Arguments MakeArgs(Column[] columns, bool hasHeader, char[] separ
10331033
/// <param name="env">The environment to use.</param>
10341034
/// <param name="args">Defines the settings of the load operation.</param>
10351035
/// <param name="dataSample">Allows exposing items that can be used for reading.</param>
1036-
public TextLoader(IHostEnvironment env, Arguments args = null, IMultiStreamSource dataSample = null)
1036+
public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource dataSample = null)
10371037
{
1038-
args = args ?? new Arguments();
1039-
10401038
Contracts.CheckValue(env, nameof(env));
10411039
_host = env.Register(RegistrationName);
1040+
10421041
_host.CheckValue(args, nameof(args));
10431042
_host.CheckValueOrNull(dataSample);
10441043

@@ -1333,7 +1332,7 @@ public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSo
13331332
/// <param name="env">The environment to use.</param>
13341333
/// <param name="fileSource">Specifies a file from which to read.</param>
13351334
/// <param name="args">Defines the settings of the load operation.</param>
1336-
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args = null)
1335+
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args)
13371336
=> new TextLoader(env, args, fileSource).Read(fileSource);
13381337

13391338
public void Save(ModelSaveContext ctx)

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static class TextLoaderSaverCatalog
1919
/// <param name="catalog">The catalog.</param>
2020
/// <param name="columns">The columns of the schema.</param>
2121
/// <param name="hasHeader">Whether the file has a header.</param>
22-
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
22+
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
2323
/// <param name="dataSample">The optional location of a data sample.</param>
2424
public static TextLoader TextReader(this DataOperations catalog,
2525
Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
@@ -40,7 +40,7 @@ public static TextLoader TextReader(this DataOperations catalog, Arguments args,
4040
/// <param name="catalog">The catalog.</param>
4141
/// <param name="columns">The columns of the schema.</param>
4242
/// <param name="hasHeader">Whether the file has a header.</param>
43-
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
43+
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
4444
/// <param name="path">The path to the file.</param>
4545
/// <returns>The data view.</returns>
4646
public static IDataView ReadFromTextFile(this DataOperations catalog,

src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
283283
{
284284
// REVIEW: Should really validate the schema here, and consider
285285
// ignoring this stream if it isn't as expected.
286-
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
286+
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile), new TextLoader.Arguments());
287287

288288
using (var cursor = loader.GetRowCursor(c => true))
289289
{

src/Microsoft.ML.Transforms/TermLookupTransformer.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,9 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
361361
ulong max = ulong.MinValue;
362362
try
363363
{
364+
var txtArgs = new TextLoader.Arguments();
365+
bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs);
366+
host.Assert(parsed);
364367
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
365368
{
366369
new TextLoader.Column("Term", DataKind.TX, 0),

test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void TrainSentiment()
6464
{
6565
var env = new MLContext(seed: 1);
6666
// Pipeline
67-
var arguments = new TextLoader.Arguments()
67+
var arguemnts = new TextLoader.Arguments()
6868
{
6969
Column = new TextLoader.Column[]
7070
{
@@ -86,7 +86,7 @@ public void TrainSentiment()
8686
AllowQuoting = false,
8787
AllowSparse = false
8888
};
89-
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguments);
89+
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguemnts);
9090

9191
var text = TextFeaturizingEstimator.Create(env,
9292
new TextFeaturizingEstimator.Arguments()

test/Microsoft.ML.Predictor.Tests/TestPredictors.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -606,7 +606,7 @@ public void RankingLightGBMTest()
606606
public void TestTreeEnsembleCombiner()
607607
{
608608
var dataPath = GetDataPath("breast-cancer.txt");
609-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
609+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
610610

611611
var fastTrees = new IPredictorModel[3];
612612
for (int i = 0; i < 3; i++)
@@ -628,7 +628,7 @@ public void TestTreeEnsembleCombiner()
628628
public void TestTreeEnsembleCombinerWithCategoricalSplits()
629629
{
630630
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
631-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
631+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
632632

633633
var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
634634
var fastTrees = new IPredictorModel[3];
@@ -729,7 +729,7 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
729729
public void TestEnsembleCombiner()
730730
{
731731
var dataPath = GetDataPath("breast-cancer.txt");
732-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
732+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
733733

734734
var predictors = new IPredictorModel[]
735735
{
@@ -775,7 +775,7 @@ public void TestEnsembleCombiner()
775775
public void TestMultiClassEnsembleCombiner()
776776
{
777777
var dataPath = GetDataPath("breast-cancer.txt");
778-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
778+
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath), new TextLoader.Arguments());
779779

780780
var predictors = new IPredictorModel[]
781781
{

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,
439439

440440
// Note that we don't pass in "args", but pass in default args so we test
441441
// the auto-schema parsing.
442-
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData));
442+
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData), new TextLoader.Arguments());
443443
if (!CheckMetadataTypes(loadedData.Schema))
444444
Failed();
445445

0 commit comments

Comments
 (0)