@@ -115,7 +115,9 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
115
115
var mlContext = new MLContext ();
116
116
117
117
// Create the reader: define the data columns and where to find them in the text file.
118
- var reader = mlContext .Data .TextReader (new [] {
118
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
119
+ {
120
+ Column = new [] {
119
121
// A boolean column depicting the 'label'.
120
122
new TextLoader .Column (" IsOver50K" , DataKind .BL , 0 ),
121
123
// Three text columns.
@@ -124,8 +126,8 @@ var reader = mlContext.Data.TextReader(new[] {
124
126
new TextLoader .Column (" MaritalStatus" , DataKind .TX , 3 )
125
127
},
126
128
// First line of the file is a header, not a data row.
127
- hasHeader : true
128
- );
129
+ HasHeader = true
130
+ } );
129
131
130
132
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
131
133
var data = reader .Read (dataPath );
@@ -173,17 +175,19 @@ The code is very similar using the dynamic API:
173
175
var mlContext = new MLContext ();
174
176
175
177
// Create the reader: define the data columns and where to find them in the text file.
176
- var reader = mlContext .Data .TextReader (new [] {
178
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
179
+ {
180
+ Column = new [] {
177
181
// A boolean column depicting the 'label'.
178
- new TextLoader .Column (" IsOver50K " , DataKind .BL , 0 ),
182
+ new TextLoader .Column (" IsOver50k " , DataKind .BL , 0 ),
179
183
// Three text columns.
180
184
new TextLoader .Column (" Workclass" , DataKind .TX , 1 ),
181
185
new TextLoader .Column (" Education" , DataKind .TX , 2 ),
182
186
new TextLoader .Column (" MaritalStatus" , DataKind .TX , 3 )
183
187
},
184
188
// First line of the file is a header, not a data row.
185
- hasHeader : true
186
- );
189
+ HasHeader = true
190
+ } );
187
191
188
192
var data = reader .Read (exampleFile1 , exampleFile2 );
189
193
```
@@ -361,17 +365,19 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
361
365
var mlContext = new MLContext ();
362
366
363
367
// Create the reader: define the data columns and where to find them in the text file.
364
- var reader = mlContext .Data .TextReader (new [] {
368
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
369
+ {
370
+ Column = new [] {
365
371
// A boolean column depicting the 'label'.
366
- new TextLoader .Column (" IsOver50K " , DataKind .BL , 0 ),
372
+ new TextLoader .Column (" IsOver50k " , DataKind .BL , 0 ),
367
373
// Three text columns.
368
374
new TextLoader .Column (" Workclass" , DataKind .TX , 1 ),
369
375
new TextLoader .Column (" Education" , DataKind .TX , 2 ),
370
376
new TextLoader .Column (" MaritalStatus" , DataKind .TX , 3 )
371
377
},
372
378
// First line of the file is a header, not a data row.
373
- hasHeader : true
374
- );
379
+ HasHeader = true
380
+ } );
375
381
376
382
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
377
383
// together into one.
@@ -462,18 +468,20 @@ var mlContext = new MLContext();
462
468
463
469
// Step one: read the data as an IDataView.
464
470
// First, we define the reader: specify the data columns and where to find them in the text file.
465
- var reader = mlContext .Data .TextReader (new [] {
471
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
472
+ {
473
+ Column = new [] {
466
474
// We read the first 11 values as a single float vector.
467
475
new TextLoader .Column (" FeatureVector" , DataKind .R4 , 0 , 10 ),
468
476
469
477
// Separately, read the target variable.
470
478
new TextLoader .Column (" Target" , DataKind .R4 , 11 ),
471
479
},
472
480
// First line of the file is a header, not a data row.
473
- hasHeader : true ,
481
+ HasHeader = true ,
474
482
// Default separator is tab, but we need a semicolon.
475
- separatorChar : ';'
476
- );
483
+ Separator = " ; "
484
+ } );
477
485
478
486
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
479
487
var trainData = reader .Read (trainDataPath );
@@ -609,7 +617,9 @@ var mlContext = new MLContext();
609
617
610
618
// Step one: read the data as an IDataView.
611
619
// First, we define the reader: specify the data columns and where to find them in the text file.
612
- var reader = mlContext .Data .TextReader (new [] {
620
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
621
+ {
622
+ Column = new [] {
613
623
new TextLoader .Column (" SepalLength" , DataKind .R4 , 0 ),
614
624
new TextLoader .Column (" SepalWidth" , DataKind .R4 , 1 ),
615
625
new TextLoader .Column (" PetalLength" , DataKind .R4 , 2 ),
@@ -618,8 +628,8 @@ var reader = mlContext.Data.TextReader(new[] {
618
628
new TextLoader .Column (" Label" , DataKind .TX , 4 ),
619
629
},
620
630
// The default separator is a tab, but this dataset is comma-separated.
621
- separatorChar : ','
622
- );
631
+ Separator = " , "
632
+ } );
623
633
624
634
// Retrieve the training data.
625
635
var trainData = reader .Read (irisDataPath );
@@ -900,15 +910,17 @@ You can achieve the same results using the dynamic API.
900
910
var mlContext = new MLContext ();
901
911
902
912
// Define the reader: specify the data columns and where to find them in the text file.
903
- var reader = mlContext .Data .TextReader (new [] {
913
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
914
+ {
915
+ Column = new [] {
904
916
// The four features of the Iris dataset will be grouped together as one Features column.
905
917
new TextLoader .Column (" Features" , DataKind .R4 , 0 , 3 ),
906
918
// Label: kind of iris.
907
919
new TextLoader .Column (" Label" , DataKind .TX , 4 ),
908
920
},
909
921
// The default separator is a tab, but this dataset is comma-separated.
910
- separatorChar : ','
911
- );
922
+ Separator = " , "
923
+ } );
912
924
913
925
// Read the training data.
914
926
var trainData = reader .Read (dataPath );
@@ -1015,8 +1027,9 @@ You can achieve the same results using the dynamic API.
1015
1027
var mlContext = new MLContext ();
1016
1028
1017
1029
// Define the reader: specify the data columns and where to find them in the text file.
1018
- var reader = mlContext .Data .TextReader (new []
1019
- {
1030
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
1031
+ {
1032
+ Column = new [] {
1020
1033
new TextLoader .Column (" Label" , DataKind .BL , 0 ),
1021
1034
// We will load all the categorical features into one vector column of size 8.
1022
1035
new TextLoader .Column (" CategoricalFeatures" , DataKind .TX , 1 , 8 ),
@@ -1025,8 +1038,8 @@ var reader = mlContext.Data.TextReader(new[]
1025
1038
// Let's also separately load the 'Workclass' column.
1026
1039
new TextLoader .Column (" Workclass" , DataKind .TX , 1 ),
1027
1040
},
1028
- hasHeader : true
1029
- );
1041
+ HasHeader = true
1042
+ } );
1030
1043
1031
1044
// Read the data.
1032
1045
var data = reader .Read (dataPath );
@@ -1141,13 +1154,14 @@ You can achieve the same results using the dynamic API.
1141
1154
var mlContext = new MLContext ();
1142
1155
1143
1156
// Define the reader: specify the data columns and where to find them in the text file.
1144
- var reader = mlContext .Data .TextReader (new []
1145
- {
1157
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
1158
+ {
1159
+ Column = new [] {
1146
1160
new TextLoader .Column (" IsToxic" , DataKind .BL , 0 ),
1147
1161
new TextLoader .Column (" Message" , DataKind .TX , 1 ),
1148
1162
},
1149
- hasHeader : true
1150
- );
1163
+ HasHeader = true
1164
+ } );
1151
1165
1152
1166
// Read the data.
1153
1167
var data = reader .Read (dataPath );
@@ -1260,8 +1274,9 @@ var mlContext = new MLContext();
1260
1274
1261
1275
// Step one: read the data as an IDataView.
1262
1276
// First, we define the reader: specify the data columns and where to find them in the text file.
1263
- var reader = mlContext .Data .TextReader (new []
1264
- {
1277
+ var reader = mlContext .Data .TextReader (new TextLoader .Arguments
1278
+ {
1279
+ Column = new [] {
1265
1280
// We read each numeric feature as its own float column.
1266
1281
new TextLoader .Column (" SepalLength" , DataKind .R4 , 0 ),
1267
1282
new TextLoader .Column (" SepalWidth" , DataKind .R4 , 1 ),
@@ -1271,8 +1286,8 @@ var reader = mlContext.Data.TextReader(new[]
1271
1286
new TextLoader .Column (" Label" , DataKind .TX , 4 ),
1272
1287
},
1273
1288
// The default separator is a tab, but this dataset is comma-separated.
1274
- separatorChar : ','
1275
- );
1289
+ Separator = " , "
1290
+ } );
1276
1291
1277
1292
// Read the data.
1278
1293
var data = reader .Read (dataPath );
0 commit comments