Skip to content

Commit 2986558

Browse files
committed
made create methods internal and adjusted code accordingly
1 parent 75254c5 commit 2986558

36 files changed

+220
-277
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@ public static void FeatureContributionCalculationTransform_Regression()
1919

2020
// Step 1: Read the data as an IDataView.
2121
// First, we define the reader: specify the data columns and where to find them in the text file.
22-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
23-
{
24-
Separator = "tab",
25-
HasHeader = true,
26-
Column = new[]
22+
var reader = mlContext.Data.TextReader(
23+
columns: new[]
2724
{
2825
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
2926
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
@@ -37,8 +34,9 @@ public static void FeatureContributionCalculationTransform_Regression()
3734
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
3835
new TextLoader.Column("TaxRate", DataKind.R4, 10),
3936
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
40-
}
41-
});
37+
},
38+
hasHeader: true
39+
);
4240

4341
// Read the data
4442
var data = reader.Read(dataFile);

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,14 @@ public static void FeatureSelectionTransform()
3131

3232
// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
3333
// all the feature columns into entries of a vector of a single column named "Features".
34-
var reader = ml.Data.TextReader(new TextLoader.Arguments()
35-
{
36-
Separator = "tab",
37-
HasHeader = true,
38-
Column = new[]
34+
var reader = ml.Data.TextReader(
35+
columns: new[]
3936
{
4037
new TextLoader.Column("Label", DataKind.BL, 0),
4138
new TextLoader.Column("Features", DataKind.Num, new [] { new TextLoader.Range(1, 9) })
42-
}
43-
});
39+
},
40+
hasHeader: true
41+
);
4442

4543
// Then, we use the reader to read the data as an IDataView.
4644
var data = reader.Read(dataFilePath);

docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@ public static void RunExample()
1919

2020
// Step 1: Read the data as an IDataView.
2121
// First, we define the reader: specify the data columns and where to find them in the text file.
22-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
23-
{
24-
Separator = "tab",
25-
HasHeader = true,
26-
Column = new[]
22+
var reader = mlContext.Data.TextReader(
23+
columns: new[]
2724
{
2825
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
2926
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
@@ -37,8 +34,9 @@ public static void RunExample()
3734
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
3835
new TextLoader.Column("TaxRate", DataKind.R4, 10),
3936
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
40-
}
41-
});
37+
},
38+
hasHeader: true
39+
);
4240

4341
// Read the data
4442
var data = reader.Read(dataFile);

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,8 @@ public static void PFI_Regression()
2222
// First, we define the reader: specify the data columns and where to find them in the text file.
2323
// The data file is composed of rows of data, with each row having 12 numerical columns (indices 0 through 11)
2424
// separated by tabs (the reader's default separator).
25-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
26-
{
27-
Separator = "tab",
28-
HasHeader = true,
29-
Column = new[]
25+
var reader = mlContext.Data.TextReader(
26+
columns: new[]
3027
{
3128
// Read the first column (indexed by 0) in the data file as an R4 (float)
3229
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
@@ -40,9 +37,10 @@ public static void PFI_Regression()
4037
new TextLoader.Column("EmploymentDistance", DataKind.R4, 8),
4138
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
4239
new TextLoader.Column("TaxRate", DataKind.R4, 10),
43-
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
44-
}
45-
});
40+
new TextLoader.Column("TeacherRatio", DataKind.R4, 11)
41+
},
42+
hasHeader: true
43+
);
4644

4745
// Read the data
4846
var data = reader.Read(dataFile);

docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,14 @@ public static void SDCA_BinaryClassification()
2424

2525
// Step 1: Read the data as an IDataView.
2626
// First, we define the reader: specify the data columns and where to find them in the text file.
27-
var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
28-
{
29-
Separator = "tab",
30-
HasHeader = true,
31-
Column = new[]
27+
var reader = mlContext.Data.TextReader(
28+
columns: new[]
3229
{
3330
new TextLoader.Column("Sentiment", DataKind.BL, 0),
3431
new TextLoader.Column("SentimentText", DataKind.Text, 1)
35-
}
36-
});
32+
},
33+
hasHeader: true
34+
);
3735

3836
// Read the data
3937
var data = reader.Read(dataFile);

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,7 +1294,7 @@ private TextLoader(IHost host, ModelLoadContext ctx)
12941294
_parser = new Parser(this);
12951295
}
12961296

1297-
public static TextLoader Create(IHostEnvironment env, ModelLoadContext ctx)
1297+
internal static TextLoader Create(IHostEnvironment env, ModelLoadContext ctx)
12981298
{
12991299
Contracts.CheckValue(env, nameof(env));
13001300
IHost h = env.Register(RegistrationName);
@@ -1306,16 +1306,22 @@ public static TextLoader Create(IHostEnvironment env, ModelLoadContext ctx)
13061306
}
13071307

13081308
// These are legacy constructors needed for ComponentCatalog.
1309-
public static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files)
1309+
internal static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files)
13101310
=> (IDataLoader)Create(env, ctx).Read(files);
1311-
public static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
1311+
internal static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
13121312
=> (IDataLoader)new TextLoader(env, args, files).Read(files);
13131313

13141314
/// <summary>
1315-
/// Convenience method to create a <see cref="TextLoader"/> and use it to read a specified file.
1315+
/// Creates a <see cref="TextLoader"/> and uses it to read a specified file.
13161316
/// </summary>
1317-
public static IDataView ReadFile(IHostEnvironment env, Arguments args, IMultiStreamSource fileSource)
1318-
=> new TextLoader(env, args, fileSource).Read(fileSource);
1317+
/// <param name="env">The environment to use.</param>
1318+
/// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
1319+
/// <param name="hasHeader">Whether the file has a header.</param>
1320+
/// <param name="separatorChars">Defines the characters used as separators between data points in a row. By default the tab character is taken as separator.</param>
1321+
/// <param name="advancedSettings">A delegate to apply additional advanced settings to the text loader.</param>
1322+
/// <param name="fileSource">Specifies a file from which to read.</param>
1323+
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Column[] columns, bool hasHeader = false, char[] separatorChars = null, Action<Arguments> advancedSettings = null)
1324+
=> new TextLoader(env, columns, hasHeader, separatorChars, advancedSettings, fileSource).Read(fileSource);
13191325

13201326
public void Save(ModelSaveContext ctx)
13211327
{

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,45 +13,39 @@ namespace Microsoft.ML
1313
{
1414
public static class TextLoaderSaverCatalog
1515
{
16-
/// <summary>
17-
/// Create a text reader.
18-
/// </summary>
19-
/// <param name="catalog">The catalog.</param>
20-
/// <param name="args">The arguments to text reader, describing the data schema.</param>
21-
/// <param name="dataSample">The optional location of a data sample.</param>
22-
public static TextLoader TextReader(this DataOperations catalog,
23-
TextLoader.Arguments args, IMultiStreamSource dataSample = null)
24-
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);
25-
2616
/// <summary>
2717
/// Create a text reader.
2818
/// </summary>
2919
/// <param name="catalog">The catalog.</param>
3020
/// <param name="columns">The columns of the schema.</param>
21+
/// <param name="hasHeader">Whether the file has a header.</param>
22+
/// <param name="separatorChars">Defines the characters used as separators between data points in a row. By default the tab character is taken as separator.</param>
3123
/// <param name="advancedSettings">The delegate to set additional settings.</param>
3224
/// <param name="dataSample">The optional location of a data sample.</param>
3325
public static TextLoader TextReader(this DataOperations catalog,
34-
TextLoader.Column[] columns, Action<Arguments> advancedSettings = null, IMultiStreamSource dataSample = null)
35-
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, advancedSettings: advancedSettings, dataSample: dataSample);
26+
Column[] columns, bool hasHeader = false, char[] separatorChars = null, Action<Arguments> advancedSettings = null, IMultiStreamSource dataSample = null)
27+
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChars, advancedSettings, dataSample);
3628

3729
/// <summary>
3830
/// Read a data view from a text file using <see cref="TextLoader"/>.
3931
/// </summary>
4032
/// <param name="catalog">The catalog.</param>
4133
/// <param name="columns">The columns of the schema.</param>
42-
/// <param name="advancedSettings">The delegate to set additional settings</param>
43-
/// <param name="path">The path to the file</param>
34+
/// <param name="hasHeader">Whether the file has a header.</param>
35+
/// <param name="separatorChars">Defines the characters used as separators between data points in a row. By default the tab character is taken as separator.</param>
36+
/// <param name="advancedSettings">The delegate to set additional settings.</param>
37+
/// <param name="path">The path to the file.</param>
4438
/// <returns>The data view.</returns>
4539
public static IDataView ReadFromTextFile(this DataOperations catalog,
46-
TextLoader.Column[] columns, string path, Action<Arguments> advancedSettings = null)
40+
string path, Column[] columns, bool hasHeader = false, char[] separatorChars = null, Action<Arguments> advancedSettings = null)
4741
{
4842
Contracts.CheckNonEmpty(path, nameof(path));
4943

5044
var env = catalog.GetEnvironment();
5145

5246
// REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
5347
// Therefore, we are going to disallow data sample.
54-
var reader = new TextLoader(env, columns, advancedSettings: advancedSettings, dataSample: null);
48+
var reader = new TextLoader(env, columns, hasHeader, separatorChars, advancedSettings, dataSample: null);
5549
return reader.Read(new MultiFileSource(path));
5650
}
5751

src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -483,13 +483,9 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, stri
483483
"{0} should not be specified when default loader is TextLoader. Ignoring {0}={1}",
484484
nameof(Arguments.TermsColumn), src);
485485
}
486-
termData = TextLoader.ReadFile(env,
487-
new TextLoader.Arguments()
488-
{
489-
Separator = "tab",
490-
Column = new[] { new TextLoader.Column("Term", DataKind.TX, 0) }
491-
},
492-
fileSource);
486+
termData = TextLoader.ReadFile(env, fileSource,
487+
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) }
488+
);
493489
src = "Term";
494490
autoConvert = true;
495491
}

src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,7 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
283283
{
284284
// REVIEW: Should really validate the schema here, and consider
285285
// ignoring this stream if it isn't as expected.
286-
var loader = TextLoader.ReadFile(env, new TextLoader.Arguments(),
287-
new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
286+
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile), null);
288287

289288
using (var cursor = loader.GetRowCursor(c => true))
290289
{

src/Microsoft.ML.Transforms/TermLookupTransformer.cs

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -349,17 +349,12 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
349349
// If the user specified non-key values, we define the value column to be numeric.
350350
if (!keyValues)
351351
return ComponentFactoryUtils.CreateFromFunction<IMultiStreamSource, IDataLoader>(
352-
(env, files) => TextLoader.Create(
353-
env,
354-
new TextLoader.Arguments()
355-
{
356-
Column = new[]
352+
(env, files) => new TextLoader(
353+
env, new[]
357354
{
358355
new TextLoader.Column("Term", DataKind.TX, 0),
359356
new TextLoader.Column("Value", DataKind.Num, 1)
360-
}
361-
},
362-
files));
357+
}, dataSample: files).Read(files) as IDataLoader);
363358

364359
// If the user specified key values, we scan the values to determine the range of the key type.
365360
ulong min = ulong.MaxValue;
@@ -369,7 +364,11 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
369364
var txtArgs = new TextLoader.Arguments();
370365
bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs);
371366
host.Assert(parsed);
372-
var data = TextLoader.ReadFile(host, txtArgs, new MultiFileSource(filename));
367+
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
368+
{
369+
new TextLoader.Column("Term", DataKind.TX, 0),
370+
new TextLoader.Column("Value", DataKind.TX, 1)
371+
});
373372
using (var cursor = data.GetRowCursor(c => true))
374373
{
375374
var getTerm = cursor.GetGetter<ReadOnlyMemory<char>>(0);
@@ -444,17 +443,14 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
444443
}
445444

446445
return ComponentFactoryUtils.CreateFromFunction<IMultiStreamSource, IDataLoader>(
447-
(env, files) => TextLoader.Create(
448-
env,
449-
new TextLoader.Arguments()
450-
{
451-
Column = new[]
452-
{
453-
new TextLoader.Column("Term", DataKind.TX, 0),
454-
valueColumn
455-
}
456-
},
457-
files));
446+
(env, files) => new TextLoader(
447+
env,
448+
columns: new[]
449+
{
450+
new TextLoader.Column("Term", DataKind.TX, 0),
451+
valueColumn
452+
},
453+
dataSample: files).Read(files) as IDataLoader);
458454
}
459455

460456
// This saves the lookup data as a byte array encoded as a binary .idv file.

src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -722,17 +722,13 @@ private IDataLoader GetLoaderForStopwords(IChannel ch, string dataFile,
722722
{
723723
if (stopwordsCol == null)
724724
stopwordsCol = "Stopwords";
725-
dataLoader = TextLoader.Create(
725+
dataLoader = new TextLoader(
726726
Host,
727-
new TextLoader.Arguments()
727+
columns: new[]
728728
{
729-
Separator = "tab",
730-
Column = new[]
731-
{
732-
new TextLoader.Column(stopwordsCol, DataKind.TX, 0)
733-
}
729+
new TextLoader.Column(stopwordsCol, DataKind.TX, 0)
734730
},
735-
fileSource);
731+
dataSample: fileSource).Read(fileSource) as IDataLoader;
736732
}
737733
ch.AssertNonEmpty(stopwordsCol);
738734
}

test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
1818
var ml = new MLContext(seed: 1);
1919
// Pipeline
2020

21-
var input = ml.Data.ReadFromTextFile(new[] {
21+
var input = ml.Data.ReadFromTextFile(_dataPath, new[] {
2222
new TextLoader.Column("Label", DataKind.R4, 0),
2323
new TextLoader.Column("CatFeatures", DataKind.TX,
2424
new [] {
@@ -28,11 +28,7 @@ public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
2828
new [] {
2929
new TextLoader.Range() { Min = 9, Max = 14 },
3030
}),
31-
}, _dataPath, s =>
32-
{
33-
s.HasHeader = true;
34-
s.Separator = "\t";
35-
});
31+
}, hasHeader: true);
3632

3733
var estimatorPipeline = ml.Transforms.Categorical.OneHotEncoding("CatFeatures")
3834
.Append(ml.Transforms.Normalize("NumFeatures"))

0 commit comments

Comments
 (0)