diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md
index 7ff7b1f8eb..87e8b82647 100644
--- a/docs/code/MlNetCookBook.md
+++ b/docs/code/MlNetCookBook.md
@@ -95,7 +95,7 @@ This is how you can read this data:
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(0),
// Three text columns.
@@ -115,9 +115,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
@@ -126,8 +124,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
- HasHeader = true
-});
+ hasHeader: true
+);
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var data = reader.Read(dataPath);
@@ -155,7 +153,7 @@ This is how you can read this data:
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(14),
// Three text columns.
@@ -175,19 +173,17 @@ The code is very similar using the dynamic API:
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
- new TextLoader.Column("IsOver50k", DataKind.BL, 0),
+ new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
new TextLoader.Column("Workclass", DataKind.TX, 1),
new TextLoader.Column("Education", DataKind.TX, 2),
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
- HasHeader = true
-});
+ hasHeader: true
+);
var data = reader.Read(exampleFile1, exampleFile2);
```
@@ -211,7 +207,7 @@ Reading this file using `TextLoader`:
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// We read the first 11 values as a single float vector.
FeatureVector: ctx.LoadFloat(0, 10),
// Separately, read the target variable.
@@ -233,7 +229,7 @@ If the schema of the data is not known at compile time, or too cumbersome, you c
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 10 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
// Separately, read the target variable.
@@ -302,7 +298,7 @@ Label Workclass education marital-status
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(0),
// Three text columns.
@@ -365,19 +361,17 @@ You can also use the dynamic API to create the equivalent of the previous pipeli
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
- new TextLoader.Column("IsOver50k", DataKind.BL, 0),
+ new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
new TextLoader.Column("Workclass", DataKind.TX, 1),
new TextLoader.Column("Education", DataKind.TX, 2),
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
- HasHeader = true
-});
+ hasHeader: true
+);
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
// together into one.
@@ -428,7 +422,7 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// We read the first 11 values as a single float vector.
FeatureVector: ctx.LoadFloat(0, 10),
// Separately, read the target variable.
@@ -482,9 +476,7 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 11 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
@@ -492,10 +484,10 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("Target", DataKind.R4, 11),
},
// First line of the file is a header, not a data row.
- HasHeader = true,
+ hasHeader: true,
// Default separator is tab, but we need a semicolon.
- Separator = ";"
-});
+ separatorChar: ';'
+);
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var trainData = reader.Read(trainDataPath);
@@ -603,7 +595,7 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
@@ -653,9 +645,7 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -664,8 +654,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
- Separator = ","
-});
+ separatorChar: ','
+);
// Retrieve the training data.
var trainData = reader.Read(irisDataPath);
@@ -821,7 +811,7 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
@@ -917,7 +907,7 @@ Here's a snippet of code that demonstrates normalization in learning pipelines.
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset will be grouped together as one Features column.
Features: ctx.LoadFloat(0, 3),
// Label: kind of iris.
@@ -952,17 +942,15 @@ You can achieve the same results using the dynamic API.
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[] {
// The four features of the Iris dataset will be grouped together as one Features column.
new TextLoader.Column("Features", DataKind.R4, 0, 3),
// Label: kind of iris.
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
- Separator = ","
-});
+ separatorChar: ','
+);
// Read the training data.
var trainData = reader.Read(dataPath);
@@ -1011,7 +999,7 @@ Label Workclass education marital-status occupation relationship ethnicity sex n
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
Label: ctx.LoadBool(0),
// We will load all the categorical features into one vector column of size 8.
CategoricalFeatures: ctx.LoadText(1, 8),
@@ -1073,9 +1061,8 @@ You can achieve the same results using the dynamic API.
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[]
+ {
new TextLoader.Column("Label", DataKind.BL, 0),
// We will load all the categorical features into one vector column of size 8.
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
@@ -1084,8 +1071,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
// Let's also separately load the 'Workclass' column.
new TextLoader.Column("Workclass", DataKind.TX, 1),
},
- HasHeader = true
-});
+ hasHeader: true
+);
// Read the data.
var data = reader.Read(dataPath);
@@ -1157,7 +1144,7 @@ Sentiment SentimentText
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
IsToxic: ctx.LoadBool(0),
Message: ctx.LoadText(1)
), hasHeader: true);
@@ -1207,14 +1194,13 @@ You can achieve the same results using the dynamic API.
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[]
+ {
new TextLoader.Column("IsToxic", DataKind.BL, 0),
new TextLoader.Column("Message", DataKind.TX, 1),
},
- HasHeader = true
-});
+ hasHeader: true
+);
// Read the data.
var data = reader.Read(dataPath);
@@ -1274,7 +1260,7 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
@@ -1330,9 +1316,8 @@ var mlContext = new MLContext();
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(new TextLoader.Arguments
-{
- Column = new[] {
+var reader = mlContext.Data.CreateTextReader(new[]
+ {
// The four features of the Iris dataset.
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
@@ -1342,8 +1327,8 @@ var reader = mlContext.Data.TextReader(new TextLoader.Arguments
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
- Separator = ","
-});
+ separatorChar: ','
+);
// Read the data.
var data = reader.Read(dataPath);
@@ -1395,7 +1380,7 @@ var mlContext = new MLContext();
// Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
-var reader = mlContext.Data.TextReader(ctx => (
+var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
index 1c28df327f..d88cff2cdd 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs
@@ -19,11 +19,8 @@ public static void FeatureContributionCalculationTransform_Regression()
// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(
+ columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
@@ -37,8 +34,9 @@ public static void FeatureContributionCalculationTransform_Regression()
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
new TextLoader.Column("TaxRate", DataKind.R4, 10),
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
- }
- });
+ },
+ hasHeader: true
+ );
// Read the data
var data = reader.Read(dataFile);
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs
index 7508815dc4..f0d0442d42 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs
@@ -31,16 +31,14 @@ public static void FeatureSelectionTransform()
// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
// all the feature columns into entries of a vector of a single column named "Features".
- var reader = ml.Data.TextReader(new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = ml.Data.CreateTextReader(
+ columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
new TextLoader.Column("Features", DataKind.Num, new [] { new TextLoader.Range(1, 9) })
- }
- });
+ },
+ hasHeader: true
+ );
// Then, we use the reader to read the data as an IDataView.
var data = reader.Read(dataFilePath);
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs
index 827d04a586..26fadc0148 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs
@@ -19,11 +19,8 @@ public static void RunExample()
// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(
+ columns: new[]
{
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
@@ -37,8 +34,9 @@ public static void RunExample()
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
new TextLoader.Column("TaxRate", DataKind.R4, 10),
new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
- }
- });
+ },
+ hasHeader: true
+ );
// Read the data
var data = reader.Read(dataFile);
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs
index 0c95abacb8..7150f12835 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs
@@ -22,11 +22,8 @@ public static void PFI_Regression()
// First, we define the reader: specify the data columns and where to find them in the text file.
// The data file is composed of rows of data, with each row having 11 numerical columns
// separated by whitespace.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(
+ columns: new[]
{
// Read the first column (indexed by 0) in the data file as an R4 (float)
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),
@@ -40,9 +37,10 @@ public static void PFI_Regression()
new TextLoader.Column("EmploymentDistance", DataKind.R4, 8),
new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
new TextLoader.Column("TaxRate", DataKind.R4, 10),
- new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
- }
- });
+ new TextLoader.Column("TeacherRatio", DataKind.R4, 11)
+ },
+ hasHeader: true
+ );
// Read the data
var data = reader.Read(dataFile);
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
index a6c1904f6a..09dea18ff1 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs
@@ -24,16 +24,14 @@ public static void SDCA_BinaryClassification()
// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(
+ columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
new TextLoader.Column("SentimentText", DataKind.Text, 1)
- }
- });
+ },
+ hasHeader: true
+ );
// Read the data
var data = reader.Read(dataFile);
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
index e748967714..a3d5260531 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -25,7 +25,6 @@ namespace Microsoft.ML.Runtime.Data
{
/// <summary>
/// Loads a text file into an IDataView. Supports basic mapping from input columns to IDataView columns.
- /// Should accept any file that TlcTextInstances accepts.
/// </summary>
public sealed partial class TextLoader : IDataReader, ICanSaveModel
{
@@ -1008,23 +1007,38 @@ private bool HasHeader
private readonly IHost _host;
private const string RegistrationName = "TextLoader";
- public TextLoader(IHostEnvironment env, Column[] columns, Action advancedSettings, IMultiStreamSource dataSample = null)
- : this(env, MakeArgs(columns, advancedSettings), dataSample)
+ /// <summary>
+ /// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
+ /// </summary>
+ /// <param name="env">The environment to use.</param>
+ /// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
+ /// <param name="hasHeader">Whether the file has a header.</param>
+ /// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
+ /// <param name="dataSample">Allows exposing items that can be used for reading.</param>
+ public TextLoader(IHostEnvironment env, Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
+ : this(env, MakeArgs(columns, hasHeader, new[] { separatorChar }), dataSample)
{
}
- private static Arguments MakeArgs(Column[] columns, Action advancedSettings)
+ private static Arguments MakeArgs(Column[] columns, bool hasHeader, char[] separatorChars)
{
- var result = new Arguments { Column = columns };
- advancedSettings?.Invoke(result);
+ Contracts.AssertValue(separatorChars);
+ var result = new Arguments { Column = columns, HasHeader = hasHeader, SeparatorChars = separatorChars};
return result;
}
- public TextLoader(IHostEnvironment env, Arguments args, IMultiStreamSource dataSample = null)
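+ /// <summary>
+ /// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
+ /// </summary>
+ /// <param name="env">The environment to use.</param>
+ /// <param name="args">Defines the settings of the load operation.</param>
+ /// <param name="dataSample">Allows exposing items that can be used for reading.</param>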
+ public TextLoader(IHostEnvironment env, Arguments args = null, IMultiStreamSource dataSample = null)
{
+ args = args ?? new Arguments();
+
Contracts.CheckValue(env, nameof(env));
_host = env.Register(RegistrationName);
-
_host.CheckValue(args, nameof(args));
_host.CheckValueOrNull(dataSample);
@@ -1285,7 +1299,7 @@ private TextLoader(IHost host, ModelLoadContext ctx)
_parser = new Parser(this);
}
- public static TextLoader Create(IHostEnvironment env, ModelLoadContext ctx)
+ internal static TextLoader Create(IHostEnvironment env, ModelLoadContext ctx)
{
Contracts.CheckValue(env, nameof(env));
IHost h = env.Register(RegistrationName);
@@ -1297,15 +1311,15 @@ public static TextLoader Create(IHostEnvironment env, ModelLoadContext ctx)
}
// These are legacy constructors needed for ComponentCatalog.
- public static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files)
+ internal static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files)
=> (IDataLoader)Create(env, ctx).Read(files);
- public static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
+ internal static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
=> (IDataLoader)new TextLoader(env, args, files).Read(files);
/// <summary>
/// Convenience method to create a <see cref="TextLoader"/> and use it to read a specified file.
/// </summary>
- public static IDataView ReadFile(IHostEnvironment env, Arguments args, IMultiStreamSource fileSource)
+ internal static IDataView ReadFile(IHostEnvironment env, Arguments args, IMultiStreamSource fileSource)
=> new TextLoader(env, args, fileSource).Read(fileSource);
public void Save(ModelSaveContext ctx)
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
index e5de3573ee..b4cf936a38 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs
@@ -5,13 +5,8 @@
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
using Microsoft.ML.Runtime.Data.IO;
-using Microsoft.ML.Runtime.Internal.Utilities;
-using Microsoft.ML.StaticPipe;
using System;
-using System.Collections.Generic;
using System.IO;
-using System.Linq;
-using System.Text;
using static Microsoft.ML.Runtime.Data.TextLoader;
namespace Microsoft.ML
@@ -19,36 +14,37 @@ namespace Microsoft.ML
public static class TextLoaderSaverCatalog
{
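/// <summary>
- /// Create a text reader.
+ /// Create a text reader <see cref="TextLoader"/>.
/// </summary>
/// <param name="catalog">The catalog.</param>
- /// <param name="args">The arguments to text reader, describing the data schema.</param>
+ /// <param name="columns">The columns of the schema.</param>
+ /// <param name="hasHeader">Whether the file has a header.</param>
+ /// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
/// <param name="dataSample">The optional location of a data sample.</param>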
- public static TextLoader TextReader(this DataOperations catalog,
- TextLoader.Arguments args, IMultiStreamSource dataSample = null)
- => new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);
+ public static TextLoader CreateTextReader(this DataOperations catalog,
+ Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
+ => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);
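/// <summary>
- /// Create a text reader.
+ /// Create a text reader <see cref="TextLoader"/>.
/// </summary>
/// <param name="catalog">The catalog.</param>
- /// <param name="columns">The columns of the schema.</param>
- /// <param name="advancedSettings">The delegate to set additional settings.</param>
- /// <param name="dataSample">The optional location of a data sample.</param>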
- public static TextLoader TextReader(this DataOperations catalog,
- TextLoader.Column[] columns, Action advancedSettings = null, IMultiStreamSource dataSample = null)
- => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, advancedSettings, dataSample);
+ /// <param name="args">Defines the settings of the load operation.</param>
+ /// <param name="dataSample">Allows exposing items that can be used for reading.</param>
+ public static TextLoader CreateTextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
+ => new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);
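/// <summary>
/// Read a data view from a text file using <see cref="TextLoader"/>.
/// </summary>
/// <param name="catalog">The catalog.</param>
/// <param name="columns">The columns of the schema.</param>
- /// <param name="advancedSettings">The delegate to set additional settings</param>
- /// <param name="path">The path to the file</param>
+ /// <param name="hasHeader">Whether the file has a header.</param>
+ /// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
+ /// <param name="path">The path to the file.</param>
/// <returns>The data view.</returns>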
public static IDataView ReadFromTextFile(this DataOperations catalog,
- TextLoader.Column[] columns, string path, Action advancedSettings = null)
+ string path, Column[] columns, bool hasHeader = false, char separatorChar = '\t')
{
Contracts.CheckNonEmpty(path, nameof(path));
@@ -56,10 +52,26 @@ public static IDataView ReadFromTextFile(this DataOperations catalog,
// REVIEW: it is almost always a mistake to have a 'trainable' text loader here.
// Therefore, we are going to disallow data sample.
- var reader = new TextLoader(env, columns, advancedSettings, dataSample: null);
+ var reader = new TextLoader(env, columns, hasHeader, separatorChar, dataSample: null);
return reader.Read(new MultiFileSource(path));
}
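+ /// <summary>
+ /// Read a data view from a text file using <see cref="TextLoader"/>.
+ /// </summary>
+ /// <param name="catalog">The catalog.</param>
+ /// <param name="path">Specifies a file from which to read.</param>
+ /// <param name="args">Defines the settings of the load operation.</param>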
+ public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args = null)
+ {
+ Contracts.CheckNonEmpty(path, nameof(path));
+
+ var env = catalog.GetEnvironment();
+ var source = new MultiFileSource(path);
+
+ return new TextLoader(env, args, source).Read(source);
+ }
+
///
/// Save the data view as text.
///
diff --git a/src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs b/src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs
index 57adb1be4d..a5f0172935 100644
--- a/src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs
+++ b/src/Microsoft.ML.Data/StaticPipe/DataLoadSaveOperationsExtensions.cs
@@ -4,14 +4,7 @@
using Microsoft.ML.Runtime;
using Microsoft.ML.Runtime.Data;
-using Microsoft.ML.Runtime.Data.IO;
-using Microsoft.ML.Runtime.Internal.Utilities;
-using Microsoft.ML.StaticPipe;
using System;
-using System.Collections.Generic;
-using System.IO;
-using System.Linq;
-using System.Text;
using static Microsoft.ML.Runtime.Data.TextLoader;
namespace Microsoft.ML.StaticPipe
@@ -40,10 +33,10 @@ public static class DataLoadSaveOperationsExtensions
/// <param name="allowSparse">Whether the input may include sparse representations.</param>
/// <param name="trimWhitspace">Remove trailing whitespace from lines.</param>
/// <returns>A configured statically-typed reader for text files.</returns>
- public static DataReader TextReader<[IsShape] TShape>(
+ public static DataReader CreateTextReader<[IsShape] TShape>(
this DataOperations catalog, Func func, IMultiStreamSource files = null,
bool hasHeader = false, char separator = '\t', bool allowQuoting = true, bool allowSparse = true,
bool trimWhitspace = false)
- => TextLoader.CreateReader(catalog.Environment, func, files, hasHeader, separator, allowQuoting, allowSparse, trimWhitspace);
+ => CreateReader(catalog.Environment, func, files, hasHeader, separator, allowQuoting, allowSparse, trimWhitspace);
}
}
diff --git a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs
index 81d8e927b4..b33fe123fe 100644
--- a/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs
+++ b/src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs
@@ -483,13 +483,10 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, stri
"{0} should not be specified when default loader is TextLoader. Ignoring {0}={1}",
nameof(Arguments.TermsColumn), src);
}
- termData = TextLoader.ReadFile(env,
- new TextLoader.Arguments()
- {
- Separator = "tab",
- Column = new[] { new TextLoader.Column("Term", DataKind.TX, 0) }
- },
- fileSource);
+ termData = new TextLoader(env,
+ columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
+ dataSample: fileSource)
+ .Read(fileSource);
src = "Term";
autoConvert = true;
}
diff --git a/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs b/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs
index 97948b764b..5debae70b7 100644
--- a/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs
+++ b/src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs
@@ -283,8 +283,8 @@ public static IEnumerable> LoadRoleMappingsOrNu
{
// REVIEW: Should really validate the schema here, and consider
// ignoring this stream if it isn't as expected.
- var loader = TextLoader.ReadFile(env, new TextLoader.Arguments(),
- new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
+ var repoStreamWrapper = new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile);
+ var loader = new TextLoader(env, dataSample: repoStreamWrapper).Read(repoStreamWrapper);
using (var cursor = loader.GetRowCursor(c => true))
{
diff --git a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs
index 83a36c55af..741b112645 100644
--- a/src/Microsoft.ML.Transforms/TermLookupTransformer.cs
+++ b/src/Microsoft.ML.Transforms/TermLookupTransformer.cs
@@ -349,27 +349,27 @@ private static IComponentFactory GetLoaderFacto
// If the user specified non-key values, we define the value column to be numeric.
if (!keyValues)
return ComponentFactoryUtils.CreateFromFunction(
- (env, files) => TextLoader.Create(
- env,
- new TextLoader.Arguments()
- {
- Column = new[]
+ (env, files) => new TextLoader(
+ env, new[]
{
new TextLoader.Column("Term", DataKind.TX, 0),
new TextLoader.Column("Value", DataKind.Num, 1)
- }
- },
- files));
+ }, dataSample: files).Read(files) as IDataLoader);
// If the user specified key values, we scan the values to determine the range of the key type.
ulong min = ulong.MaxValue;
ulong max = ulong.MinValue;
try
{
- var txtArgs = new TextLoader.Arguments();
- bool parsed = CmdParser.ParseArguments(host, "col=Term:TX:0 col=Value:TX:1", txtArgs);
- host.Assert(parsed);
- var data = TextLoader.ReadFile(host, txtArgs, new MultiFileSource(filename));
+ var file = new MultiFileSource(filename);
+ var data = new TextLoader(host, new[]
+ {
+ new TextLoader.Column("Term", DataKind.TX, 0),
+ new TextLoader.Column("Value", DataKind.TX, 1)
+ },
+ dataSample: file
+ ).Read(file);
+
using (var cursor = data.GetRowCursor(c => true))
{
var getTerm = cursor.GetGetter>(0);
@@ -444,17 +444,14 @@ private static IComponentFactory GetLoaderFacto
}
return ComponentFactoryUtils.CreateFromFunction(
- (env, files) => TextLoader.Create(
- env,
- new TextLoader.Arguments()
- {
- Column = new[]
- {
- new TextLoader.Column("Term", DataKind.TX, 0),
- valueColumn
- }
- },
- files));
+ (env, files) => new TextLoader(
+ env,
+ columns: new[]
+ {
+ new TextLoader.Column("Term", DataKind.TX, 0),
+ valueColumn
+ },
+ dataSample: files).Read(files) as IDataLoader);
}
// This saves the lookup data as a byte array encoded as a binary .idv file.
diff --git a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs
index a4e0cee7e6..7cd22beb74 100644
--- a/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs
+++ b/src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs
@@ -722,17 +722,13 @@ private IDataLoader GetLoaderForStopwords(IChannel ch, string dataFile,
{
if (stopwordsCol == null)
stopwordsCol = "Stopwords";
- dataLoader = TextLoader.Create(
+ dataLoader = new TextLoader(
Host,
- new TextLoader.Arguments()
+ columns: new[]
{
- Separator = "tab",
- Column = new[]
- {
- new TextLoader.Column(stopwordsCol, DataKind.TX, 0)
- }
+ new TextLoader.Column(stopwordsCol, DataKind.TX, 0)
},
- fileSource);
+ dataSample: fileSource).Read(fileSource) as IDataLoader;
}
ch.AssertNonEmpty(stopwordsCol);
}
diff --git a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
index 990915128e..1adf7d4065 100644
--- a/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
+++ b/test/Microsoft.ML.Benchmarks/KMeansAndLogisticRegressionBench.cs
@@ -18,7 +18,7 @@ public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
var ml = new MLContext(seed: 1);
// Pipeline
- var input = ml.Data.ReadFromTextFile(new[] {
+ var input = ml.Data.ReadFromTextFile(_dataPath, new[] {
new TextLoader.Column("Label", DataKind.R4, 0),
new TextLoader.Column("CatFeatures", DataKind.TX,
new [] {
@@ -28,11 +28,7 @@ public ParameterMixingCalibratedPredictor TrainKMeansAndLR()
new [] {
new TextLoader.Range() { Min = 9, Max = 14 },
}),
- }, _dataPath, s =>
- {
- s.HasHeader = true;
- s.Separator = "\t";
- });
+ }, hasHeader: true);
var estimatorPipeline = ml.Transforms.Categorical.OneHotEncoding("CatFeatures")
.Append(ml.Transforms.Normalize("NumFeatures"))
diff --git a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
index 39342cf224..c711ab63f6 100644
--- a/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
+++ b/test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs
@@ -38,19 +38,16 @@ public void SetupIrisPipeline()
var env = new MLContext(seed: 1, conc: 1);
var reader = new TextLoader(env,
- new TextLoader.Arguments()
- {
- Separator = "\t",
- HasHeader = true,
- Column = new[]
+ columns: new[]
{
new TextLoader.Column("Label", DataKind.R4, 0),
new TextLoader.Column("SepalLength", DataKind.R4, 1),
new TextLoader.Column("SepalWidth", DataKind.R4, 2),
new TextLoader.Column("PetalLength", DataKind.R4, 3),
new TextLoader.Column("PetalWidth", DataKind.R4, 4),
- }
- });
+ },
+ hasHeader: true
+ );
IDataView data = reader.Read(_irisDataPath);
@@ -73,17 +70,13 @@ public void SetupSentimentPipeline()
string _sentimentDataPath = Program.GetInvariantCultureDataPath("wikipedia-detox-250-line-data.tsv");
var env = new MLContext(seed: 1, conc: 1);
- var reader = new TextLoader(env,
- new TextLoader.Arguments()
- {
- Separator = "\t",
- HasHeader = true,
- Column = new[]
+ var reader = new TextLoader(env, columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
new TextLoader.Column("SentimentText", DataKind.Text, 1)
- }
- });
+ },
+ hasHeader: true
+ );
IDataView data = reader.Read(_sentimentDataPath);
@@ -106,17 +99,13 @@ public void SetupBreastCancerPipeline()
string _breastCancerDataPath = Program.GetInvariantCultureDataPath("breast-cancer.txt");
var env = new MLContext(seed: 1, conc: 1);
- var reader = new TextLoader(env,
- new TextLoader.Arguments()
- {
- Separator = "\t",
- HasHeader = false,
- Column = new[]
+ var reader = new TextLoader(env, columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(1, 9) })
- }
- });
+ },
+ hasHeader: false
+ );
IDataView data = reader.Read(_breastCancerDataPath);
diff --git a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
index c2fee89f24..bee16e0d7b 100644
--- a/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
+++ b/test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs
@@ -64,30 +64,29 @@ public void TrainSentiment()
{
var env = new MLContext(seed: 1);
// Pipeline
- var loader = TextLoader.ReadFile(env,
- new TextLoader.Arguments()
+ var arguments = new TextLoader.Arguments()
+ {
+ Column = new TextLoader.Column[]
{
- AllowQuoting = false,
- AllowSparse = false,
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ new TextLoader.Column()
{
- new TextLoader.Column()
- {
- Name = "Label",
- Source = new [] { new TextLoader.Range() { Min=0, Max=0} },
- Type = DataKind.Num
- },
+ Name = "Label",
+ Source = new[] { new TextLoader.Range() { Min = 0, Max = 0 } },
+ Type = DataKind.Num
+ },
- new TextLoader.Column()
- {
- Name = "SentimentText",
- Source = new [] { new TextLoader.Range() { Min=1, Max=1} },
- Type = DataKind.Text
- }
+ new TextLoader.Column()
+ {
+ Name = "SentimentText",
+ Source = new[] { new TextLoader.Range() { Min = 1, Max = 1 } },
+ Type = DataKind.Text
}
- }, new MultiFileSource(_sentimentDataPath));
+ },
+ HasHeader = true,
+ AllowQuoting = false,
+ AllowSparse = false
+ };
+ var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments);
var text = TextFeaturizingEstimator.Create(env,
new TextFeaturizingEstimator.Arguments()
diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
index f96f6f205a..94c7d98155 100644
--- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
+++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs
@@ -606,12 +606,12 @@ public void RankingLightGBMTest()
public void TestTreeEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
- var dataView = TextLoader.Create(Env, new TextLoader.Arguments(), new MultiFileSource(dataPath));
+ var dataView = ML.Data.ReadFromTextFile(dataPath);
var fastTrees = new IPredictorModel[3];
for (int i = 0; i < 3; i++)
{
- fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
+ fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -628,13 +628,13 @@ public void TestTreeEnsembleCombiner()
public void TestTreeEnsembleCombinerWithCategoricalSplits()
{
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
- var dataView = TextLoader.Create(Env, new TextLoader.Arguments(), new MultiFileSource(dataPath));
+ var dataView = ML.Data.ReadFromTextFile(dataPath);
- var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
+ var cat = new OneHotEncodingEstimator(ML, "Categories", "Features").Fit(dataView).Transform(dataView);
var fastTrees = new IPredictorModel[3];
for (int i = 0; i < 3; i++)
{
- fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
+ fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -729,11 +729,11 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
public void TestEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
- var dataView = TextLoader.Create(Env, new TextLoader.Arguments(), new MultiFileSource(dataPath));
+ var dataView = ML.Data.ReadFromTextFile(dataPath);
var predictors = new IPredictorModel[]
{
- FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
+ FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
{
FeatureColumn = "Features",
NumTrees = 5,
@@ -741,7 +741,7 @@ public void TestEnsembleCombiner()
LabelColumn = DefaultColumnNames.Label,
TrainingData = dataView
}).PredictorModel,
- AveragedPerceptronTrainer.TrainBinary(Env, new AveragedPerceptronTrainer.Arguments()
+ AveragedPerceptronTrainer.TrainBinary(ML, new AveragedPerceptronTrainer.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
@@ -749,7 +749,7 @@ public void TestEnsembleCombiner()
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
- LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
+ LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
@@ -757,7 +757,7 @@ public void TestEnsembleCombiner()
TrainingData = dataView,
NormalizeFeatures = NormalizeOption.No
}).PredictorModel,
- LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
+ LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
{
FeatureColumn = "Features",
LabelColumn = DefaultColumnNames.Label,
@@ -775,7 +775,7 @@ public void TestEnsembleCombiner()
public void TestMultiClassEnsembleCombiner()
{
var dataPath = GetDataPath("breast-cancer.txt");
- var dataView = TextLoader.Create(Env, new TextLoader.Arguments(), new MultiFileSource(dataPath));
+ var dataView = ML.Data.ReadFromTextFile(dataPath);
var predictors = new IPredictorModel[]
{
diff --git a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
index 35da6afbc0..0cff6c1794 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/StaticPipeTests.cs
@@ -882,7 +882,7 @@ public void TestConvertStatic()
+ "1 1 2 4 15";
var dataSource = new BytesStreamSource(content);
- var text = ml.Data.TextReader(ctx => (
+ var text = ml.Data.CreateTextReader(ctx => (
label: ctx.LoadBool(0),
text: ctx.LoadText(1),
numericFeatures: ctx.LoadDouble(2, null)), // If fit correctly, this ought to be equivalent to max of 4, that is, length of 3.
diff --git a/test/Microsoft.ML.StaticPipelineTesting/Training.cs b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
index 959f316111..ee5e2626cf 100644
--- a/test/Microsoft.ML.StaticPipelineTesting/Training.cs
+++ b/test/Microsoft.ML.StaticPipelineTesting/Training.cs
@@ -976,7 +976,7 @@ public void MatrixFactorization()
// Read data file. The file contains 3 columns, label (float value), matrixColumnIndex (unsigned integer key), and matrixRowIndex (unsigned integer key).
// More specifically, LoadKey(1, 0, 19) means that the matrixColumnIndex column is read from the 2nd (indexed by 1) column in the data file as
// a key type (stored as a 32-bit unsigned integer) ranging from 0 to 19 (i.e., the training matrix has 20 columns).
- var reader = mlContext.Data.TextReader(ctx => (label: ctx.LoadFloat(0), matrixColumnIndex: ctx.LoadKey(1, 0, 19), matrixRowIndex: ctx.LoadKey(2, 0, 39)), hasHeader: true);
+ var reader = mlContext.Data.CreateTextReader(ctx => (label: ctx.LoadFloat(0), matrixColumnIndex: ctx.LoadKey(1, 0, 19), matrixRowIndex: ctx.LoadKey(2, 0, 39)), hasHeader: true);
// The parameter that will be passed into the onFit method below. The obtained predictor will be assigned to this variable
// so that we will be able to touch it.
diff --git a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs
index 16ceae22d3..9ff3432ca6 100644
--- a/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs
+++ b/test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs
@@ -438,7 +438,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,
// Note that we don't pass in "args", but pass in a default args so we test
// the auto-schema parsing.
- var loadedData = TextLoader.ReadFile(env, new TextLoader.Arguments(), new MultiFileSource(pathData));
+ var loadedData = ML.Data.ReadFromTextFile(pathData);
if (!CheckMetadataTypes(loadedData.Schema))
Failed();
diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs
index 316a9ea755..4692942e83 100644
--- a/test/Microsoft.ML.TestFramework/ModelHelper.cs
+++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs
@@ -14,7 +14,7 @@ namespace Microsoft.ML.TestFramework
#pragma warning disable 612, 618
public static class ModelHelper
{
- private static IHostEnvironment s_environment = new MLContext(seed: 1);
+ private static MLContext s_environment = new MLContext(seed: 1);
private static ITransformModel s_housePriceModel;
public static void WriteKcHousePriceModel(string dataPath, string outputModelPath)
@@ -41,17 +41,34 @@ public static void WriteKcHousePriceModel(string dataPath, Stream stream)
public static IDataView GetKcHouseDataView(string dataPath)
{
- var dataSchema = "col=Id:TX:0 col=Date:TX:1 col=Label:R4:2 col=Bedrooms:R4:3 " +
- "col=Bathrooms:R4:4 col=SqftLiving:R4:5 col=SqftLot:R4:6 col=Floors:R4:7 " +
- "col=Waterfront:R4:8 col=View:R4:9 col=Condition:R4:10 col=Grade:R4:11 " +
- "col=SqftAbove:R4:12 col=SqftBasement:R4:13 col=YearBuilt:R4:14 " +
- "col=YearRenovated:R4:15 col=Zipcode:R4:16 col=Lat:R4:17 col=Long:R4:18 " +
- "col=SqftLiving15:R4:19 col=SqftLot15:R4:20 header+ sep=,";
-
- var txtArgs = new Runtime.Data.TextLoader.Arguments();
- bool parsed = CmdParser.ParseArguments(s_environment, dataSchema, txtArgs);
- s_environment.Assert(parsed);
- return Runtime.Data.TextLoader.ReadFile(s_environment, txtArgs, new MultiFileSource(dataPath));
+ return s_environment.Data.ReadFromTextFile(dataPath,
+ columns: new[]
+ {
+ new Runtime.Data.TextLoader.Column("Id", Runtime.Data.DataKind.TX, 0),
+ new Runtime.Data.TextLoader.Column("Date", Runtime.Data.DataKind.TX, 1),
+ new Runtime.Data.TextLoader.Column("Label", Runtime.Data.DataKind.R4, 2),
+ new Runtime.Data.TextLoader.Column("BedRooms", Runtime.Data.DataKind.R4, 3),
+ new Runtime.Data.TextLoader.Column("BathRooms", Runtime.Data.DataKind.R4, 4),
+ new Runtime.Data.TextLoader.Column("SqftLiving", Runtime.Data.DataKind.R4, 5),
+ new Runtime.Data.TextLoader.Column("SqftLot", Runtime.Data.DataKind.R4, 6),
+ new Runtime.Data.TextLoader.Column("Floors", Runtime.Data.DataKind.R4, 7),
+ new Runtime.Data.TextLoader.Column("WaterFront", Runtime.Data.DataKind.R4, 8),
+ new Runtime.Data.TextLoader.Column("View", Runtime.Data.DataKind.R4, 9),
+ new Runtime.Data.TextLoader.Column("Condition", Runtime.Data.DataKind.R4, 10),
+ new Runtime.Data.TextLoader.Column("Grade", Runtime.Data.DataKind.R4, 11),
+ new Runtime.Data.TextLoader.Column("SqftAbove", Runtime.Data.DataKind.R4, 12),
+ new Runtime.Data.TextLoader.Column("SqftBasement", Runtime.Data.DataKind.R4, 13),
+ new Runtime.Data.TextLoader.Column("YearBuilt", Runtime.Data.DataKind.R4, 14),
+ new Runtime.Data.TextLoader.Column("YearRenovated", Runtime.Data.DataKind.R4, 15),
+ new Runtime.Data.TextLoader.Column("Zipcode", Runtime.Data.DataKind.R4, 16),
+ new Runtime.Data.TextLoader.Column("Lat", Runtime.Data.DataKind.R4, 17),
+ new Runtime.Data.TextLoader.Column("Long", Runtime.Data.DataKind.R4, 18),
+ new Runtime.Data.TextLoader.Column("SqftLiving15", Runtime.Data.DataKind.R4, 19),
+ new Runtime.Data.TextLoader.Column("SqftLot15", Runtime.Data.DataKind.R4, 20)
+ },
+ hasHeader: true,
+ separatorChar: ','
+ );
}
private static ITransformModel CreateKcHousePricePredictorModel(string dataPath)
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/ApiScenariosTests.cs b/test/Microsoft.ML.Tests/Scenarios/Api/ApiScenariosTests.cs
index 3b9db811e5..6027be2805 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/ApiScenariosTests.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/ApiScenariosTests.cs
@@ -52,34 +52,25 @@ public class SentimentPrediction
public float Score;
}
- private static TextLoader.Arguments MakeIrisTextLoaderArgs()
+ private static TextLoader.Column[] MakeIrisColumns()
{
- return new TextLoader.Arguments()
- {
- Separator = "comma",
- Column = new[]
+ return new[]
{
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
new TextLoader.Column("PetalLength", DataKind.R4, 2),
new TextLoader.Column("PetalWidth",DataKind.R4, 3),
new TextLoader.Column("Label", DataKind.Text, 4)
- }
- };
+ };
}
- private static TextLoader.Arguments MakeSentimentTextLoaderArgs()
+ private static TextLoader.Column[] MakeSentimentColumns()
{
- return new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ return new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
new TextLoader.Column("SentimentText", DataKind.Text, 1)
- }
- };
+ };
}
}
}
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
index 104855c2a9..66ba6ba137 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamples.cs
@@ -43,7 +43,7 @@ private void IntermediateData(string dataPath)
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// A boolean column depicting the 'target label'.
IsOver50K: ctx.LoadBool(0),
// Three text columns.
@@ -99,7 +99,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// We read the first 11 values as a single float vector.
FeatureVector: ctx.LoadFloat(0, 10),
// Separately, read the target variable.
@@ -178,7 +178,7 @@ private ITransformer TrainOnIris(string irisDataPath)
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
@@ -256,7 +256,7 @@ private void TrainAndInspectWeights(string dataPath)
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
@@ -328,7 +328,7 @@ private void NormalizationWorkout(string dataPath)
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset will be grouped together as one Features column.
Features: ctx.LoadFloat(0, 3),
// Label: kind of iris.
@@ -444,7 +444,7 @@ private void TextFeaturizationOn(string dataPath)
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
IsToxic: ctx.LoadBool(0),
Message: ctx.LoadText(1)
), hasHeader: true);
@@ -506,7 +506,7 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
Label: ctx.LoadBool(0),
// We will load all the categorical features into one vector column of size 8.
CategoricalFeatures: ctx.LoadText(1, 8),
@@ -573,7 +573,7 @@ private void CrossValidationOn(string dataPath)
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
@@ -633,7 +633,7 @@ private void MixMatch(string dataPath)
// Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(ctx => (
+ var reader = mlContext.Data.CreateTextReader(ctx => (
// The four features of the Iris dataset.
SepalLength: ctx.LoadFloat(0),
SepalWidth: ctx.LoadFloat(1),
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
index 1c14a5b158..706b4aad7d 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs
@@ -41,9 +41,7 @@ private void IntermediateData(string dataPath)
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[] {
// A boolean column depicting the 'label'.
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
// Three text columns.
@@ -52,8 +50,8 @@ private void IntermediateData(string dataPath)
new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
},
// First line of the file is a header, not a data row.
- HasHeader = true
- });
+ hasHeader: true
+ );
// Start creating our processing pipeline. For now, let's just concatenate all the text columns
// together into one.
@@ -93,9 +91,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 11 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
@@ -103,10 +99,10 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m
new TextLoader.Column("Target", DataKind.R4, 11),
},
// First line of the file is a header, not a data row.
- HasHeader = true,
+ hasHeader: true,
// Default separator is tab, but we need a semicolon.
- Separator = ";"
- });
+ separatorChar: ';'
+ );
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var trainData = reader.Read(trainDataPath);
@@ -171,9 +167,7 @@ private ITransformer TrainOnIris(string irisDataPath)
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[] {
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -182,8 +176,8 @@ private ITransformer TrainOnIris(string irisDataPath)
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
- Separator = ","
- });
+ separatorChar: ','
+ );
// Retrieve the training data.
var trainData = reader.Read(irisDataPath);
@@ -240,17 +234,15 @@ private void NormalizationWorkout(string dataPath)
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[] {
// The four features of the Iris dataset will be grouped together as one Features column.
new TextLoader.Column("Features", DataKind.R4, 0, 3),
// Label: kind of iris.
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
- Separator = ","
- });
+ separatorChar: ','
+ );
// Read the training data.
var trainData = reader.Read(dataPath);
@@ -303,14 +295,13 @@ private void TextFeaturizationOn(string dataPath)
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[]
+ {
new TextLoader.Column("IsToxic", DataKind.BL, 0),
new TextLoader.Column("Message", DataKind.TX, 1),
},
- HasHeader = true
- });
+ hasHeader: true
+ );
// Read the data.
var data = reader.Read(dataPath);
@@ -371,9 +362,8 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
var mlContext = new MLContext();
// Define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[]
+ {
new TextLoader.Column("Label", DataKind.BL, 0),
// We will load all the categorical features into one vector column of size 8.
new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
@@ -382,8 +372,8 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
// Let's also separately load the 'Workclass' column.
new TextLoader.Column("Workclass", DataKind.TX, 1),
},
- HasHeader = true
- });
+ hasHeader: true
+ );
// Read the data.
var data = reader.Read(dataPath);
@@ -436,9 +426,8 @@ private void CrossValidationOn(string dataPath)
// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Column = new[] {
+ var reader = mlContext.Data.CreateTextReader(new[]
+ {
// The features of the Iris dataset, each read as a separate float column.
new TextLoader.Column("SepalLength", DataKind.R4, 0),
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
@@ -448,8 +437,8 @@ private void CrossValidationOn(string dataPath)
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
- Separator = ","
- });
+ separatorChar: ','
+ );
// Read the data.
var data = reader.Read(dataPath);
@@ -498,14 +487,14 @@ private void ReadDataDynamic(string dataPath)
var mlContext = new MLContext();
// Create the reader: define the data columns and where to find them in the text file.
- var reader = mlContext.Data.TextReader(new[] {
+ var reader = mlContext.Data.CreateTextReader(new[] {
// We read the first 10 values as a single float vector.
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
// Separately, read the target variable.
new TextLoader.Column("Target", DataKind.R4, 10)
},
// Default separator is tab, but we need a comma.
- s => s.Separator = ",");
+ separatorChar: ',' );
// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var data = reader.Read(dataPath);
@@ -527,11 +516,11 @@ public class OutputRow
public void CustomTransformer()
{
var mlContext = new MLContext();
- var data = mlContext.Data.ReadFromTextFile(new[]
+ var data = mlContext.Data.ReadFromTextFile(GetDataPath("adult.tiny.with-schema.txt"), new[]
{
new TextLoader.Column("Income", DataKind.R4, 10),
new TextLoader.Column("Features", DataKind.R4, 12, 14)
- }, GetDataPath("adult.tiny.with-schema.txt"), s => { s.Separator = "\t"; s.HasHeader = true; });
+ }, hasHeader: true);
PrepareData(mlContext, data);
TrainModel(mlContext, data);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs
index 1eeaf9c13c..3c5cabb3dc 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs
@@ -27,7 +27,7 @@ void New_CrossValidation()
{
var ml = new MLContext(seed: 1, conc: 1);
- var data = ml.Data.TextReader(MakeSentimentTextLoaderArgs()).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
+ var data = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
// Pipeline.
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.ConvergenceTolerance = 1f; s.NumThreads = 1; }));
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs
index 536a74283f..8694b5b199 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs
@@ -30,7 +30,7 @@ void New_DecomposableTrainAndPredict()
var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
var ml = new MLContext();
- var data = ml.Data.TextReader(MakeIrisTextLoaderArgs())
+ var data = ml.Data.CreateTextReader(MakeIrisColumns(), separatorChar: ',')
.Read(dataPath);
var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
@@ -41,7 +41,7 @@ void New_DecomposableTrainAndPredict()
var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
var engine = model.MakePredictionFunction(ml);
- var testLoader = TextLoader.ReadFile(ml, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath));
+ var testLoader = ml.Data.ReadFromTextFile(dataPath, MakeIrisColumns(), separatorChar: ',');
var testData = testLoader.AsEnumerable(ml, false);
foreach (var input in testData.Take(20))
{
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
index ad249fa6e9..b09dbbe82d 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
@@ -22,7 +22,7 @@ public void New_Evaluation()
var ml = new MLContext(seed: 1, conc: 1);
// Pipeline.
- var pipeline = ml.Data.TextReader(MakeSentimentTextLoaderArgs())
+ var pipeline = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true)
.Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features"))
.Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: s => s.NumThreads = 1));
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs
index 2c750a7b81..392bf95b85 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs
@@ -29,7 +29,7 @@ void New_Extensibility()
var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
var ml = new MLContext();
- var data = ml.Data.TextReader(MakeIrisTextLoaderArgs())
+ var data = ml.Data.CreateTextReader(MakeIrisColumns(), separatorChar: ',')
.Read(dataPath);
Action action = (i, j) =>
@@ -49,7 +49,7 @@ void New_Extensibility()
var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring);
var engine = model.MakePredictionFunction(ml);
- var testLoader = TextLoader.ReadFile(ml, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath));
+ var testLoader = ml.Data.ReadFromTextFile(dataPath, MakeIrisColumns(), separatorChar: ',');
var testData = testLoader.AsEnumerable(ml, false);
foreach (var input in testData.Take(20))
{
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs
index 667581c9b3..b85f5e646b 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/FileBasedSavingOfData.cs
@@ -27,7 +27,7 @@ void New_FileBasedSavingOfData()
var ml = new MLContext(seed: 1, conc: 1);
var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
- var trainData = ml.Data.TextReader(MakeSentimentTextLoaderArgs())
+ var trainData = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true)
.Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features"))
.Fit(src).Read(src);
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs
index daf2777047..6cf2898db4 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/IntrospectiveTraining.cs
@@ -34,7 +34,7 @@ public partial class ApiScenariosTests
public void New_IntrospectiveTraining()
{
var ml = new MLContext(seed: 1, conc: 1);
- var data = ml.Data.TextReader(MakeSentimentTextLoaderArgs())
+ var data = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true)
.Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs
index 182451a30f..032ef53787 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs
@@ -26,7 +26,7 @@ public partial class ApiScenariosTests
public void New_Metacomponents()
{
var ml = new MLContext();
- var data = ml.Data.TextReader(MakeIrisTextLoaderArgs())
+ var data = ml.Data.CreateTextReader(MakeIrisColumns(), separatorChar: ',')
.Read(GetDataPath(TestDatasets.irisData.trainFilename));
var sdcaTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; });
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs
index f2285b5200..b5ba493b2d 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/MultithreadedPrediction.cs
@@ -26,7 +26,7 @@ public partial class ApiScenariosTests
void New_MultithreadedPrediction()
{
var ml = new MLContext(seed: 1, conc: 1);
- var reader = ml.Data.TextReader(MakeSentimentTextLoaderArgs());
+ var reader = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true);
var data = reader.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));
// Pipeline.
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs
index ffda5b1ce1..52a9266f3a 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/ReconfigurablePrediction.cs
@@ -22,7 +22,7 @@ public partial class ApiScenariosTests
public void New_ReconfigurablePrediction()
{
var ml = new MLContext(seed: 1, conc: 1);
- var dataReader = ml.Data.TextReader(MakeSentimentTextLoaderArgs());
+ var dataReader = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true);
var data = dataReader.Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
var testData = dataReader.Read(GetDataPath(TestDatasets.Sentiment.testFilename));
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs
index 016acd6220..22ec24c29a 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/SimpleTrainAndPredict.cs
@@ -22,7 +22,7 @@ public partial class ApiScenariosTests
public void New_SimpleTrainAndPredict()
{
var ml = new MLContext(seed: 1, conc: 1);
- var reader = ml.Data.TextReader(MakeSentimentTextLoaderArgs());
+ var reader = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true);
var data = reader.Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
// Pipeline.
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
index adab64dec1..59bd307dbd 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs
@@ -26,7 +26,7 @@ public partial class ApiScenariosTests
public void New_TrainSaveModelAndPredict()
{
var ml = new MLContext(seed: 1, conc: 1);
- var reader = ml.Data.TextReader(MakeSentimentTextLoaderArgs());
+ var reader = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true);
var data = reader.Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
// Pipeline.
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs
index a117de429c..6c47365926 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithInitialPredictor.cs
@@ -22,7 +22,7 @@ public void New_TrainWithInitialPredictor()
var ml = new MLContext(seed: 1, conc: 1);
- var data = ml.Data.TextReader(MakeSentimentTextLoaderArgs()).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
+ var data = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
// Pipeline.
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features");
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs
index 2a7030ea94..bda23779c4 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainWithValidationSet.cs
@@ -20,7 +20,7 @@ public void New_TrainWithValidationSet()
{
var ml = new MLContext(seed: 1, conc: 1);
// Pipeline.
- var reader = ml.Data.TextReader(MakeSentimentTextLoaderArgs());
+ var reader = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true);
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features");
// Train the pipeline, prepare train and validation set.
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
index c91e2498a5..ebab7c3a3b 100644
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
+++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
@@ -26,7 +26,7 @@ public partial class ApiScenariosTests
void New_Visibility()
{
var ml = new MLContext(seed: 1, conc: 1);
- var pipeline = ml.Data.TextReader(MakeSentimentTextLoaderArgs())
+ var pipeline = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true)
.Append(ml.Transforms.Text.FeaturizeText("SentimentText", "Features", s => s.OutputTokens = true));
var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
index 4752119236..1de3d82ace 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/IrisPlantClassificationTests.cs
@@ -26,10 +26,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
{
var mlContext = new MLContext(seed: 1, conc: 1);
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments()
- {
- HasHeader = false,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(columns: new[]
{
new TextLoader.Column("Label", DataKind.R4, 0),
new TextLoader.Column("SepalLength", DataKind.R4, 1),
@@ -37,7 +34,7 @@ public void TrainAndPredictIrisModelUsingDirectInstantiationTest()
new TextLoader.Column("PetalLength", DataKind.R4, 3),
new TextLoader.Column("PetalWidth", DataKind.R4, 4)
}
- });
+ );
var pipe = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
.Append(mlContext.Transforms.Normalize("Features"))
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs
index b04a360168..ead36a22a5 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/SentimentPredictionTests.cs
@@ -25,17 +25,14 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTest()
var env = new MLContext(seed: 1, conc: 1);
// Pipeline
- var loader = TextLoader.ReadFile(env,
- new TextLoader.Arguments()
+ var loader = env.Data.ReadFromTextFile(dataPath,
+ columns: new[]
{
- Separator = "tab",
- HasHeader = true,
- Column = new[]
- {
- new TextLoader.Column("Label", DataKind.Num, 0),
- new TextLoader.Column("SentimentText", DataKind.Text, 1)
- }
- }, new MultiFileSource(dataPath));
+ new TextLoader.Column("Label", DataKind.Num, 0),
+ new TextLoader.Column("SentimentText", DataKind.Text, 1)
+ },
+ hasHeader: true
+ );
var trans = TextFeaturizingEstimator.Create(env, new TextFeaturizingEstimator.Arguments()
{
@@ -86,17 +83,14 @@ public void TrainAndPredictSentimentModelWithDirectionInstantiationTestWithWordE
var env = new MLContext(seed: 1, conc: 1);
// Pipeline
- var loader = TextLoader.ReadFile(env,
- new TextLoader.Arguments()
+ var loader = env.Data.ReadFromTextFile(dataPath,
+ columns: new[]
{
- Separator = "tab",
- HasHeader = true,
- Column = new[]
- {
- new TextLoader.Column("Label", DataKind.Num, 0),
- new TextLoader.Column("SentimentText", DataKind.Text, 1)
- }
- }, new MultiFileSource(dataPath));
+ new TextLoader.Column("Label", DataKind.Num, 0),
+ new TextLoader.Column("SentimentText", DataKind.Text, 1)
+ },
+ hasHeader: true
+ );
var text = TextFeaturizingEstimator.Create(env, new TextFeaturizingEstimator.Arguments()
{
diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
index 10d0db46a9..130d232e95 100644
--- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
+++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs
@@ -257,18 +257,15 @@ public void TensorFlowInputsOutputsSchemaTest()
public void TensorFlowTransformMNISTConvTest()
{
var mlContext = new MLContext(seed: 1, conc: 1);
- var reader = mlContext.Data.TextReader(
- new TextLoader.Arguments()
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(
+ columns: new[]
{
new TextLoader.Column("Label", DataKind.U4 , new [] { new TextLoader.Range(0) }, new KeyRange(0, 9)),
new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) })
- }
- });
+ },
+ hasHeader: true
+ );
var trainData = reader.Read(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
var testData = reader.Read(GetDataPath(TestDatasets.mnistOneClass.testFilename));
@@ -303,17 +300,12 @@ public void TensorFlowTransformMNISTLRTrainingTest()
try
{
var mlContext = new MLContext(seed: 1, conc: 1);
- var reader = mlContext.Data.TextReader(
- new TextLoader.Arguments
- {
- Separator = "tab",
- HasHeader = false,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(columns: new[]
{
new TextLoader.Column("Label", DataKind.I8, 0),
new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) })
}
- });
+ );
var trainData = reader.Read(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
var testData = reader.Read(GetDataPath(TestDatasets.mnistOneClass.testFilename));
@@ -398,17 +390,13 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? shuffleS
{
var mlContext = new MLContext(seed: 1, conc: 1);
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Separator = "tab",
- HasHeader = false,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(new[]
{
new TextLoader.Column("Label", DataKind.U4, new []{ new TextLoader.Range(0) }, new KeyRange(0, 9)),
new TextLoader.Column("TfLabel", DataKind.I8, 0),
new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) })
}
- });
+ );
var trainData = reader.Read(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
var testData = reader.Read(GetDataPath(TestDatasets.mnistOneClass.testFilename));
@@ -491,16 +479,13 @@ public void TensorFlowTransformMNISTConvSavedModelTest()
// of predicted label of a single in-memory example.
var mlContext = new MLContext(seed: 1, conc: 1);
- var reader = mlContext.Data.TextReader(new TextLoader.Arguments
- {
- Separator = "tab",
- HasHeader = true,
- Column = new[]
+ var reader = mlContext.Data.CreateTextReader(columns: new[]
{
new TextLoader.Column("Label", DataKind.U4 , new [] { new TextLoader.Range(0) }, new KeyRange(0, 9)),
new TextLoader.Column("Placeholder", DataKind.R4, new []{ new TextLoader.Range(1, 784) })
- }
- });
+ },
+ hasHeader: true
+ );
var trainData = reader.Read(GetDataPath(TestDatasets.mnistTiny28.trainFilename));
var testData = reader.Read(GetDataPath(TestDatasets.mnistOneClass.testFilename));
@@ -625,14 +610,13 @@ public void TensorFlowTransformCifar()
var dataFile = GetDataPath("images/images.tsv");
var imageFolder = Path.GetDirectoryName(dataFile);
- var data = TextLoader.Create(mlContext, new TextLoader.Arguments()
- {
- Column = new[]
- {
+ var data = mlContext.Data.ReadFromTextFile(dataFile,
+ columns: new[]
+ {
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
- }
- }, new MultiFileSource(dataFile));
+ }
+ );
var pipeEstimator = new ImageLoadingEstimator(mlContext, imageFolder, ("ImagePath", "ImageReal"))
.Append(new ImageResizingEstimator(mlContext, "ImageReal", "ImageCropped", imageWidth, imageHeight))
@@ -673,14 +657,12 @@ public void TensorFlowTransformCifarSavedModel()
var dataFile = GetDataPath("images/images.tsv");
var imageFolder = Path.GetDirectoryName(dataFile);
- var data = TextLoader.Create(mlContext, new TextLoader.Arguments()
- {
- Column = new[]
+ var data = mlContext.Data.ReadFromTextFile(dataFile, columns: new[]
{
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
- }
- }, new MultiFileSource(dataFile));
+ }
+ );
var images = ImageLoaderTransform.Create(mlContext, new ImageLoaderTransform.Arguments()
{
Column = new ImageLoaderTransform.Column[1]
@@ -732,14 +714,13 @@ public void TensorFlowTransformCifarInvalidShape()
var imageWidth = 28;
var dataFile = GetDataPath("images/images.tsv");
var imageFolder = Path.GetDirectoryName(dataFile);
- var data = TextLoader.Create(mlContext, new TextLoader.Arguments()
- {
- Column = new[]
+ var data = mlContext.Data.ReadFromTextFile(dataFile,
+ columns: new[]
{
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
- }
- }, new MultiFileSource(dataFile));
+ }
+ );
var images = ImageLoaderTransform.Create(mlContext, new ImageLoaderTransform.Arguments()
{
Column = new ImageLoaderTransform.Column[1]
diff --git a/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs
index 840676b1d5..11f9a440ee 100644
--- a/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs
+++ b/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs
@@ -56,10 +56,10 @@ public void TestCustomTransformer()
{
string dataPath = GetDataPath("adult.tiny.with-schema.txt");
var source = new MultiFileSource(dataPath);
- var loader = ML.Data.TextReader(new[] {
+ var loader = ML.Data.CreateTextReader(new[] {
new TextLoader.Column("Float1", DataKind.R4, 9),
new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(9), new TextLoader.Range(10), new TextLoader.Range(11), new TextLoader.Range(12) })
- }, s => { s.Separator = "\t"; s.HasHeader = true; });
+ }, hasHeader: true);
var data = loader.Read(source);
@@ -95,11 +95,11 @@ public void TestSchemaPropagation()
{
string dataPath = GetDataPath("adult.test");
var source = new MultiFileSource(dataPath);
- var loader = ML.Data.TextReader(new[] {
+ var loader = ML.Data.CreateTextReader(new[] {
new TextLoader.Column("Float1", DataKind.R4, 0),
new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
new TextLoader.Column("Text1", DataKind.Text, 0)
- }, s => { s.Separator = ","; s.HasHeader = true; });
+ }, hasHeader: true, separatorChar: ',' );
var data = loader.Read(source);