diff --git a/docs/code/MlNetCookBook.md b/docs/code/MlNetCookBook.md index edfb3236e5..c75c82c672 100644 --- a/docs/code/MlNetCookBook.md +++ b/docs/code/MlNetCookBook.md @@ -198,8 +198,8 @@ var reader = mlContext.Data.CreateTextReader(new[] { // Separately, read the target variable. new TextLoader.Column("Target", DataKind.R4, 11) }, - // Default separator is tab, but we need a comma. - separatorChar: ','); + // Default separator list only contains a tab, but we need a comma. + separators: new[] { ',' }); // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed). var data = reader.Read(dataPath); @@ -219,8 +219,8 @@ private class AdultData // Read the data into a data view. var trainData = mlContext.Data.ReadFromTextFile(trainDataPath, - // Default separator is tab, but we need a semicolon. - separatorChar: ';', + // Default separator list only contains a tab, but we need a semicolon. + separators: new[] { ';' }, // First line of the file is a header, not a data row. hasHeader: true ); @@ -328,8 +328,8 @@ In the file above, the last column (12th) is label that we predict, and all the // First, we define the reader: specify the data columns and where to find them in the text file. // Read the data into a data view. Remember though, readers are lazy, so the actual reading will happen when the data is accessed. var trainData = mlContext.Data.ReadFromTextFile(dataPath, - // Default separator is tab, but the dataset has comma. - separatorChar: ',' + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' } ); // Sometime, caching data in-memory after its first access can save some loading time when the data is going to be used @@ -372,8 +372,8 @@ Assuming the example above was used to train the model, here's how you calculate ```csharp // Read the test dataset. 
var testData = mlContext.Data.ReadFromTextFile(testDataPath, - // Default separator is tab, but the dataset has comma. - separatorChar: ',' + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' } ); // Calculate metrics of the model on the test data. var metrics = mlContext.Regression.Evaluate(model.Transform(testData), label: "Target"); @@ -410,8 +410,8 @@ Here is the full example. Let's imagine that we have built a model for the famou // Step one: read the data as an IDataView. // Retrieve the training data. var trainData = mlContext.Data.ReadFromTextFile(irisDataPath, - // Default separator is tab, but the dataset has comma. - separatorChar: ',' + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' } ); // Build the training pipeline. @@ -534,8 +534,8 @@ This is how we can extract the learned parameters out of the model that we train // Step one: read the data as an IDataView. // Retrieve the training data. var trainData = mlContext.Data.ReadFromTextFile(irisDataPath, - // Default separator is tab, but the dataset has comma. - separatorChar: ',' + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' } ); // Build the training pipeline. @@ -622,8 +622,8 @@ Here's a snippet of code that demonstrates normalization in learning pipelines. // Read the training data. var trainData = mlContext.Data.ReadFromTextFile(dataPath, - // Default separator is tab, but the dataset has comma. - separatorChar: ',' + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' } ); // Apply all kinds of standard ML.NET normalization to the raw features. @@ -807,8 +807,8 @@ Here's an example of training on Iris dataset using randomized 90/10 train-test ```csharp // Step one: read the data as an IDataView. 
var data = mlContext.Data.ReadFromTextFile(dataPath, - // Default separator is tab, but the dataset has comma. - separatorChar: ',' + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' } ); // Build the training pipeline. @@ -863,8 +863,8 @@ var reader = mlContext.Data.CreateTextReader(ctx => ( // Label: kind of iris. Label: ctx.LoadText(4) ), - // Default separator is tab, but the dataset has comma. - separatorChar: ','); + // Default separator list only contains a tab, but the dataset has comma. + separators: new[] { ',' }); // Read the data. var data = reader.Read(dataPath); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs index 562cadbf1e..b7aa94d82e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs @@ -39,7 +39,7 @@ public static void Example() new TextLoader.Column("Words", DataKind.TX, 0), new TextLoader.Column("Ids", DataKind.I4, 1), }, - separatorChar: ',' + separators: new[] { ',' } ); // Load the TensorFlow model once. diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 10e622061b..282838c49a 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -428,13 +428,13 @@ public class Options [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")] // this is internal as it only serves the command line interface - internal string Separator = Defaults.Separator.ToString(); + internal string Separator = Defaults.Separator; /// - /// The characters that should be used as separators column separator.
+ /// Array of characters used as separators between data points in a row. {'\t'} will be used if not specified. /// [Argument(ArgumentType.AtMostOnce, Name = nameof(Separator), Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly, HelpText = "Source column separator.", ShortName = "sep")] - public char[] Separators = new[] { Defaults.Separator }; + public char[] Separators = Defaults.Separators; /// /// Specifies the input columns that should be mapped to columns. @@ -488,7 +488,8 @@ internal static class Defaults { internal const bool AllowQuoting = false; internal const bool AllowSparse = false; - internal const char Separator = '\t'; + internal static char[] Separators => new[] { '\t' }; + internal const string Separator = "\t"; internal const bool HasHeader = false; internal const bool TrimWhitespace = false; } @@ -1064,15 +1065,15 @@ private bool HasHeader /// /// The environment to use. /// Defines a mapping between input columns in the file and IDataView columns. - /// The character used as separator between data points in a row. By default the tab character is used as separator. + /// The array of characters used as separators between data points in a row. {'\t'} will be used if not specified. /// Whether the file has a header. /// Whether the file can contain numerical vectors in sparse format. /// Whether the content of a column can be parsed from a string starting and ending with quote. /// Allows to expose items that can be used for reading. 
- internal TextLoader(IHostEnvironment env, Column[] columns, char separatorChar = Defaults.Separator, + internal TextLoader(IHostEnvironment env, Column[] columns, char[] separators = null, bool hasHeader = Defaults.HasHeader, bool allowSparse = Defaults.AllowSparse, bool allowQuoting = Defaults.AllowQuoting, IMultiStreamSource dataSample = null) - : this(env, MakeArgs(columns, hasHeader, new[] { separatorChar }, allowSparse, allowQuoting), dataSample) + : this(env, MakeArgs(columns, hasHeader, separators ?? Defaults.Separators, allowSparse, allowQuoting), dataSample) { } @@ -1145,10 +1146,11 @@ internal TextLoader(IHostEnvironment env, Options options = null, IMultiStreamSo _host.CheckNonEmpty(options.Separator, nameof(options.Separator), "Must specify a separator"); - //Default arg.Separator is tab and default options. Separators is also a '\t'. - //At a time only one default can be different and whichever is different that will - //be used. - if (options.Separators.Length > 1 || options.Separators[0] != '\t') + // Default options.Separator is "\t" while default options.Separators is {'\t'}. + // We use options.Separators only if options.Separator is default and choose options.Separator otherwise. + // The logic behind this is that options.Separators has higher priority because it's a public API argument, but + // options.Separator is only for the command line tool and entry points. + if (options.Separator == "\t") { var separators = new HashSet(); foreach (char c in options.Separators) @@ -1435,11 +1437,12 @@ void ICanSaveModel.Save(ModelSaveContext ctx) internal static TextLoader CreateTextReader(IHostEnvironment host, bool hasHeader = Defaults.HasHeader, - char separator = Defaults.Separator, + char[] separator = null, bool allowQuotedStrings = Defaults.AllowQuoting, bool supportSparse = Defaults.AllowSparse, bool trimWhitespace = Defaults.TrimWhitespace) { + separator = separator ??
Defaults.Separators; var userType = typeof(TInput); var fieldInfos = userType.GetFields(BindingFlags.Public | BindingFlags.Instance); @@ -1492,7 +1495,7 @@ internal static TextLoader CreateTextReader(IHostEnvironment host, Options options = new Options { HasHeader = hasHeader, - Separators = new[] { separator }, + Separators = separator, AllowQuoting = allowQuotedStrings, AllowSparse = supportSparse, TrimWhitespace = trimWhitespace, diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index feb6a8c320..1371605d1f 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -16,19 +16,19 @@ public static class TextLoaderSaverCatalog /// /// The catalog. /// Array of columns defining the schema. - /// The character used as separator between data points in a row. By default the tab character is used as separator. + /// Array of characters used as separators between data points in a row. {'\t'} will be used if not specified. /// Whether the file has a header. /// Whether the file can contain numerical vectors in sparse format. /// Whether the file can contain column defined by a quoted string. /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. 
public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, TextLoader.Column[] columns, - char separatorChar = TextLoader.Defaults.Separator, + char[] separators = null, bool hasHeader = TextLoader.Defaults.HasHeader, bool allowSparse = TextLoader.Defaults.AllowSparse, bool allowQuoting = TextLoader.Defaults.AllowQuoting, IMultiStreamSource dataSample = null) - => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, separatorChar, hasHeader, allowSparse, allowQuoting, dataSample); + => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, separators ?? TextLoader.Defaults.Separators, hasHeader, allowSparse, allowQuoting, dataSample); /// /// Create a text loader . @@ -45,7 +45,7 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// Create a text loader by inferencing the dataset schema from a data model type. /// /// The catalog. - /// Column separator character. Default is '\t' + /// Array of characters used as separators between data points in a row. {'\t'} will be used if not specified. /// Does the file contains header? 
/// Whether the input may include quoted values, /// which can contain separator characters, colons, @@ -57,12 +57,12 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// except for 3rd and 5th columns which have values 6 and 3 /// Remove trailing whitespace from lines public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, - char separatorChar = TextLoader.Defaults.Separator, + char[] separators = null, bool hasHeader = TextLoader.Defaults.HasHeader, bool allowQuoting = TextLoader.Defaults.AllowQuoting, bool allowSparse = TextLoader.Defaults.AllowSparse, bool trimWhitespace = TextLoader.Defaults.TrimWhitespace) - => TextLoader.CreateTextReader(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace); + => TextLoader.CreateTextReader(CatalogUtils.GetEnvironment(catalog), hasHeader, separators ?? TextLoader.Defaults.Separators, allowQuoting, allowSparse, trimWhitespace); /// /// Read a data view from a text file using . @@ -70,13 +70,13 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat /// The catalog. /// The columns of the schema. /// Whether the file has a header. - /// The character used as separator between data points in a row. By default the tab character is used as separator. + /// Array of characters used as separators between data points in a row. {'\t'} will be used if not specified. /// The path to the file. /// The data view. public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, string path, TextLoader.Column[] columns, - char separatorChar = TextLoader.Defaults.Separator, + char[] separators = null, bool hasHeader = TextLoader.Defaults.HasHeader) { Contracts.CheckNonEmpty(path, nameof(path)); @@ -85,7 +85,7 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, // REVIEW: it is almost always a mistake to have a 'trainable' text loader here. 
// Therefore, we are going to disallow data sample. - var reader = new TextLoader(env, columns, separatorChar, hasHeader, dataSample: null); + var reader = new TextLoader(env, columns, separators ?? TextLoader.Defaults.Separators, hasHeader, dataSample: null); return reader.Read(new MultiFileSource(path)); } @@ -94,7 +94,7 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, /// /// The catalog. /// Does the file contains header? - /// Column separator character. Default is '\t' + /// Array of characters used as separators between data points in a row. {'\t'} will be used if not specified. /// Whether the input may include quoted values, /// which can contain separator characters, colons, /// and distinguish empty values from missing values. When true, consecutive separators @@ -108,7 +108,7 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, /// The data view. public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, string path, - char separatorChar = TextLoader.Defaults.Separator, + char[] separators = null, bool hasHeader = TextLoader.Defaults.HasHeader, bool allowQuoting = TextLoader.Defaults.AllowQuoting, bool allowSparse = TextLoader.Defaults.AllowSparse, @@ -118,8 +118,8 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog cata // REVIEW: it is almost always a mistake to have a 'trainable' text loader here. // Therefore, we are going to disallow data sample. - return TextLoader.CreateTextReader(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace) - .Read(new MultiFileSource(path)); + return TextLoader.CreateTextReader(CatalogUtils.GetEnvironment(catalog), hasHeader, + separators ?? 
TextLoader.Defaults.Separators, allowQuoting, allowSparse, trimWhitespace).Read(new MultiFileSource(path)); } /// diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 203bd6e6bd..d226cf9f9d 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -120,7 +120,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext) new TextLoader.Column("native-country", DataKind.R4, 13), new TextLoader.Column("IsOver50K", DataKind.BL, 14), }, - separatorChar: ',', + separators: new[] { ',' }, hasHeader: true ); diff --git a/test/Microsoft.ML.Functional.Tests/DataIO.cs b/test/Microsoft.ML.Functional.Tests/DataIO.cs index b10d38b8c5..6792ce0d4f 100644 --- a/test/Microsoft.ML.Functional.Tests/DataIO.cs +++ b/test/Microsoft.ML.Functional.Tests/DataIO.cs @@ -97,7 +97,7 @@ public void WriteToAndReadASchemaFromADelimitedFile() { // Serialize a dataset with a known schema to a file. 
var filePath = SerializeDatasetToFile(mlContext, dataBefore, separator); - var dataAfter = mlContext.Data.ReadFromTextFile(filePath, separatorChar: separator, hasHeader: true, allowQuoting: true); + var dataAfter = mlContext.Data.ReadFromTextFile(filePath, separators: new[] { separator }, hasHeader: true, allowQuoting: true); Common.AssertTestTypeDatasetsAreEqual(mlContext, dataBefore, dataAfter); } } diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/TypeTestData.cs b/test/Microsoft.ML.Functional.Tests/Datasets/TypeTestData.cs index f5524098bb..505d5996c4 100644 --- a/test/Microsoft.ML.Functional.Tests/Datasets/TypeTestData.cs +++ b/test/Microsoft.ML.Functional.Tests/Datasets/TypeTestData.cs @@ -100,7 +100,7 @@ public static TextLoader GetTextLoader(MLContext mlContext, char separator) new TextLoader.Column("Ug", DataKind.UG, 15), new TextLoader.Column("Features", DataKind.R4, 16, 16 + _numFeatures-1), }, - separatorChar: separator, + separators: new[] { separator }, hasHeader: true, allowQuoting: true); } diff --git a/test/Microsoft.ML.Tests/OnnxConversionTest.cs b/test/Microsoft.ML.Tests/OnnxConversionTest.cs index 0142b900cd..5744519aee 100644 --- a/test/Microsoft.ML.Tests/OnnxConversionTest.cs +++ b/test/Microsoft.ML.Tests/OnnxConversionTest.cs @@ -51,8 +51,7 @@ public void SimpleEndToEndOnnxConversionTest() var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); var mlContext = new MLContext(seed: 1, conc: 1); var data = mlContext.Data.ReadFromTextFile(trainDataPath, - separatorChar: ';' -, + separators: new[] { ';' }, hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = @@ -129,7 +128,7 @@ public void KmeansOnnxConversionTest() string dataPath = GetDataPath("breast-cancer.txt"); // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed). 
var data = mlContext.Data.ReadFromTextFile(dataPath, - separatorChar: '\t', + separators: new[] { '\t' }, hasHeader: true); var pipeline = mlContext.Transforms.Normalize("Features"). @@ -207,7 +206,7 @@ public void KeyToVectorWithBagOnnxConversionTest() string dataPath = GetDataPath("breast-cancer.txt"); var data = mlContext.Data.ReadFromTextFile(dataPath, - separatorChar: '\t', + separators: new[] { '\t' }, hasHeader: true); var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.Categorical.OneHotEncodingTransformer.OutputKind.Bag) @@ -305,8 +304,7 @@ public void LogisticRegressionOnnxConversionTest() var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); var mlContext = new MLContext(seed: 1, conc: 1); var data = mlContext.Data.ReadFromTextFile(trainDataPath, - separatorChar: ';' -, + separators: new[] { ';' }, hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = @@ -338,8 +336,7 @@ public void LightGbmBinaryClassificationOnnxConversionTest() var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); var mlContext = new MLContext(seed: 1, conc: 1); var data = mlContext.Data.ReadFromTextFile(trainDataPath, - separatorChar: ';' -, + separators: new[] { ';' }, hasHeader: true); var cachedTrainData = mlContext.Data.Cache(data); var dynamicPipeline = @@ -371,7 +368,7 @@ public void MulticlassLogisticRegressionOnnxConversionTest() string dataPath = GetDataPath("breast-cancer.txt"); var data = mlContext.Data.ReadFromTextFile(dataPath, - separatorChar: '\t', + separators: new[] { '\t' }, hasHeader: true); var pipeline = mlContext.Transforms.Normalize("Features"). 
@@ -401,7 +398,7 @@ public void RemoveVariablesInPipelineTest() string dataPath = GetDataPath("breast-cancer.txt"); var data = mlContext.Data.ReadFromTextFile(dataPath, - separatorChar: '\t', + separators: new[] { '\t' }, hasHeader: true); var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("F2", "F2", Transforms.Categorical.OneHotEncodingTransformer.OutputKind.Bag) @@ -452,7 +449,7 @@ public void WordEmbeddingsTest() var mlContext = new MLContext(seed: 1, conc: 1); var dataPath = GetDataPath(@"small-sentiment-test.tsv"); var embedNetworkPath = GetDataPath(@"shortsentiment.emd"); - var data = mlContext.Data.ReadFromTextFile(dataPath, separatorChar: '\t', hasHeader: false); + var data = mlContext.Data.ReadFromTextFile(dataPath, separators: new[] { '\t' }, hasHeader: false); var pipeline = mlContext.Transforms.Text.ExtractWordEmbeddings("Embed", embedNetworkPath, "Tokens"); var model = pipeline.Fit(data); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs index 33131d2bdf..2deb08073d 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs @@ -79,8 +79,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m // Read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed). var trainData = mlContext.Data.ReadFromTextFile(trainDataPath, // Default separator is tab, but we need a semicolon. - separatorChar: ';' -, + separators: new[] { ';' }, // First line of the file is a header, not a data row. hasHeader: true); @@ -115,8 +114,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m // Read the test dataset. 
var testData = mlContext.Data.ReadFromTextFile(testDataPath, // Default separator is tab, but we need a semicolon. - separatorChar: ';' -, + separators: new[] { ';' }, // First line of the file is a header, not a data row. hasHeader: true); @@ -152,7 +150,7 @@ private ITransformer TrainOnIris(string irisDataPath) // Retrieve the training data. var trainData = mlContext.Data.ReadFromTextFile(irisDataPath, // Default separator is tab, but the dataset has comma. - separatorChar: ',' + separators: new[] { ',' } ); //Preview the data @@ -237,7 +235,7 @@ private void NormalizationWorkout(string dataPath) // Read the training data. var trainData = mlContext.Data.ReadFromTextFile(dataPath, // Default separator is tab, but the dataset has comma. - separatorChar: ',' + separators: new[] { ',' } ); // Apply all kinds of standard ML.NET normalization to the raw features. @@ -409,7 +407,7 @@ private void CrossValidationOn(string dataPath) // Step one: read the data as an IDataView. var data = mlContext.Data.ReadFromTextFile(dataPath, // Default separator is tab, but the dataset has comma. - separatorChar: ',' + separators: new[] { ',' } ); // Build the training pipeline. @@ -458,7 +456,7 @@ private void ReadDataDynamic(string dataPath) // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed). var reader = mlContext.Data.ReadFromTextFile(dataPath, // Default separator is tab, but we need a comma. - separatorChar: ','); + separators: new[] { ',' }); } // Define a class for all the input columns that we intend to consume. 
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs index c4b1ca0e99..47bfcc2164 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/DecomposableTrainAndPredict.cs @@ -28,7 +28,7 @@ void DecomposableTrainAndPredict() var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); var ml = new MLContext(); - var data = ml.Data.ReadFromTextFile(dataPath, separatorChar: ','); + var data = ml.Data.ReadFromTextFile(dataPath, separators: new[] { ',' }); var pipeline = new ColumnConcatenatingEstimator (ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest) @@ -39,7 +39,7 @@ void DecomposableTrainAndPredict() var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring); var engine = model.CreatePredictionEngine(ml); - var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',', hasHeader: true); + var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separators: new[] { ',' }, hasHeader: true); var testData = ml.CreateEnumerable(testLoader, false); foreach (var input in testData.Take(20)) { diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs index 76d9fa513a..2e515bbe7f 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Extensibility.cs @@ -27,7 +27,7 @@ void Extensibility() var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); var ml = new MLContext(); - var data = ml.Data.CreateTextLoader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',') + var data = 
ml.Data.CreateTextLoader(TestDatasets.irisData.GetLoaderColumns(), separators: new[] { ',' }) .Read(dataPath); Action action = (i, j) => @@ -48,7 +48,7 @@ void Extensibility() var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring); var engine = model.CreatePredictionEngine(ml); - var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ','); + var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separators: new[] { ',' }); var testData = ml.CreateEnumerable(testLoader, false); foreach (var input in testData.Take(20)) { diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs index 1a7acbc547..e9642a940c 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Metacomponents.cs @@ -22,7 +22,7 @@ public partial class ApiScenariosTests public void Metacomponents() { var ml = new MLContext(); - var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.irisData.trainFilename), separatorChar: ','); + var data = ml.Data.ReadFromTextFile(GetDataPath(TestDatasets.irisData.trainFilename), separators: new[] { ',' }); var sdcaTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated( new SdcaNonCalibratedBinaryTrainer.Options { MaxIterations = 100, Shuffle = true, NumThreads = 1, }); diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs index fb38e1ad37..f17c7a515b 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs +++ b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/PredictAndMetadata.cs @@ -25,7 +25,7 @@ void PredictAndMetadata() var dataPath = GetDataPath(TestDatasets.irisData.trainFilename); var ml = new MLContext(); - var data = 
ml.Data.ReadFromTextFile(dataPath, separatorChar: ','); + var data = ml.Data.ReadFromTextFile(dataPath, separators: new[] { ',' }); var pipeline = ml.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest) @@ -35,7 +35,7 @@ void PredictAndMetadata() var model = pipeline.Fit(data).GetModelFor(TransformerScope.Scoring); var engine = model.CreatePredictionEngine(ml); - var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',', hasHeader: true); + var testLoader = ml.Data.ReadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separators: new[] { ',' }, hasHeader: true); var testData = ml.CreateEnumerable(testLoader, false); // During prediction we will get Score column with 3 float values. diff --git a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs index f84279c32d..c5bc510c8e 100644 --- a/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationWithStringLabelTests.cs @@ -23,7 +23,7 @@ public void TrainAndPredictIrisModelWithStringLabelTest() new TextLoader.Column("PetalWidth", DataKind.R4, 3), new TextLoader.Column("IrisPlantType", DataKind.TX, 4), }, - separatorChar: ',' + separators: new[] { ',' } ); // Read training and test data sets diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index 085336646f..2c14ba3d7c 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -998,7 +998,7 @@ public void TensorFlowSentimentClassificationTest() new 
TextLoader.Column("Words", DataKind.TX, 0), new TextLoader.Column("Ids", DataKind.I4, 1), }, - separatorChar: ',' + separators: new[] { ',' } ); // We cannot resize variable length vector to fixed length vector in ML.NET diff --git a/test/Microsoft.ML.Tests/TextLoaderTests.cs b/test/Microsoft.ML.Tests/TextLoaderTests.cs index b8cb8afadd..6d26c78ec8 100644 --- a/test/Microsoft.ML.Tests/TextLoaderTests.cs +++ b/test/Microsoft.ML.Tests/TextLoaderTests.cs @@ -722,7 +722,7 @@ public void LoaderColumnsFromIrisData() var irisFirstRowValues = irisFirstRow.Values.GetEnumerator(); // Simple load - var dataIris = mlContext.Data.CreateTextLoader(separatorChar: ',').Read(dataPath); + var dataIris = mlContext.Data.CreateTextLoader(separators: new[] { ',' }).Read(dataPath); var previewIris = dataIris.Preview(1); Assert.Equal(5, previewIris.ColumnView.Length); @@ -738,7 +738,7 @@ public void LoaderColumnsFromIrisData() Assert.Equal("Iris-setosa", previewIris.RowView[0].Values[index].Value.ToString()); // Load with start and end indexes - var dataIrisStartEnd = mlContext.Data.CreateTextLoader(separatorChar: ',').Read(dataPath); + var dataIrisStartEnd = mlContext.Data.CreateTextLoader(separators: new[] { ',' }).Read(dataPath); var previewIrisStartEnd = dataIrisStartEnd.Preview(1); Assert.Equal(2, previewIrisStartEnd.ColumnView.Length); @@ -755,7 +755,7 @@ public void LoaderColumnsFromIrisData() } // load setting the distinct columns. 
Loading column 0 and 2 - var dataIrisColumnIndices = mlContext.Data.CreateTextLoader(separatorChar: ',').Read(dataPath); + var dataIrisColumnIndices = mlContext.Data.CreateTextLoader(separators: new[] { ',' }).Read(dataPath); var previewIrisColumnIndices = dataIrisColumnIndices.Preview(1); Assert.Equal(2, previewIrisColumnIndices.ColumnView.Length); diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs index bd08dc37a6..40595f3400 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs @@ -74,7 +74,7 @@ public void Pkpd() [Fact] public void MetacomponentsFeaturesRenamed() { - var data = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',') + var data = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separators: new[] { ',' }) .Read(GetDataPath(TestDatasets.irisData.trainFilename)); var sdcaTrainer = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated( diff --git a/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs b/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs index be13496d59..150d350a4f 100644 --- a/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs +++ b/test/Microsoft.ML.Tests/Transformers/CustomMappingTests.cs @@ -93,7 +93,7 @@ public void TestSchemaPropagation() new TextLoader.Column("Float1", DataKind.R4, 0), new TextLoader.Column("Float4", DataKind.R4, new[]{new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }), new TextLoader.Column("Text1", DataKind.Text, 0) - }, separatorChar: ',', hasHeader: true); + }, separators: new[] { ',' }, hasHeader: true); var data = loader.Read(source);