Skip to content

Commit a52849c

Browse files
committed
Removed TextLoader.ReadFile in favor of MLContext.Data.ReadFromTextFile; renamed the TextReader catalog extension to CreateTextReader
1 parent d90c116 commit a52849c

33 files changed

+70
-85
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureContributionCalculationTransform.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static void FeatureContributionCalculationTransform_Regression()
1919

2020
// Step 1: Read the data as an IDataView.
2121
// First, we define the reader: specify the data columns and where to find them in the text file.
22-
var reader = mlContext.Data.TextReader(
22+
var reader = mlContext.Data.CreateTextReader(
2323
columns: new[]
2424
{
2525
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),

docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public static void FeatureSelectionTransform()
3131

3232
// First, we define the reader: specify the data columns and where to find them in the text file. Notice that we combine entries from
3333
// all the feature columns into entries of a vector of a single column named "Features".
34-
var reader = ml.Data.TextReader(
34+
var reader = ml.Data.CreateTextReader(
3535
columns: new[]
3636
{
3737
new TextLoader.Column("Label", DataKind.BL, 0),

docs/samples/Microsoft.ML.Samples/Dynamic/GeneralizedAdditiveModels.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public static void RunExample()
1919

2020
// Step 1: Read the data as an IDataView.
2121
// First, we define the reader: specify the data columns and where to find them in the text file.
22-
var reader = mlContext.Data.TextReader(
22+
var reader = mlContext.Data.CreateTextReader(
2323
columns: new[]
2424
{
2525
new TextLoader.Column("MedianHomeValue", DataKind.R4, 0),

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ public static void PFI_Regression()
2222
// First, we define the reader: specify the data columns and where to find them in the text file.
2323
// The data file is composed of rows of data, with each row having 11 numerical columns
2424
// separated by whitespace.
25-
var reader = mlContext.Data.TextReader(
25+
var reader = mlContext.Data.CreateTextReader(
2626
columns: new[]
2727
{
2828
// Read the first column (indexed by 0) in the data file as an R4 (float)

docs/samples/Microsoft.ML.Samples/Dynamic/SDCA.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public static void SDCA_BinaryClassification()
2424

2525
// Step 1: Read the data as an IDataView.
2626
// First, we define the reader: specify the data columns and where to find them in the text file.
27-
var reader = mlContext.Data.TextReader(
27+
var reader = mlContext.Data.CreateTextReader(
2828
columns: new[]
2929
{
3030
new TextLoader.Column("Sentiment", DataKind.BL, 0),

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,26 +1316,6 @@ internal static IDataLoader Create(IHostEnvironment env, ModelLoadContext ctx, I
13161316
internal static IDataLoader Create(IHostEnvironment env, Arguments args, IMultiStreamSource files)
13171317
=> (IDataLoader)new TextLoader(env, args, files).Read(files);
13181318

1319-
/// <summary>
1320-
/// Creates a <see cref="TextLoader"/> and uses it to read a specified file.
1321-
/// </summary>
1322-
/// <param name="env">The environment to use.</param>
1323-
/// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
1324-
/// <param name="hasHeader">Whether the file has a header.</param>
1325-
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
1326-
/// <param name="fileSource">Specifies a file from which to read.</param>
1327-
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Column[] columns, bool hasHeader = false, char separatorChar = '\t')
1328-
=> new TextLoader(env, columns, hasHeader, separatorChar, fileSource).Read(fileSource);
1329-
1330-
/// <summary>
1331-
/// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
1332-
/// </summary>
1333-
/// <param name="env">The environment to use.</param>
1334-
/// <param name="fileSource">Specifies a file from which to read.</param>
1335-
/// <param name="args">Defines the settings of the load operation.</param>
1336-
public static IDataView ReadFile(IHostEnvironment env, IMultiStreamSource fileSource, Arguments args = null)
1337-
=> new TextLoader(env, args, fileSource).Read(fileSource);
1338-
13391319
public void Save(ModelSaveContext ctx)
13401320
{
13411321
_host.CheckValue(ctx, nameof(ctx));

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public static class TextLoaderSaverCatalog
2121
/// <param name="hasHeader">Whether the file has a header.</param>
2222
/// <param name="separatorChar">The character used as separator between data points in a row. By default the tab character is used as separator.</param>
2323
/// <param name="dataSample">The optional location of a data sample.</param>
24-
public static TextLoader TextReader(this DataOperations catalog,
24+
public static TextLoader CreateTextReader(this DataOperations catalog,
2525
Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null)
2626
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample);
2727

@@ -31,7 +31,7 @@ public static TextLoader TextReader(this DataOperations catalog,
3131
/// <param name="catalog">The catalog.</param>
3232
/// <param name="args">Defines the settings of the load operation.</param>
3333
/// <param name="dataSample">Exposes items that can be used for reading.</param>
34-
public static TextLoader TextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
34+
public static TextLoader CreateTextReader(this DataOperations catalog, Arguments args, IMultiStreamSource dataSample = null)
3535
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), args, dataSample);
3636

3737
/// <summary>
@@ -62,7 +62,7 @@ public static IDataView ReadFromTextFile(this DataOperations catalog,
6262
/// <param name="catalog">The catalog.</param>
6363
/// <param name="path">Specifies a file from which to read.</param>
6464
/// <param name="args">Defines the settings of the load operation.</param>
65-
public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args)
65+
public static IDataView ReadFromTextFile(this DataOperations catalog, string path, Arguments args = null)
6666
{
6767
Contracts.CheckNonEmpty(path, nameof(path));
6868

src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -483,9 +483,10 @@ private static TermMap CreateFileTermMap(IHostEnvironment env, IChannel ch, stri
483483
"{0} should not be specified when default loader is TextLoader. Ignoring {0}={1}",
484484
nameof(Arguments.TermsColumn), src);
485485
}
486-
termData = TextLoader.ReadFile(env, fileSource,
487-
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) }
488-
);
486+
termData = new TextLoader(env,
487+
columns: new[] { new TextLoader.Column("Term", DataKind.TX, 0) },
488+
dataSample: fileSource)
489+
.Read(fileSource);
489490
src = "Term";
490491
autoConvert = true;
491492
}

src/Microsoft.ML.Data/Utilities/ModelFileUtils.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ public static IEnumerable<KeyValuePair<ColumnRole, string>> LoadRoleMappingsOrNu
283283
{
284284
// REVIEW: Should really validate the schema here, and consider
285285
// ignoring this stream if it isn't as expected.
286-
var loader = TextLoader.ReadFile(env, new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
286+
var loader = new TextLoader(env, dataSample: new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile))
287+
.Read(new RepositoryStreamWrapper(rep, DirTrainingInfo, RoleMappingFile));
287288

288289
using (var cursor = loader.GetRowCursor(c => true))
289290
{

src/Microsoft.ML.Transforms/TermLookupTransformer.cs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,11 +361,14 @@ private static IComponentFactory<IMultiStreamSource, IDataLoader> GetLoaderFacto
361361
ulong max = ulong.MinValue;
362362
try
363363
{
364-
var data = TextLoader.ReadFile(host, new MultiFileSource(filename), new[]
364+
var data = new TextLoader(host, new[]
365365
{
366366
new TextLoader.Column("Term", DataKind.TX, 0),
367367
new TextLoader.Column("Value", DataKind.TX, 1)
368-
});
368+
},
369+
dataSample: new MultiFileSource(filename)
370+
).Read(new MultiFileSource(filename));
371+
369372
using (var cursor = data.GetRowCursor(c => true))
370373
{
371374
var getTerm = cursor.GetGetter<ReadOnlyMemory<char>>(0);

test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ public void TrainSentiment()
8686
AllowQuoting = false,
8787
AllowSparse = false
8888
};
89-
var loader = TextLoader.ReadFile(env, new MultiFileSource(_sentimentDataPath), arguments);
89+
var loader = env.Data.ReadFromTextFile(_sentimentDataPath, arguments);
9090

9191
var text = TextFeaturizingEstimator.Create(env,
9292
new TextFeaturizingEstimator.Arguments()

test/Microsoft.ML.Predictor.Tests/TestPredictors.cs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -606,12 +606,12 @@ public void RankingLightGBMTest()
606606
public void TestTreeEnsembleCombiner()
607607
{
608608
var dataPath = GetDataPath("breast-cancer.txt");
609-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
609+
var dataView = ML.Data.ReadFromTextFile(dataPath);
610610

611611
var fastTrees = new IPredictorModel[3];
612612
for (int i = 0; i < 3; i++)
613613
{
614-
fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
614+
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
615615
{
616616
FeatureColumn = "Features",
617617
NumTrees = 5,
@@ -628,13 +628,13 @@ public void TestTreeEnsembleCombiner()
628628
public void TestTreeEnsembleCombinerWithCategoricalSplits()
629629
{
630630
var dataPath = GetDataPath("adult.tiny.with-schema.txt");
631-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
631+
var dataView = ML.Data.ReadFromTextFile(dataPath);
632632

633-
var cat = new OneHotEncodingEstimator(Env, "Categories", "Features").Fit(dataView).Transform(dataView);
633+
var cat = new OneHotEncodingEstimator(ML, "Categories", "Features").Fit(dataView).Transform(dataView);
634634
var fastTrees = new IPredictorModel[3];
635635
for (int i = 0; i < 3; i++)
636636
{
637-
fastTrees[i] = FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
637+
fastTrees[i] = FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
638638
{
639639
FeatureColumn = "Features",
640640
NumTrees = 5,
@@ -729,35 +729,35 @@ private void CombineAndTestTreeEnsembles(IDataView idv, IPredictorModel[] fastTr
729729
public void TestEnsembleCombiner()
730730
{
731731
var dataPath = GetDataPath("breast-cancer.txt");
732-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
732+
var dataView = ML.Data.ReadFromTextFile(dataPath);
733733

734734
var predictors = new IPredictorModel[]
735735
{
736-
FastTree.TrainBinary(Env, new FastTreeBinaryClassificationTrainer.Arguments
736+
FastTree.TrainBinary(ML, new FastTreeBinaryClassificationTrainer.Arguments
737737
{
738738
FeatureColumn = "Features",
739739
NumTrees = 5,
740740
NumLeaves = 4,
741741
LabelColumn = DefaultColumnNames.Label,
742742
TrainingData = dataView
743743
}).PredictorModel,
744-
AveragedPerceptronTrainer.TrainBinary(Env, new AveragedPerceptronTrainer.Arguments()
744+
AveragedPerceptronTrainer.TrainBinary(ML, new AveragedPerceptronTrainer.Arguments()
745745
{
746746
FeatureColumn = "Features",
747747
LabelColumn = DefaultColumnNames.Label,
748748
NumIterations = 2,
749749
TrainingData = dataView,
750750
NormalizeFeatures = NormalizeOption.No
751751
}).PredictorModel,
752-
LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
752+
LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
753753
{
754754
FeatureColumn = "Features",
755755
LabelColumn = DefaultColumnNames.Label,
756756
OptTol = 10e-4F,
757757
TrainingData = dataView,
758758
NormalizeFeatures = NormalizeOption.No
759759
}).PredictorModel,
760-
LogisticRegression.TrainBinary(Env, new LogisticRegression.Arguments()
760+
LogisticRegression.TrainBinary(ML, new LogisticRegression.Arguments()
761761
{
762762
FeatureColumn = "Features",
763763
LabelColumn = DefaultColumnNames.Label,
@@ -775,7 +775,7 @@ public void TestEnsembleCombiner()
775775
public void TestMultiClassEnsembleCombiner()
776776
{
777777
var dataPath = GetDataPath("breast-cancer.txt");
778-
var dataView = TextLoader.ReadFile(Env, new MultiFileSource(dataPath));
778+
var dataView = ML.Data.ReadFromTextFile(dataPath);
779779

780780
var predictors = new IPredictorModel[]
781781
{

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipeBase.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ protected bool SaveLoadText(IDataView view, IHostEnvironment env,
438438

439439
// Note that we don't pass in "args", but pass in a default args so we test
440440
// the auto-schema parsing.
441-
var loadedData = TextLoader.ReadFile(env, new MultiFileSource(pathData));
441+
var loadedData = ML.Data.ReadFromTextFile(pathData);
442442
if (!CheckMetadataTypes(loadedData.Schema))
443443
Failed();
444444

test/Microsoft.ML.TestFramework/ModelHelper.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ namespace Microsoft.ML.TestFramework
1414
#pragma warning disable 612, 618
1515
public static class ModelHelper
1616
{
17-
private static IHostEnvironment s_environment = new MLContext(seed: 1);
17+
private static MLContext s_environment = new MLContext(seed: 1);
1818
private static ITransformModel s_housePriceModel;
1919

2020
public static void WriteKcHousePriceModel(string dataPath, string outputModelPath)
@@ -41,7 +41,7 @@ public static void WriteKcHousePriceModel(string dataPath, Stream stream)
4141

4242
public static IDataView GetKcHouseDataView(string dataPath)
4343
{
44-
return Runtime.Data.TextLoader.ReadFile(s_environment, new MultiFileSource(dataPath),
44+
return s_environment.Data.ReadFromTextFile(dataPath,
4545
columns: new[]
4646
{
4747
new Runtime.Data.TextLoader.Column("Id", Runtime.Data.DataKind.TX, 0),

test/Microsoft.ML.Tests/Scenarios/Api/CookbookSamples/CookbookSamplesDynamicApi.cs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ private void IntermediateData(string dataPath)
4141
var mlContext = new MLContext();
4242

4343
// Create the reader: define the data columns and where to find them in the text file.
44-
var reader = mlContext.Data.TextReader(new[] {
44+
var reader = mlContext.Data.CreateTextReader(new[] {
4545
// A boolean column depicting the 'label'.
4646
new TextLoader.Column("IsOver50K", DataKind.BL, 0),
4747
// Three text columns.
@@ -91,7 +91,7 @@ private void TrainRegression(string trainDataPath, string testDataPath, string m
9191

9292
// Step one: read the data as an IDataView.
9393
// First, we define the reader: specify the data columns and where to find them in the text file.
94-
var reader = mlContext.Data.TextReader(new[] {
94+
var reader = mlContext.Data.CreateTextReader(new[] {
9595
// We read the first 11 values as a single float vector.
9696
new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),
9797

@@ -167,7 +167,7 @@ private ITransformer TrainOnIris(string irisDataPath)
167167

168168
// Step one: read the data as an IDataView.
169169
// First, we define the reader: specify the data columns and where to find them in the text file.
170-
var reader = mlContext.Data.TextReader(new[] {
170+
var reader = mlContext.Data.CreateTextReader(new[] {
171171
new TextLoader.Column("SepalLength", DataKind.R4, 0),
172172
new TextLoader.Column("SepalWidth", DataKind.R4, 1),
173173
new TextLoader.Column("PetalLength", DataKind.R4, 2),
@@ -234,7 +234,7 @@ private void NormalizationWorkout(string dataPath)
234234
var mlContext = new MLContext();
235235

236236
// Define the reader: specify the data columns and where to find them in the text file.
237-
var reader = mlContext.Data.TextReader(new[] {
237+
var reader = mlContext.Data.CreateTextReader(new[] {
238238
// The four features of the Iris dataset will be grouped together as one Features column.
239239
new TextLoader.Column("Features", DataKind.R4, 0, 3),
240240
// Label: kind of iris.
@@ -295,7 +295,7 @@ private void TextFeaturizationOn(string dataPath)
295295
var mlContext = new MLContext();
296296

297297
// Define the reader: specify the data columns and where to find them in the text file.
298-
var reader = mlContext.Data.TextReader(new[]
298+
var reader = mlContext.Data.CreateTextReader(new[]
299299
{
300300
new TextLoader.Column("IsToxic", DataKind.BL, 0),
301301
new TextLoader.Column("Message", DataKind.TX, 1),
@@ -362,7 +362,7 @@ private void CategoricalFeaturizationOn(params string[] dataPath)
362362
var mlContext = new MLContext();
363363

364364
// Define the reader: specify the data columns and where to find them in the text file.
365-
var reader = mlContext.Data.TextReader(new[]
365+
var reader = mlContext.Data.CreateTextReader(new[]
366366
{
367367
new TextLoader.Column("Label", DataKind.BL, 0),
368368
// We will load all the categorical features into one vector column of size 8.
@@ -426,7 +426,7 @@ private void CrossValidationOn(string dataPath)
426426

427427
// Step one: read the data as an IDataView.
428428
// First, we define the reader: specify the data columns and where to find them in the text file.
429-
var reader = mlContext.Data.TextReader(new[]
429+
var reader = mlContext.Data.CreateTextReader(new[]
430430
{
431431
// We read the first value (column 0) as a single float named SepalLength.
432432
new TextLoader.Column("SepalLength", DataKind.R4, 0),
@@ -487,7 +487,7 @@ private void ReadDataDynamic(string dataPath)
487487
var mlContext = new MLContext();
488488

489489
// Create the reader: define the data columns and where to find them in the text file.
490-
var reader = mlContext.Data.TextReader(new[] {
490+
var reader = mlContext.Data.CreateTextReader(new[] {
491491
// We read the first 10 values as a single float vector.
492492
new TextLoader.Column("FeatureVector", DataKind.R4, new[] {new TextLoader.Range(0, 9)}),
493493
// Separately, read the target variable.

test/Microsoft.ML.Tests/Scenarios/Api/Estimators/CrossValidation.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void New_CrossValidation()
2727
{
2828
var ml = new MLContext(seed: 1, conc: 1);
2929

30-
var data = ml.Data.TextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
30+
var data = ml.Data.CreateTextReader(MakeSentimentColumns(), hasHeader: true).Read(GetDataPath(TestDatasets.Sentiment.trainFilename));
3131
// Pipeline.
3232
var pipeline = ml.Transforms.Text.FeaturizeText("SentimentText", "Features")
3333
.Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent("Label", "Features", advancedSettings: (s) => { s.ConvergenceTolerance = 1f; s.NumThreads = 1; }));

0 commit comments

Comments
 (0)