Skip to content

Commit 96d561e

Browse files
committed
Remove a redundant ctor of TextLoader
1 parent ebe7ca8 commit 96d561e

File tree

7 files changed

+122
-87
lines changed

7 files changed

+122
-87
lines changed

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

-26
Original file line numberDiff line numberDiff line change
@@ -1085,32 +1085,6 @@ private bool HasHeader
10851085
private readonly IHost _host;
10861086
private const string RegistrationName = "TextLoader";
10871087

1088-
/// <summary>
1089-
/// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
1090-
/// </summary>
1091-
/// <param name="env">The environment to use.</param>
1092-
/// <param name="columns">Defines a mapping between input columns in the file and IDataView columns.</param>
1093-
/// <param name="separatorChar"> The character used as separator between data points in a row. By default the tab character is used as separator.</param>
1094-
/// <param name="hasHeader">Whether the file has a header.</param>
1095-
/// <param name="allowSparse">Whether the file can contain numerical vectors in sparse format.</param>
1096-
/// <param name="allowQuoting">Whether the content of a column can be parsed from a string starting and ending with quote.</param>
1097-
/// <param name="dataSample">Allows to expose items that can be used for reading.</param>
1098-
/// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
1099-
internal TextLoader(IHostEnvironment env, Column[] columns, char separatorChar = Defaults.Separator,
1100-
bool hasHeader = Defaults.HasHeader, bool allowSparse = Defaults.AllowSparse,
1101-
bool allowQuoting = Defaults.AllowQuoting, IMultiStreamSource dataSample = null, bool trimWhitespace = Defaults.TrimWhitespace)
1102-
: this(env, MakeArgs(columns, hasHeader, new[] { separatorChar }, allowSparse, allowQuoting, trimWhitespace), dataSample)
1103-
{
1104-
}
1105-
1106-
private static Options MakeArgs(Column[] columns, bool hasHeader, char[] separatorChars, bool allowSparse, bool allowQuoting, bool trimWhitespace)
1107-
{
1108-
Contracts.AssertValue(separatorChars);
1109-
var result = new Options { Columns = columns, HasHeader = hasHeader, Separators = separatorChars,
1110-
AllowSparse = allowSparse, AllowQuoting = allowQuoting, TrimWhitespace = trimWhitespace };
1111-
return result;
1112-
}
1113-
11141088
/// <summary>
11151089
/// Loads a text file into an <see cref="IDataView"/>. Supports basic mapping from input columns to IDataView columns.
11161090
/// </summary>

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs

+23-4
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,19 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog,
3030
bool allowQuoting = TextLoader.Defaults.AllowQuoting,
3131
bool trimWhitespace = TextLoader.Defaults.TrimWhitespace,
3232
bool allowSparse = TextLoader.Defaults.AllowSparse)
33-
=> new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, separatorChar, hasHeader, allowSparse, allowQuoting, dataSample, trimWhitespace);
33+
{
34+
var options = new TextLoader.Options
35+
{
36+
Columns = columns,
37+
Separators = new[] { separatorChar },
38+
HasHeader = hasHeader,
39+
AllowQuoting = allowQuoting,
40+
TrimWhitespace = trimWhitespace,
41+
AllowSparse = allowSparse
42+
};
43+
44+
return new TextLoader(CatalogUtils.GetEnvironment(catalog), options: options, dataSample: dataSample);
45+
}
3446

3547
/// <summary>
3648
/// Create a text loader <see cref="TextLoader"/>.
@@ -94,10 +106,17 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog,
94106
{
95107
Contracts.CheckNonEmpty(path, nameof(path));
96108

97-
var env = catalog.GetEnvironment();
109+
var options = new TextLoader.Options
110+
{
111+
Columns = columns,
112+
Separators = new[] { separatorChar },
113+
HasHeader = hasHeader,
114+
AllowQuoting = allowQuoting,
115+
TrimWhitespace = trimWhitespace,
116+
AllowSparse = allowSparse
117+
};
98118

99-
var reader = new TextLoader(env, columns, separatorChar: separatorChar, hasHeader: hasHeader, allowSparse: allowSparse,
100-
allowQuoting: allowQuoting, dataSample: dataSample, trimWhitespace: trimWhitespace);
119+
var reader = new TextLoader(CatalogUtils.GetEnvironment(catalog), options: options, dataSample: dataSample);
101120
return reader.Read(new MultiFileSource(path));
102121
}
103122

src/Microsoft.ML.Data/Transforms/ValueToKeyMappingTransformer.cs

+13-4
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,19 @@ internal static IDataView GetKeyDataViewOrNull(IHostEnvironment env, IChannel ch
439439
"{0} should not be specified when default loader is " + nameof(TextLoader) + ". Ignoring {0}={1}",
440440
nameof(Options.TermsColumn), src);
441441
}
442-
keyData = new TextLoader(env,
443-
columns: new[] { new TextLoader.Column("Term", DataKind.String, 0) },
444-
dataSample: fileSource)
445-
.Read(fileSource);
442+
443+
// Create text loader.
444+
var options = new TextLoader.Options()
445+
{
446+
Columns = new[]
447+
{
448+
new TextLoader.Column("Term", DataKind.String, 0)
449+
}
450+
};
451+
var reader = new TextLoader(env, options: options, dataSample: fileSource);
452+
453+
keyData = reader.Read(fileSource);
454+
446455
src = "Term";
447456
// In this case they are relying on heuristics, so auto-loading in this case is most appropriate.
448457
autoConvert = true;

src/Microsoft.ML.Transforms/Text/StopWordsRemovingTransformer.cs

+10-4
Original file line numberDiff line numberDiff line change
@@ -737,13 +737,19 @@ private IDataLoader GetLoaderForStopwords(IChannel ch, string dataFile,
737737
{
738738
if (stopwordsCol == null)
739739
stopwordsCol = "Stopwords";
740-
dataLoader = new TextLoader(
741-
Host,
742-
columns: new[]
740+
741+
// Create text loader.
742+
var options = new TextLoader.Options()
743+
{
744+
Columns = new[]
743745
{
744746
new TextLoader.Column(stopwordsCol, DataKind.String, 0)
745747
},
746-
dataSample: fileSource).Read(fileSource) as IDataLoader;
748+
Separators = new[] { ',' },
749+
};
750+
var reader = new TextLoader(Host, options: options, dataSample: fileSource);
751+
752+
dataLoader = reader.Read(fileSource) as IDataLoader;
747753
}
748754
ch.AssertNonEmpty(stopwordsCol);
749755
}

test/Microsoft.ML.Benchmarks/PredictionEngineBench.cs

+39-25
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,21 @@ public void SetupIrisPipeline()
3939
string _irisDataPath = BaseTestClass.GetDataPath("iris.txt");
4040

4141
var env = new MLContext(seed: 1, conc: 1);
42-
var reader = new TextLoader(env,
43-
columns: new[]
44-
{
45-
new TextLoader.Column("Label", DataKind.Single, 0),
46-
new TextLoader.Column("SepalLength", DataKind.Single, 1),
47-
new TextLoader.Column("SepalWidth", DataKind.Single, 2),
48-
new TextLoader.Column("PetalLength", DataKind.Single, 3),
49-
new TextLoader.Column("PetalWidth", DataKind.Single, 4),
50-
},
51-
hasHeader: true
52-
);
42+
43+
// Create text loader.
44+
var options = new TextLoader.Options()
45+
{
46+
Columns = new[]
47+
{
48+
new TextLoader.Column("Label", DataKind.Single, 0),
49+
new TextLoader.Column("SepalLength", DataKind.Single, 1),
50+
new TextLoader.Column("SepalWidth", DataKind.Single, 2),
51+
new TextLoader.Column("PetalLength", DataKind.Single, 3),
52+
new TextLoader.Column("PetalWidth", DataKind.Single, 4),
53+
},
54+
HasHeader = true,
55+
};
56+
var reader = new TextLoader(env, options: options);
5357

5458
IDataView data = reader.Read(_irisDataPath);
5559

@@ -73,13 +77,18 @@ public void SetupSentimentPipeline()
7377
string _sentimentDataPath = BaseTestClass.GetDataPath("wikipedia-detox-250-line-data.tsv");
7478

7579
var mlContext = new MLContext(seed: 1, conc: 1);
76-
var reader = new TextLoader(mlContext, columns: new[]
77-
{
78-
new TextLoader.Column("Label", DataKind.Boolean, 0),
79-
new TextLoader.Column("SentimentText", DataKind.String, 1)
80-
},
81-
hasHeader: true
82-
);
80+
81+
// Create text loader.
82+
var options = new TextLoader.Options()
83+
{
84+
Columns = new[]
85+
{
86+
new TextLoader.Column("Label", DataKind.Boolean, 0),
87+
new TextLoader.Column("SentimentText", DataKind.String, 1)
88+
},
89+
HasHeader = true,
90+
};
91+
var reader = new TextLoader(mlContext, options: options);
8392

8493
IDataView data = reader.Read(_sentimentDataPath);
8594

@@ -103,13 +112,18 @@ public void SetupBreastCancerPipeline()
103112
string _breastCancerDataPath = BaseTestClass.GetDataPath("breast-cancer.txt");
104113

105114
var env = new MLContext(seed: 1, conc: 1);
106-
var reader = new TextLoader(env, columns: new[]
107-
{
108-
new TextLoader.Column("Label", DataKind.Boolean, 0),
109-
new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(1, 9) })
110-
},
111-
hasHeader: false
112-
);
115+
116+
// Create text loader.
117+
var options = new TextLoader.Options()
118+
{
119+
Columns = new[]
120+
{
121+
new TextLoader.Column("Label", DataKind.Boolean, 0),
122+
new TextLoader.Column("Features", DataKind.Single, new[] { new TextLoader.Range(1, 9) })
123+
},
124+
HasHeader = false,
125+
};
126+
var reader = new TextLoader(env, options: options);
113127

114128
IDataView data = reader.Read(_breastCancerDataPath);
115129

test/Microsoft.ML.Benchmarks/StochasticDualCoordinateAscentClassifierBench.cs

+28-22
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,20 @@ protected override IEnumerable<Metric> GetMetrics()
5353

5454
private TransformerChain<MulticlassPredictionTransformer<MulticlassLogisticRegressionModelParameters>> Train(string dataPath)
5555
{
56-
var reader = new TextLoader(mlContext,
57-
columns: new[]
58-
{
59-
new TextLoader.Column("Label", DataKind.Single, 0),
60-
new TextLoader.Column("SepalLength", DataKind.Single, 1),
61-
new TextLoader.Column("SepalWidth", DataKind.Single, 2),
62-
new TextLoader.Column("PetalLength", DataKind.Single, 3),
63-
new TextLoader.Column("PetalWidth", DataKind.Single, 4),
64-
},
65-
hasHeader: true
66-
);
56+
// Create text loader.
57+
var options = new TextLoader.Options()
58+
{
59+
Columns = new[]
60+
{
61+
new TextLoader.Column("Label", DataKind.Single, 0),
62+
new TextLoader.Column("SepalLength", DataKind.Single, 1),
63+
new TextLoader.Column("SepalWidth", DataKind.Single, 2),
64+
new TextLoader.Column("PetalLength", DataKind.Single, 3),
65+
new TextLoader.Column("PetalWidth", DataKind.Single, 4),
66+
},
67+
HasHeader = true,
68+
};
69+
var reader = new TextLoader(mlContext, options: options);
6770

6871
IDataView data = reader.Read(dataPath);
6972

@@ -116,17 +119,20 @@ public void SetupPredictBenchmarks()
116119
_predictionEngine = _trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
117120
_consumer.Consume(_predictionEngine.Predict(_example));
118121

119-
var reader = new TextLoader(mlContext,
120-
columns: new[]
121-
{
122-
new TextLoader.Column("Label", DataKind.Single, 0),
123-
new TextLoader.Column("SepalLength", DataKind.Single, 1),
124-
new TextLoader.Column("SepalWidth", DataKind.Single, 2),
125-
new TextLoader.Column("PetalLength", DataKind.Single, 3),
126-
new TextLoader.Column("PetalWidth", DataKind.Single, 4),
127-
},
128-
hasHeader: true
129-
);
122+
// Create text loader.
123+
var options = new TextLoader.Options()
124+
{
125+
Columns = new[]
126+
{
127+
new TextLoader.Column("Label", DataKind.Single, 0),
128+
new TextLoader.Column("SepalLength", DataKind.Single, 1),
129+
new TextLoader.Column("SepalWidth", DataKind.Single, 2),
130+
new TextLoader.Column("PetalLength", DataKind.Single, 3),
131+
new TextLoader.Column("PetalWidth", DataKind.Single, 4),
132+
},
133+
HasHeader = true,
134+
};
135+
var reader = new TextLoader(mlContext, options: options);
130136

131137
IDataView testData = reader.Read(_dataPath);
132138
IDataView scoredTestData = _trainedModel.Transform(testData);

test/Microsoft.ML.Tests/TrainerEstimators/MetalinearEstimators.cs

+9-2
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,15 @@ public void Pkpd()
7474
[Fact]
7575
public void MetacomponentsFeaturesRenamed()
7676
{
77-
var data = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',')
78-
.Read(GetDataPath(TestDatasets.irisData.trainFilename));
77+
// Create text loader.
78+
var options = new TextLoader.Options()
79+
{
80+
Columns = TestDatasets.irisData.GetLoaderColumns(),
81+
Separators = new[] { ',' },
82+
};
83+
var reader = new TextLoader(Env, options: options);
84+
85+
var data = reader.Read(GetDataPath(TestDatasets.irisData.trainFilename));
7986

8087
var sdcaTrainer = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
8188
new SdcaNonCalibratedBinaryTrainer.Options {

0 commit comments

Comments
 (0)