diff --git a/test/Microsoft.ML.Functional.Tests/Common.cs b/test/Microsoft.ML.Functional.Tests/Common.cs
index 0e3e2e5c68..bcba3a8e27 100644
--- a/test/Microsoft.ML.Functional.Tests/Common.cs
+++ b/test/Microsoft.ML.Functional.Tests/Common.cs
@@ -7,6 +7,7 @@
using System.Linq;
using Microsoft.Data.DataView;
using Microsoft.ML.Data;
+using Microsoft.ML.Data.Evaluators.Metrics;
using Microsoft.ML.Functional.Tests.Datasets;
using Xunit;
@@ -160,13 +161,86 @@ public static void AssertEqual(TypeTestData testType1, TypeTestData testType2)
Assert.True(testType1.Ug.Equals(testType2.Ug));
}
+ /// <summary>
+ /// Check that a <see cref="AnomalyDetectionMetrics"/> object is valid.
+ /// </summary>
+ /// <param name="metrics">The metrics object.</param>
+ public static void AssertMetrics(AnomalyDetectionMetrics metrics)
+ {
+ Assert.InRange(metrics.Auc, 0, 1);
+ Assert.InRange(metrics.DrAtK, 0, 1);
+ }
+
+ /// <summary>
+ /// Check that a <see cref="BinaryClassificationMetrics"/> object is valid.
+ /// </summary>
+ /// <param name="metrics">The metrics object.</param>
+ public static void AssertMetrics(BinaryClassificationMetrics metrics)
+ {
+ Assert.InRange(metrics.Accuracy, 0, 1);
+ Assert.InRange(metrics.Auc, 0, 1);
+ Assert.InRange(metrics.Auprc, 0, 1);
+ Assert.InRange(metrics.F1Score, 0, 1);
+ Assert.InRange(metrics.NegativePrecision, 0, 1);
+ Assert.InRange(metrics.NegativeRecall, 0, 1);
+ Assert.InRange(metrics.PositivePrecision, 0, 1);
+ Assert.InRange(metrics.PositiveRecall, 0, 1);
+ }
+
+ /// <summary>
+ /// Check that a <see cref="CalibratedBinaryClassificationMetrics"/> object is valid.
+ /// </summary>
+ /// <param name="metrics">The metrics object.</param>
+ public static void AssertMetrics(CalibratedBinaryClassificationMetrics metrics)
+ {
+ Assert.InRange(metrics.Entropy, double.NegativeInfinity, 1);
+ Assert.InRange(metrics.LogLoss, double.NegativeInfinity, 1);
+ Assert.InRange(metrics.LogLossReduction, double.NegativeInfinity, 100);
+ AssertMetrics(metrics as BinaryClassificationMetrics);
+ }
+
+ /// <summary>
+ /// Check that a <see cref="ClusteringMetrics"/> object is valid.
+ /// </summary>
+ /// <param name="metrics">The metrics object.</param>
+ public static void AssertMetrics(ClusteringMetrics metrics)
+ {
+ Assert.True(metrics.AvgMinScore >= 0);
+ Assert.True(metrics.Dbi >= 0);
+ if (!double.IsNaN(metrics.Nmi))
+ Assert.True(metrics.Nmi >= 0 && metrics.Nmi <= 1);
+ }
+
+ /// <summary>
+ /// Check that a <see cref="MultiClassClassifierMetrics"/> object is valid.
+ /// </summary>
+ /// <param name="metrics">The metrics object.</param>
+ public static void AssertMetrics(MultiClassClassifierMetrics metrics)
+ {
+ Assert.InRange(metrics.AccuracyMacro, 0, 1);
+ Assert.InRange(metrics.AccuracyMicro, 0, 1);
+ Assert.True(metrics.LogLoss >= 0);
+ Assert.InRange(metrics.TopKAccuracy, 0, 1);
+ }
+
+ /// <summary>
+ /// Check that a <see cref="RankerMetrics"/> object is valid.
+ /// </summary>
+ /// <param name="metrics">The metrics object.</param>
+ public static void AssertMetrics(RankerMetrics metrics)
+ {
+ foreach (var dcg in metrics.Dcg)
+ Assert.True(dcg >= 0);
+ foreach (var ndcg in metrics.Ndcg)
+ Assert.InRange(ndcg, 0, 100);
+ }
+
/// <summary>
/// Check that a <see cref="RegressionMetrics"/> object is valid.
/// </summary>
/// <param name="metrics">The metrics object.</param>
public static void AssertMetrics(RegressionMetrics metrics)
{
- // Perform sanity checks on the metrics.
Assert.True(metrics.Rms >= 0);
Assert.True(metrics.L1 >= 0);
Assert.True(metrics.L2 >= 0);
@@ -179,7 +253,6 @@ public static void AssertMetrics(RegressionMetrics metrics)
/// <param name="metric">The <see cref="MetricStatistics"/> object.</param>
public static void AssertMetricStatistics(MetricStatistics metric)
{
- // Perform sanity checks on the metrics.
Assert.True(metric.StandardDeviation >= 0);
Assert.True(metric.StandardError >= 0);
}
@@ -190,7 +263,6 @@ public static void AssertMetricStatistics(MetricStatistics metric)
/// <param name="metrics">The metrics object.</param>
public static void AssertMetricsStatistics(RegressionMetricsStatistics metrics)
{
- // The mean can be any float; the standard deviation and error must be >=0.
AssertMetricStatistics(metrics.Rms);
AssertMetricStatistics(metrics.L1);
AssertMetricStatistics(metrics.L2);
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
new file mode 100644
index 0000000000..d1cbfa3fad
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Iris.cs
@@ -0,0 +1,78 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+using System;
+using Microsoft.Data.DataView;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+ /// <summary>
+ /// A class for the Iris test dataset.
+ /// </summary>
+ internal sealed class Iris
+ {
+ [LoadColumn(0)]
+ public float Label { get; set; }
+
+ [LoadColumn(1)]
+ public float SepalLength { get; set; }
+
+ [LoadColumn(2)]
+ public float SepalWidth { get; set; }
+
+ [LoadColumn(3)]
+ public float PetalLength { get; set; }
+
+ [LoadColumn(4)]
+ public float PetalWidth { get; set; }
+
+ /// <summary>
+ /// The list of columns commonly used as features.
+ /// </summary>
+ public static readonly string[] Features = new string[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
+
+ public static IDataView LoadAsRankingProblem(MLContext mlContext, string filePath, bool hasHeader, char separatorChar, int seed = 1)
+ {
+ // Load the Iris data.
+ var data = mlContext.Data.ReadFromTextFile<Iris>(filePath, hasHeader: hasHeader, separatorChar: separatorChar);
+
+ // Create a function that generates a random groupId.
+ var rng = new Random(seed);
+ Action<Iris, IrisWithGroup> generateGroupId = (input, output) =>
+ {
+ output.Label = input.Label;
+ // The standard Iris set used in tests has 150 rows, so 30 groups gives roughly five rows per group.
+ output.GroupId = rng.Next(0, 30);
+ output.PetalLength = input.PetalLength;
+ output.PetalWidth = input.PetalWidth;
+ output.SepalLength = input.SepalLength;
+ output.SepalWidth = input.SepalWidth;
+ };
+
+ // Describe a pipeline that generates a groupId and converts it to a key.
+ var pipeline = mlContext.Transforms.CustomMapping(generateGroupId, null)
+ .Append(mlContext.Transforms.Conversion.MapValueToKey("GroupId"));
+
+ // Transform the data.
+ var transformedData = pipeline.Fit(data).Transform(data);
+
+ return transformedData;
+ }
+ }
+
+ /// <summary>
+ /// A class for the Iris dataset with a GroupId column.
+ /// </summary>
+ internal sealed class IrisWithGroup
+ {
+ public float Label { get; set; }
+ public int GroupId { get; set; }
+ public float SepalLength { get; set; }
+ public float SepalWidth { get; set; }
+ public float PetalLength { get; set; }
+ public float PetalWidth { get; set; }
+ }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
new file mode 100644
index 0000000000..07b26d3d9c
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/MnistOneClass.cs
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+ internal sealed class MnistOneClass
+ {
+ private const int _featureLength = 783;
+
+ public float Label { get; set; }
+
+ public float[] Features { get; set; }
+
+ public static TextLoader GetTextLoader(MLContext mlContext, bool hasHeader, char separatorChar)
+ {
+ return mlContext.Data.CreateTextLoader(
+ new[] {
+ new TextLoader.Column("Label", DataKind.R4, 0),
+ new TextLoader.Column("Features", DataKind.R4, 1, 1 + _featureLength)
+ },
+ separatorChar: separatorChar,
+ hasHeader: hasHeader,
+ allowSparse: true);
+ }
+ }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs b/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
new file mode 100644
index 0000000000..2465e291b3
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/Sentiment.cs
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+ /// <summary>
+ /// A class for reading in the Sentiment test dataset.
+ /// </summary>
+ internal sealed class TweetSentiment
+ {
+ [LoadColumn(0), ColumnName("Label")]
+ public bool Sentiment { get; set; }
+
+ [LoadColumn(1)]
+ public string SentimentText { get; set; }
+ }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs b/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
new file mode 100644
index 0000000000..005fc98c72
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Datasets/TrivialMatrixFactorization.cs
@@ -0,0 +1,41 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+using System;
+using Microsoft.Data.DataView;
+using Microsoft.ML.Data;
+
+namespace Microsoft.ML.Functional.Tests.Datasets
+{
+ /// <summary>
+ /// A class describing the TrivialMatrixFactorization test dataset.
+ /// </summary>
+ internal sealed class TrivialMatrixFactorization
+ {
+ [LoadColumn(0)]
+ public float Label { get; set; }
+
+ [LoadColumn(1)]
+ public uint MatrixColumnIndex { get; set; }
+
+ [LoadColumn(2)]
+ public uint MatrixRowIndex { get; set; }
+
+ public static IDataView LoadAndFeaturizeFromTextFile(MLContext mlContext, string filePath, bool hasHeader, char separatorChar)
+ {
+ // Load the data from a text file.
+ var data = mlContext.Data.ReadFromTextFile<TrivialMatrixFactorization>(filePath, hasHeader: hasHeader, separatorChar: separatorChar);
+
+ // Describe a pipeline to translate the uints to keys.
+ var pipeline = mlContext.Transforms.Conversion.MapValueToKey("MatrixColumnIndex")
+ .Append(mlContext.Transforms.Conversion.MapValueToKey("MatrixRowIndex"));
+
+ // Transform the data.
+ var transformedData = pipeline.Fit(data).Transform(data);
+
+ return transformedData;
+ }
+ }
+}
diff --git a/test/Microsoft.ML.Functional.Tests/Evaluation.cs b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
new file mode 100644
index 0000000000..6ffec01b32
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Evaluation.cs
@@ -0,0 +1,309 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Functional.Tests.Datasets;
+using Microsoft.ML.RunTests;
+using Microsoft.ML.TestFramework;
+using Microsoft.ML.TestFramework.Attributes;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Trainers.FastTree;
+using Microsoft.ML.Trainers.KMeans;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Microsoft.ML.Functional.Tests
+{
+ public class Evaluation : BaseTestClass
+ {
+ public Evaluation(ITestOutputHelper output) : base(output)
+ {
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Anomaly Detection.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateAnomalyDetection()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var trainData = MnistOneClass.GetTextLoader(mlContext,
+ TestDatasets.mnistOneClass.fileHasHeader, TestDatasets.mnistOneClass.fileSeparator)
+ .Read(GetDataPath(TestDatasets.mnistOneClass.trainFilename));
+ var testData = MnistOneClass.GetTextLoader(mlContext,
+ TestDatasets.mnistOneClass.fileHasHeader, TestDatasets.mnistOneClass.fileSeparator)
+ .Read(GetDataPath(TestDatasets.mnistOneClass.testFilename));
+
+ // Create a training pipeline.
+ var pipeline = mlContext.AnomalyDetection.Trainers.RandomizedPca();
+
+ // Train the model.
+ var model = pipeline.Fit(trainData);
+
+ // Evaluate the model.
+ // TODO #2464: Using the train dataset will cause NaN metrics to be returned.
+ var scoredTest = model.Transform(testData);
+ var metrics = mlContext.AnomalyDetection.Evaluate(scoredTest);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Binary Classification with no calibration.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateBinaryClassification()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
+ hasHeader: TestDatasets.Sentiment.fileHasHeader,
+ separatorChar: TestDatasets.Sentiment.fileSeparator);
+
+ // Create a training pipeline.
+ var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
+ new SdcaNonCalibratedBinaryTrainer.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(scoredData);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Binary Classification with a calibrated predictor.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateBinaryClassificationWithCalibration()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
+ hasHeader: TestDatasets.Sentiment.fileHasHeader,
+ separatorChar: TestDatasets.Sentiment.fileSeparator);
+
+ // Create a training pipeline.
+ var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
+ new LogisticRegression.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Clustering.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateClustering()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var data = mlContext.Data.ReadFromTextFile<Iris>(GetDataPath(TestDatasets.iris.trainFilename),
+ hasHeader: TestDatasets.iris.fileHasHeader,
+ separatorChar: TestDatasets.iris.fileSeparator);
+
+ // Create a training pipeline.
+ var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.Clustering.Trainers.KMeans(new KMeansPlusPlusTrainer.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.Clustering.Evaluate(scoredData);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Multiclass Classification.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateMulticlassClassification()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var data = mlContext.Data.ReadFromTextFile<Iris>(GetDataPath(TestDatasets.iris.trainFilename),
+ hasHeader: TestDatasets.iris.fileHasHeader,
+ separatorChar: TestDatasets.iris.fileSeparator);
+
+ // Create a training pipeline.
+ var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
+ new SdcaMultiClassTrainer.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.MulticlassClassification.Evaluate(scoredData);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Ranking.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateRanking()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var data = Iris.LoadAsRankingProblem(mlContext,
+ GetDataPath(TestDatasets.iris.trainFilename),
+ hasHeader: TestDatasets.iris.fileHasHeader,
+ separatorChar: TestDatasets.iris.fileSeparator);
+
+ // Create a training pipeline.
+ var pipeline = mlContext.Transforms.Concatenate("Features", Iris.Features)
+ .Append(mlContext.Ranking.Trainers.FastTree(new FastTreeRankingTrainer.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.Ranking.Evaluate(scoredData, label: "Label", groupId: "GroupId");
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Recommendation.
+ /// </summary>
+ [MatrixFactorizationFact]
+ public void TrainAndEvaluateRecommendation()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ // Get the dataset.
+ var data = TrivialMatrixFactorization.LoadAndFeaturizeFromTextFile(
+ mlContext,
+ GetDataPath(TestDatasets.trivialMatrixFactorization.trainFilename),
+ TestDatasets.trivialMatrixFactorization.fileHasHeader,
+ TestDatasets.trivialMatrixFactorization.fileSeparator);
+
+ // Create a pipeline to train a matrix factorization model.
+ var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(
+ new MatrixFactorizationTrainer.Options {
+ MatrixColumnIndexColumnName = "MatrixColumnIndex",
+ MatrixRowIndexColumnName = "MatrixRowIndex",
+ LabelColumnName = "Label",
+ NumberOfIterations = 3,
+ NumberOfThreads = 1,
+ ApproximationRank = 4,
+ });
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.Recommendation().Evaluate(scoredData);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Train and Evaluate: Regression.
+ /// </summary>
+ [Fact]
+ public void TrainAndEvaluateRegression()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ // Get the dataset.
+ var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+ hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
+ .Read(GetDataPath(TestDatasets.housing.trainFilename));
+
+ // Create a pipeline to train on the housing data.
+ var pipeline = mlContext.Transforms.Concatenate("Features", new string[] {
+ "CrimesPerCapita", "PercentResidental", "PercentNonRetail", "CharlesRiver", "NitricOxides", "RoomsPerDwelling",
+ "PercentPre40s", "EmploymentDistance", "HighwayDistance", "TaxRate", "TeacherRatio"})
+ .Append(mlContext.Transforms.CopyColumns("Label", "MedianHomeValue"))
+ .Append(mlContext.Regression.Trainers.FastTree(new FastTreeRegressionTrainer.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.Regression.Evaluate(scoredData);
+
+ // Check that the metrics returned are valid.
+ Common.AssertMetrics(metrics);
+ }
+
+ /// <summary>
+ /// Evaluate With Precision-Recall Curves.
+ /// </summary>
+ /// <remarks>
+ /// This is currently not possible using the APIs.
+ /// </remarks>
+ [Fact]
+ public void TrainAndEvaluateWithPrecisionRecallCurves()
+ {
+ var mlContext = new MLContext(seed: 1, conc: 1);
+
+ var data = mlContext.Data.ReadFromTextFile<TweetSentiment>(GetDataPath(TestDatasets.Sentiment.trainFilename),
+ hasHeader: TestDatasets.Sentiment.fileHasHeader,
+ separatorChar: TestDatasets.Sentiment.fileSeparator);
+
+ // Create a training pipeline.
+ var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
+ new LogisticRegression.Options { NumThreads = 1 }));
+
+ // Train the model.
+ var model = pipeline.Fit(data);
+
+ // Evaluate the model.
+ var scoredData = model.Transform(data);
+ var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
+
+ Common.AssertMetrics(metrics);
+
+ // This scenario is not possible with the current set of APIs.
+ // There could be two ways imaginable:
+ // 1. Getting a list of (P,R) from the Evaluator (as it calculates most of the information already).
+ // Not currently possible.
+ // 2. Manually setting the classifier threshold and calling evaluate many times:
+ // Not currently possible: Todo #2465: Allow the setting of threshold and thresholdColumn for scoring.
+ // Technically, this scenario is possible using custom mappers like so:
+ // 1. Get a list of all unique probability scores.
+ // e.g. By reading the IDataView as an IEnumerable, and keeping a hash of known probabilities up to some precision.
+ // 2. For each value of probability:
+ // a. Write a custom mapper to produce PredictedLabel at that probability threshold.
+ // b. Calculate Precision and Recall with these labels.
+ // c. Append the Precision and Recall to an IList.
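+
+ // A minimal sketch of steps 2.a-c above, assuming the scored rows can be materialized as
+ // (label, probability) pairs; the names below are illustrative only and not part of this change:
+ //
+ // IEnumerable<(bool Label, float Probability)> predictions = ...;
+ // foreach (var threshold in predictions.Select(p => p.Probability).Distinct().OrderBy(p => p))
+ // {
+ //     var tp = predictions.Count(p => p.Probability >= threshold && p.Label);
+ //     var fp = predictions.Count(p => p.Probability >= threshold && !p.Label);
+ //     var fn = predictions.Count(p => p.Probability < threshold && p.Label);
+ //     var precision = tp / (double)(tp + fp);
+ //     var recall = tp / (double)(tp + fn);
+ //     // Append (precision, recall) to the curve.
+ // }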
+ }
+ }
+}
\ No newline at end of file
diff --git a/test/Microsoft.ML.Functional.Tests/Prediction.cs b/test/Microsoft.ML.Functional.Tests/Prediction.cs
index 661aac3fcd..74ec111c92 100644
--- a/test/Microsoft.ML.Functional.Tests/Prediction.cs
+++ b/test/Microsoft.ML.Functional.Tests/Prediction.cs
@@ -22,7 +22,8 @@ public void ReconfigurablePrediction()
var mlContext = new MLContext(seed: 789);
// Get the dataset, create a train and test
- var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+ var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+ hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
.Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
diff --git a/test/Microsoft.ML.Functional.Tests/Validation.cs b/test/Microsoft.ML.Functional.Tests/Validation.cs
index eebe55b58c..ed1cccbf7c 100644
--- a/test/Microsoft.ML.Functional.Tests/Validation.cs
+++ b/test/Microsoft.ML.Functional.Tests/Validation.cs
@@ -27,7 +27,8 @@ void CrossValidation()
var mlContext = new MLContext(seed: 1, conc: 1);
// Get the dataset.
- var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+ var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+ hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
.Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
// Create a pipeline to train on the sentiment data.
@@ -60,7 +61,8 @@ public void TrainWithValidationSet()
var mlContext = new MLContext(seed: 1, conc: 1);
// Get the dataset.
- var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(), hasHeader: true)
+ var data = mlContext.Data.CreateTextLoader(TestDatasets.housing.GetLoaderColumns(),
+ hasHeader: TestDatasets.housing.fileHasHeader, separatorChar: TestDatasets.housing.fileSeparator)
.Read(BaseTestClass.GetDataPath(TestDatasets.housing.trainFilename));
var dataSplit = mlContext.Regression.TrainTestSplit(data, testFraction: 0.2);
var trainData = dataSplit.TrainSet;
diff --git a/test/Microsoft.ML.TestFramework/Datasets.cs b/test/Microsoft.ML.TestFramework/Datasets.cs
index 7197f1f64b..abc9862049 100644
--- a/test/Microsoft.ML.TestFramework/Datasets.cs
+++ b/test/Microsoft.ML.TestFramework/Datasets.cs
@@ -14,6 +14,8 @@ public class TestDataset
public string testFilename;
public string validFilename;
public string labelFilename;
+ public char fileSeparator;
+ public bool fileHasHeader;
// REVIEW: Replace these with appropriate SubComponents!
public string settings;
@@ -158,6 +160,8 @@ public static class TestDatasets
name = "housing",
trainFilename = "housing.txt",
testFilename = "housing.txt",
+ fileSeparator = '\t',
+ fileHasHeader = true,
loaderSettings = "loader=Text{col=Label:0 col=Features:~ header=+}",
GetLoaderColumns = () =>
{
@@ -206,6 +210,8 @@ public static class TestDatasets
name = "sentiment",
trainFilename = "wikipedia-detox-250-line-data.tsv",
testFilename = "wikipedia-detox-250-line-test.tsv",
+ fileHasHeader = true,
+ fileSeparator = '\t',
GetLoaderColumns = () =>
{
return new[]
@@ -447,6 +453,8 @@ public static class TestDatasets
name = "iris",
trainFilename = @"iris.txt",
testFilename = @"iris.txt",
+ fileHasHeader = true,
+ fileSeparator = '\t'
};
public static TestDataset irisMissing = new TestDataset()
@@ -655,6 +663,8 @@ public static class TestDatasets
name = "mnistOneClass",
trainFilename = @"MNIST.Train.0-class.tiny.txt",
testFilename = @"MNIST.Test.tiny.txt",
+ fileHasHeader = false,
+ fileSeparator = '\t',
settings = ""
};
@@ -704,6 +714,8 @@ public static class TestDatasets
name = "trivialMatrixFactorization",
trainFilename = @"trivial-train.tsv",
testFilename = @"trivial-test.tsv",
+ fileHasHeader = true,
+ fileSeparator = '\t',
loaderSettings = "loader=Text{col=Label:R4:0 col=User:U4[0-19]:1 col=Item:U4[0-39]:2 header+}"
};
}
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
deleted file mode 100644
index cad289872e..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Evaluation.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using Microsoft.ML.Data;
-using Microsoft.ML.RunTests;
-using Microsoft.ML.Trainers;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
- public partial class ApiScenariosTests
- {
- /// <summary>
- /// Evaluation: Similar to the simple train scenario, except instead of having some
- /// predictive structure, be able to score another "test" data file, run the result
- /// through an evaluator and get metrics like AUC, accuracy, PR curves, and whatnot.
- /// Getting metrics out of this should be as straightforward and unannoying as possible.
- /// </summary>
- [Fact]
- public void Evaluation()
- {
- var ml = new MLContext(seed: 1, conc: 1);
-
- // Pipeline.
- var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
- .Append(ml.Transforms.Text.FeaturizeText("Features", "SentimentText"))
- .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
- new SdcaBinaryTrainer.Options { NumThreads = 1 }));
-
- // Train.
- var readerModel = pipeline.Fit(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename)));
-
- // Evaluate on the test set.
- var dataEval = readerModel.Read(new MultiFileSource(GetDataPath(TestDatasets.Sentiment.testFilename)));
- var metrics = ml.BinaryClassification.Evaluate(dataEval);
- }
- }
-}