diff --git a/test/Microsoft.ML.Functional.Tests/Debugging.cs b/test/Microsoft.ML.Functional.Tests/Debugging.cs
new file mode 100644
index 0000000000..a495c99c99
--- /dev/null
+++ b/test/Microsoft.ML.Functional.Tests/Debugging.cs
@@ -0,0 +1,214 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Generic;
+using Microsoft.ML.Data;
+using Microsoft.ML.Functional.Tests.Datasets;
+using Microsoft.ML.RunTests;
+using Microsoft.ML.TestFramework;
+using Microsoft.ML.Trainers;
+using Microsoft.ML.Transforms.Text;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace Microsoft.ML.Functional.Tests
+{
+ public class Debugging : BaseTestClass
+ {
+ public Debugging(ITestOutputHelper output) : base(output)
+ {
+ }
+
+ ///
+ /// Debugging: The individual pipeline steps can be inspected to see what is happening to
+ /// data as it flows through.
+ ///
+ ///
+ /// It should, possibly through the debugger, be not such a pain to actually
+ /// see what is happening to your data when you apply this or that transform. For example, if I
+ /// were to have the text "Help I'm a bug!" I should be able to see the steps where it is
+ /// normalized to "help i'm a bug" then tokenized into ["help", "i'm", "a", "bug"] then
+ /// mapped into term numbers [203, 25, 3, 511] then projected into the sparse
+ /// float vector {3:1, 25:1, 203:1, 511:1}, etc. etc.
+ ///
+ [Fact]
+ void InspectIntermediatePipelineSteps()
+ {
+ var mlContext = new MLContext(seed: 1);
+
+ var data = mlContext.Data.LoadFromEnumerable(
+ new TweetSentiment[]
+ {
+ new TweetSentiment { Sentiment = true, SentimentText = "I love ML.NET." },
+ new TweetSentiment { Sentiment = true, SentimentText = "I love TLC." },
+ new TweetSentiment { Sentiment = false, SentimentText = "I dislike fika." }
+ });
+
+ // create a training pipeline.
+ var pipeline = mlContext.Transforms.Text.FeaturizeText(
+ "Features",
+ new TextFeaturizingEstimator.Options
+ {
+ KeepPunctuations = false,
+ OutputTokens = true,
+ CharFeatureExtractor = null, // new WordBagEstimator.Options { NgramLength = 0, SkipLength = -1 },
+ WordFeatureExtractor = new WordBagEstimator.Options { NgramLength = 1},
+ Norm = TextFeaturizingEstimator.NormFunction.None
+ },
+ "SentimentText");
+
+ // Fit the pipeline to the data.
+ var model = pipeline.Fit(data);
+
+ // Transform the data.
+ var transformedData = model.Transform(data);
+
+ var preview = transformedData.Preview();
+
+ // Verify that columns can be inspected.
+ // Validate the tokens column.
+ var tokensColumn = transformedData.GetColumn(transformedData.Schema["Features_TransformedText"]);
+ var expectedTokens = new string[3][]
+ {
+ new string[] {"i", "love", "mlnet"},
+ new string[] {"i", "love", "tlc"},
+ new string[] {"i", "dislike", "fika"},
+ };
+ int i = 0;
+ foreach (var rowTokens in tokensColumn)
+ Assert.Equal(expectedTokens[i++], rowTokens);
+
+ // Validate the Features column.
+ var featuresColumn = transformedData.GetColumn(transformedData.Schema["Features"]);
+ var expectedFeatures = new float[3][]
+ {
+ new float[6] { 1, 1, 1, 0, 0 ,0 },
+ new float[6] { 1, 1, 0, 1, 0, 0 },
+ new float[6] { 1, 0, 0, 0, 1, 1 }
+ };
+ i = 0;
+ foreach (var rowFeatures in featuresColumn)
+ Assert.Equal(expectedFeatures[i++], rowFeatures);
+ }
+
+ ///
+ /// Debugging: The schema of the pipeline can be inspected.
+ ///
+ [Fact]
+ public void InspectPipelineSchema()
+ {
+ var mlContext = new MLContext(seed: 1);
+
+ // Get the dataset.
+ var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+ // Define a pipeline
+ var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+ .Append(mlContext.Transforms.Normalize())
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.Regression.Trainers.Sdca(
+ new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 20 }));
+
+ // Fit the pipeline to the data.
+ var model = pipeline.Fit(data);
+
+ // Inspect the model schema, and verify that a Score column is produced.
+ var outputSchema = model.GetOutputSchema(data.Schema);
+ var columnNames = new string[outputSchema.Count];
+ int i = 0;
+ foreach (var column in outputSchema)
+ columnNames[i++] = column.Name;
+ Assert.Contains("Score", columnNames);
+ }
+
+ ///
+ /// Debugging: The schema read in can be verified by inspecting the data.
+ ///
+ [Fact]
+ public void InspectSchemaUponLoadingData()
+ {
+ var mlContext = new MLContext(seed: 1);
+
+ // Get the dataset.
+ var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+ // Verify the column names.
+ int i = 0;
+ foreach (var column in data.Schema)
+ {
+ if (i == 0)
+ Assert.Equal("Label", column.Name);
+ else
+ Assert.Equal(HousingRegression.Features[i-1], column.Name);
+ i++;
+ }
+
+ // Verify that I can cast it to the right schema by inspecting the first row.
+ foreach (var row in mlContext.Data.CreateEnumerable(mlContext.Data.TakeRows(data, 1), true))
+ {
+ // Validate there was data in the row by checking that some values were not zero since zero is the default.
+ var rowSum = row.MedianHomeValue;
+ foreach (var property in HousingRegression.Features)
+ rowSum += (float) row.GetType().GetProperty(property).GetValue(row, null);
+
+ Assert.NotEqual(0, rowSum);
+ }
+ }
+
+ ///
+ /// Debugging: The progress of training can be accessed.
+ ///
+ [Fact]
+ public void ViewTrainingOutput()
+ {
+ var mlContext = new MLContext(seed: 1);
+
+ // Attach a listener.
+ var logWatcher = new LogWatcher();
+ mlContext.Log += logWatcher.ObserveEvent;
+
+ // Get the dataset.
+ var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+ // Define a pipeline
+ var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+ .Append(mlContext.Transforms.Normalize())
+ .AppendCacheCheckpoint(mlContext)
+ .Append(mlContext.Regression.Trainers.Sdca(
+ new SdcaRegressionTrainer.Options { NumberOfThreads = 1, MaximumNumberOfIterations = 20 }));
+
+ // Fit the pipeline to the data.
+ var model = pipeline.Fit(data);
+
+ // Validate that we can read lines from the file.
+ var expectedLines = new string[3] {
+ @"[Source=SdcaTrainerBase; Training, Kind=Info] Auto-tuning parameters: L2 = 0.001.",
+ @"[Source=SdcaTrainerBase; Training, Kind=Info] Auto-tuning parameters: L1Threshold (L1/L2) = 0.",
+ @"[Source=SdcaTrainerBase; Training, Kind=Info] Using best model from iteration 7."};
+ foreach (var line in expectedLines)
+ {
+ Assert.Contains(line, logWatcher.Lines);
+ Assert.Equal(1, logWatcher.Lines[line]);
+ }
+ }
+
+ internal class LogWatcher {
+
+ public readonly IDictionary Lines;
+
+ public LogWatcher()
+ {
+ Lines = new Dictionary();
+ }
+
+ public void ObserveEvent(object sender, LoggingEventArgs e)
+ {
+ if (Lines.ContainsKey(e.Message))
+ Lines[e.Message]++;
+ else
+ Lines[e.Message] = 1;
+ }
+ }
+ }
+}
diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
deleted file mode 100644
index 8f4f3a8495..0000000000
--- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/Visibility.cs
+++ /dev/null
@@ -1,39 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-
-using System.Collections.Generic;
-using System.Linq;
-using Microsoft.ML.Data;
-using Microsoft.ML.RunTests;
-using Xunit;
-
-namespace Microsoft.ML.Tests.Scenarios.Api
-{
- public partial class ApiScenariosTests
- {
- ///
- /// Visibility: It should, possibly through the debugger, be not such a pain to actually
- /// see what is happening to your data when you apply this or that transform. For example, if I
- /// were to have the text "Help I'm a bug!" I should be able to see the steps where it is
- /// normalized to "help i'm a bug" then tokenized into ["help", "i'm", "a", "bug"] then
- /// mapped into term numbers [203, 25, 3, 511] then projected into the sparse
- /// float vector {3:1, 25:1, 203:1, 511:1}, etc. etc.
- ///
- [Fact]
- void Visibility()
- {
- var ml = new MLContext(seed: 1);
- var pipeline = ml.Data.CreateTextLoader(TestDatasets.Sentiment.GetLoaderColumns(), hasHeader: true)
- .Append(ml.Transforms.Text.FeaturizeText(
- "Features", new Transforms.Text.TextFeaturizingEstimator.Options { OutputTokens = true }, "SentimentText"));
-
- var src = new MultiFileSource(GetDataPath(TestDatasets.Sentiment.trainFilename));
- var data = pipeline.Fit(src).Load(src);
-
- var textColumn = data.GetColumn(data.Schema["SentimentText"]).Take(20);
- var transformedTextColumn = data.GetColumn(data.Schema["Features_TransformedText"]).Take(20);
- var features = data.GetColumn(data.Schema["Features"]).Take(20);
- }
- }
-}