Skip to content

Cleaning and Fixing public API for set of learners. #2765

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Mar 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@ public class PriorTrainer
{
public static void Example()
{
// Downloading the dataset from github.com/dotnet/machinelearning.
// This will create a sentiment.tsv file in the filesystem.
// You can open this file, if you want to see the data.
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Download the dataset; the first file is used for training and the second for testing.
var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
var trainFile = dataFiles[0];
var testFile = dataFiles[1];

// A preview of the data.
// Sentiment SentimentText
// 0 " :Erm, thank you. "
// 1 ==You're cool==

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Step 1: Load the data as an IDataView.
// First, we define the loader: specify the data columns and where to find them in the text file.
// Step 1: Load the data as an IDataView.
// First, we define the loader: specify the data columns and where to find them in the text file.
var loader = mlContext.Data.CreateTextLoader(
columns: new[]
{
Expand All @@ -31,12 +31,9 @@ public static void Example()
},
hasHeader: true
);

// Load the data
var data = loader.Load(dataFile);

// Split it between training and test data
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
// Load the data
var trainData = loader.Load(trainFile);

// Step 2: Pipeline
// Featurize the text column through the FeaturizeText API.
Expand All @@ -47,19 +44,27 @@ public static void Example()
.Append(mlContext.BinaryClassification.Trainers.Prior(labelColumnName: "Sentiment"));

// Step 3: Train the pipeline
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
var trainedPipeline = pipeline.Fit(trainData);

// Step 4: Evaluate on the test set
var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
var transformedData = trainedPipeline.Transform(loader.Load(testFile));
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");

// Step 5: Inspect the output
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);

// The Prior trainer outputs the proportion of a label in the dataset as the probability of that label.
// In this case it means that there is a split of around 64%-36% of positive and negative labels in the dataset.
// In this case 'Accuracy: 0.50' means that there is a split of around 50%-50% of positive and negative labels in the test dataset.
// Expected output:
// Accuracy: 0.647058823529412

// Accuracy: 0.50
// AUC: 0.50
// F1 Score: 0.67
// Negative Precision: 0.00
// Negative Recall: 0.00
// Positive Precision: 0.50
// Positive Recall: 1.00
// LogLoss: 1.05
// LogLossReduction: -4.89
// Entropy: 1.00
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,59 +7,64 @@ public static class RandomTrainer
{
public static void Example()
{
// Downloading the dataset from github.com/dotnet/machinelearning.
// This will create a sentiment.tsv file in the filesystem.
// You can open this file, if you want to see the data.
string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset()[0];
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext(seed: 1);

// Download the dataset; the first file is used for training and the second for testing.
var dataFiles = SamplesUtils.DatasetUtils.DownloadSentimentDataset();
var trainFile = dataFiles[0];
var testFile = dataFiles[1];

// A preview of the data.
// Sentiment SentimentText
// 0 " :Erm, thank you. "
// 1 ==You're cool==

// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext(seed: 1);

// Step 1: Load the data as an IDataView.
// First, we define the loader: specify the data columns and where to find them in the text file.
var loader = mlContext.Data.CreateTextLoader(
// Step 1: Read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.Single, 0),
new TextLoader.Column("SentimentText", DataKind.String, 1)
},
hasHeader: true
);

// Load the data
var data = loader.Load(dataFile);

// Split it between training and test data
var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data);
// Read the data
var trainData = reader.Load(trainFile);
Copy link
Member

@sfilipi sfilipi Mar 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reader.Load [](start = 28, length = 11)

nit.: revert #Resolved


// Step 2: Pipeline
// Featurize the text column through the FeaturizeText API.
// Then append a binary classifier, setting the "Label" column as the label of the dataset, and
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText")
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.BinaryClassification.Trainers.Random());

// Step 3: Train the pipeline
var trainedPipeline = pipeline.Fit(trainTestData.TrainSet);
var trainedPipeline = pipeline.Fit(trainData);

// Step 4: Evaluate on the test set
var transformedData = trainedPipeline.Transform(trainTestData.TestSet);
var transformedData = trainedPipeline.Transform(reader.Load(testFile));
var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment");

// Step 5: Inspect the output
Console.WriteLine("Accuracy: " + evalMetrics.Accuracy);
SamplesUtils.ConsoleUtils.PrintMetrics(evalMetrics);

// We expect an output probability close to 0.5 as the Random trainer outputs a random prediction.
// Regardless of the input features, the trainer will predict either positive or negative label with equal probability.
// Expected output (close to 0.5):
// Accuracy: 0.588235294117647
// Expected output (close to 0.5):

// Accuracy: 0.56
// AUC: 0.57
// F1 Score: 0.60
// Negative Precision: 0.57
// Negative Recall: 0.44
// Positive Precision: 0.55
// Positive Recall: 0.67
// LogLoss: 1.53
// LogLossReduction: -53.37
// Entropy: 1.00
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public static void Example()
var split = mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.1);
// Create data training pipeline
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
new ML.Trainers.HalLearners.SymbolicStochasticGradientDescentClassificationTrainer.Options()
{
LearningRate = 0.2f,
NumberOfIterations = 10,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static void Example()

// Create the estimator, here we only need OrdinaryLeastSquares trainer
// as data is already processed in a form consumable by the trainer
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OlsLinearRegressionTrainer.Options()
var pipeline = mlContext.Regression.Trainers.OrdinaryLeastSquares(new OrdinaryLeastSquaresRegressionTrainer.Options()
{
L2Weight = 0.1f,
PerParameterSignificance = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace Microsoft.ML.Trainers
{
using Mkl = OlsLinearRegressionTrainer.Mkl;
using Mkl = OrdinaryLeastSquaresRegressionTrainer.Mkl;

public sealed class ComputeLRTrainingStdThroughHal : ComputeLRTrainingStd
{
Expand Down
40 changes: 20 additions & 20 deletions src/Microsoft.ML.HalLearners/HalLearnersCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
namespace Microsoft.ML
{
/// <summary>
/// The trainer catalog extensions for the <see cref="OlsLinearRegressionTrainer"/> and <see cref="SymSgdClassificationTrainer"/>.
/// The trainer catalog extensions for the <see cref="OrdinaryLeastSquaresRegressionTrainer"/> and <see cref="SymbolicStochasticGradientDescentClassificationTrainer"/>.
/// </summary>
public static class HalLearnersCatalog
{
/// <summary>
/// Predict a target using a linear regression model trained with the <see cref="OlsLinearRegressionTrainer"/>.
/// Predict a target using a linear regression model trained with the <see cref="OrdinaryLeastSquaresRegressionTrainer"/>.
/// </summary>
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="labelColumnName">The name of the label column.</param>
Expand All @@ -27,48 +27,48 @@ public static class HalLearnersCatalog
/// ]]>
/// </format>
/// </example>
public static OlsLinearRegressionTrainer OrdinaryLeastSquares(this RegressionCatalog.RegressionTrainers catalog,
public static OrdinaryLeastSquaresRegressionTrainer OrdinaryLeastSquares(this RegressionCatalog.RegressionTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
string exampleWeightColumnName = null)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);
var options = new OlsLinearRegressionTrainer.Options
var options = new OrdinaryLeastSquaresRegressionTrainer.Options
{
LabelColumnName = labelColumnName,
FeatureColumnName = featureColumnName,
ExampleWeightColumnName = exampleWeightColumnName
};

return new OlsLinearRegressionTrainer(env, options);
return new OrdinaryLeastSquaresRegressionTrainer(env, options);
}

/// <summary>
/// Predict a target using a linear regression model trained with the <see cref="OlsLinearRegressionTrainer"/>.
/// Predict a target using a linear regression model trained with the <see cref="OrdinaryLeastSquaresRegressionTrainer"/>.
/// </summary>
/// <param name="catalog">The <see cref="RegressionCatalog"/>.</param>
/// <param name="options">Algorithm advanced options. See <see cref="OlsLinearRegressionTrainer.Options"/>.</param>
/// <param name="options">Algorithm advanced options. See <see cref="OrdinaryLeastSquaresRegressionTrainer.Options"/>.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OrdinaryLeastSquares](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/Regression/OrdinaryLeastSquaresWithOptions.cs)]
/// ]]>
/// </format>
/// </example>
public static OlsLinearRegressionTrainer OrdinaryLeastSquares(
public static OrdinaryLeastSquaresRegressionTrainer OrdinaryLeastSquares(
this RegressionCatalog.RegressionTrainers catalog,
OlsLinearRegressionTrainer.Options options)
OrdinaryLeastSquaresRegressionTrainer.Options options)
{
Contracts.CheckValue(catalog, nameof(catalog));
Contracts.CheckValue(options, nameof(options));

var env = CatalogUtils.GetEnvironment(catalog);
return new OlsLinearRegressionTrainer(env, options);
return new OrdinaryLeastSquaresRegressionTrainer(env, options);
}

/// <summary>
/// Predict a target using a linear binary classification model trained with the <see cref="SymSgdClassificationTrainer"/>.
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicStochasticGradientDescentClassificationTrainer"/>.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="labelColumnName">The name of the label column.</param>
Expand All @@ -81,43 +81,43 @@ public static OlsLinearRegressionTrainer OrdinaryLeastSquares(
/// ]]>
/// </format>
/// </example>
public static SymSgdClassificationTrainer SymbolicStochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
public static SymbolicStochasticGradientDescentClassificationTrainer SymbolicStochasticGradientDescent(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
string labelColumnName = DefaultColumnNames.Label,
string featureColumnName = DefaultColumnNames.Features,
int numberOfIterations = SymSgdClassificationTrainer.Defaults.NumberOfIterations)
int numberOfIterations = SymbolicStochasticGradientDescentClassificationTrainer.Defaults.NumberOfIterations)
{
Contracts.CheckValue(catalog, nameof(catalog));
var env = CatalogUtils.GetEnvironment(catalog);

var options = new SymSgdClassificationTrainer.Options
var options = new SymbolicStochasticGradientDescentClassificationTrainer.Options
{
LabelColumnName = labelColumnName,
FeatureColumnName = featureColumnName,
};

return new SymSgdClassificationTrainer(env, options);
return new SymbolicStochasticGradientDescentClassificationTrainer(env, options);
}

/// <summary>
/// Predict a target using a linear binary classification model trained with the <see cref="SymSgdClassificationTrainer"/>.
/// Predict a target using a linear binary classification model trained with the <see cref="SymbolicStochasticGradientDescentClassificationTrainer"/>.
/// </summary>
/// <param name="catalog">The <see cref="BinaryClassificationCatalog"/>.</param>
/// <param name="options">Algorithm advanced options. See <see cref="SymSgdClassificationTrainer.Options"/>.</param>
/// <param name="options">Algorithm advanced options. See <see cref="SymbolicStochasticGradientDescentClassificationTrainer.Options"/>.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[SymbolicStochasticGradientDescent](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs)]
/// ]]>
/// </format>
/// </example>
public static SymSgdClassificationTrainer SymbolicStochasticGradientDescent(
public static SymbolicStochasticGradientDescentClassificationTrainer SymbolicStochasticGradientDescent(
this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
SymSgdClassificationTrainer.Options options)
SymbolicStochasticGradientDescentClassificationTrainer.Options options)
{
Contracts.CheckValue(catalog, nameof(catalog));
Contracts.CheckValue(options, nameof(options));
var env = CatalogUtils.GetEnvironment(catalog);
return new SymSgdClassificationTrainer(env, options);
return new SymbolicStochasticGradientDescentClassificationTrainer(env, options);
}

/// <summary>
Expand Down
Loading