diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs new file mode 100644 index 0000000000..4d29f2c9ae --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/PriorTrainerSample.cs @@ -0,0 +1,67 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class PriorTrainerSample + { + public static void Example() + { + // Downloading the dataset from github.com/dotnet/machinelearning. + // This will create a sentiment.tsv file in the filesystem. + // You can open this file, if you want to see the data. + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + + // A preview of the data. + // Sentiment SentimentText + // 0 " :Erm, thank you. " + // 1 ==You're cool== + + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(); + + // Step 1: Read the data as an IDataView. + // First, we define the reader: specify the data columns and where to find them in the text file. + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Sentiment", DataKind.R4, 0), + new TextLoader.Column("SentimentText", DataKind.Text, 1) + }, + hasHeader: true + ); + + // Read the data + var data = reader.Read(dataFile); + + // Split it between training and test data + var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data); + + // Step 2: Pipeline + // Featurize the text column through the FeaturizeText API. + // Then append a binary classifier, setting the "Sentiment" column as the label of the dataset, and + // the "Features" column produced by FeaturizeText as the features column. 
+ var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") + .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. + .Append(mlContext.BinaryClassification.Trainers.Prior(labelColumn: "Sentiment")); + + // Step 3: Train the pipeline + var trainedPipeline = pipeline.Fit(trainTestData.TrainSet); + + // Step 4: Evaluate on the test set + var transformedData = trainedPipeline.Transform(trainTestData.TestSet); + var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment"); + + // Step 5: Inspect the output + Console.WriteLine("Accuracy: " + evalMetrics.Accuracy); + + // The Prior trainer outputs the proportion of a label in the dataset as the probability of that label. + // In this case it means that there is a split of around 64%-36% of positive and negative labels in the dataset. + // Expected output: + // Accuracy: 0.647058823529412 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs new file mode 100644 index 0000000000..70cd3f539a --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/RandomTrainerSample.cs @@ -0,0 +1,67 @@ +using System; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Trainers; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class RandomTrainerSample + { + public static void Example() + { + // Downloading the dataset from github.com/dotnet/machinelearning. + // This will create a sentiment.tsv file in the filesystem. + // You can open this file, if you want to see the data. + string dataFile = SamplesUtils.DatasetUtils.DownloadSentimentDataset(); + + // A preview of the data. + // Sentiment SentimentText + // 0 " :Erm, thank you. " + // 1 ==You're cool== + + // Create a new context for ML.NET operations. 
It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + var mlContext = new MLContext(seed: 1); + + // Step 1: Read the data as an IDataView. + // First, we define the reader: specify the data columns and where to find them in the text file. + var reader = mlContext.Data.CreateTextLoader( + columns: new[] + { + new TextLoader.Column("Sentiment", DataKind.R4, 0), + new TextLoader.Column("SentimentText", DataKind.Text, 1) + }, + hasHeader: true + ); + + // Read the data + var data = reader.Read(dataFile); + + // Split it between training and test data + var trainTestData = mlContext.BinaryClassification.TrainTestSplit(data); + + // Step 2: Pipeline + // Featurize the text column through the FeaturizeText API. + // Then append the Random binary classifier. No label or feature column name is passed to it here; + // the "Sentiment" label is only supplied later, when evaluating the predictions. + var pipeline = mlContext.Transforms.Text.FeaturizeText("Features", "SentimentText") + .AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline. + .Append(mlContext.BinaryClassification.Trainers.Random()); + + // Step 3: Train the pipeline + var trainedPipeline = pipeline.Fit(trainTestData.TrainSet); + + // Step 4: Evaluate on the test set + var transformedData = trainedPipeline.Transform(trainTestData.TestSet); + var evalMetrics = mlContext.BinaryClassification.Evaluate(transformedData, label: "Sentiment"); + + // Step 5: Inspect the output + Console.WriteLine("Accuracy: " + evalMetrics.Accuracy); + + // We expect an accuracy close to 0.5 as the Random trainer outputs a random prediction. + // Regardless of the input features, the trainer will predict either positive or negative label with equal probability. 
+ // Expected output (close to 0.5): + // Accuracy: 0.588235294117647 + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs new file mode 100644 index 0000000000..17cd9a7b49 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs @@ -0,0 +1,70 @@ +using System; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class CustomMappingSample + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Get a small dataset as an IEnumerable and convert it to an IDataView. + var data = SamplesUtils.DatasetUtils.GetInfertData(); + var trainData = mlContext.Data.ReadFromEnumerable(data); + + // Preview of the data. + // + // Age Case Education Induced Parity PooledStratum RowNum ... + // 26 1 0-5yrs 1 6 3 1 ... + // 42 1 0-5yrs 1 1 1 2 ... + // 39 1 0-5yrs 2 6 4 3 ... + // 34 1 0-5yrs 2 4 2 4 ... + // 35 1 6-11yrs 1 3 32 5 ... + + // We define the custom mapping between input and output rows that will be applied by the transformation. + Action mapping = + (input, output) => output.IsUnderThirty = input.Age < 30; + + // Custom transformations can be used to transform data directly, or as part of a pipeline. Below we transform data directly. + var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null); + var transformedData = transformer.Transform(trainData); + + // Preview of the data. + // + // IsUnderThirty Age Case Education Induced Parity PooledStratum RowNum ... + // true 26 1 0-5yrs 1 6 3 1 ... + // false 42 1 0-5yrs 1 1 1 2 ... + // false 39 1 0-5yrs 2 6 4 3 ... + // false 34 1 0-5yrs 2 4 2 4 ... + // false 35 1 6-11yrs 1 3 32 5 ... 
+ + // Here instead we use it as part of a pipeline of estimators. + var pipeline = mlContext.Transforms.CustomMapping(mapping, null) + .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", inputColumnNames: new[] { "Parity", "Induced" })) + // It is useful to add a caching checkpoint before a trainer that does several passes over the data. + .AppendCacheCheckpoint(mlContext) + // We use binary FastTree to predict the label column that was generated by the custom mapping at the first step of the pipeline. + .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "IsUnderThirty")); + + // We can train the pipeline and use it to transform data. + transformedData = pipeline.Fit(trainData).Transform(trainData); + } + + // Represents the transformed infertility dataset. + public class SampleInfertDataTransformed + { + public int RowNum { get; set; } + public string Education { get; set; } + public bool IsUnderThirty { get; set; } + public float Parity { get; set; } + public float Induced { get; set; } + public float Case { get; set; } + public float Spontaneous { get; set; } + public float Stratum { get; set; } + public float PooledStratum { get; set; } + } + } +} diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index 2a8a25df2b..282e060259 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -905,7 +905,7 @@ private Schema ComputeOutputSchema() internal const string Summary = "Loads text data file."; - public const string LoaderSignature = "TextLoader"; + internal const string LoaderSignature = "TextLoader"; private const uint VerForceVectorSupported = 0x0001000A; private const uint VersionNoMinCount = 0x0001000C; diff --git a/src/Microsoft.ML.Data/DataLoadSave/TransformWrapper.cs b/src/Microsoft.ML.Data/DataLoadSave/TransformWrapper.cs index 24a29b1cc9..8ac1b93c36 100644 --- 
a/src/Microsoft.ML.Data/DataLoadSave/TransformWrapper.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/TransformWrapper.cs @@ -20,7 +20,7 @@ namespace Microsoft.ML.Data // It needs to become internal. public sealed class TransformWrapper : ITransformer { - public const string LoaderSignature = "TransformWrapper"; + internal const string LoaderSignature = "TransformWrapper"; private const string TransformDirTemplate = "Step_{0:000}"; private readonly IHost _host; diff --git a/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs b/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs index 64f71aba83..5eceebf857 100644 --- a/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs +++ b/src/Microsoft.ML.Data/Scorers/PredictionTransformer.cs @@ -54,6 +54,10 @@ public abstract class PredictionTransformerBase : IPredictionTransformer private protected ISchemaBindableMapper BindableMapper; protected Schema TrainSchema; + /// + /// Whether a call to should succeed, on an + /// appropriate schema. + /// public bool IsRowToRowMapper => true; /// @@ -257,8 +261,8 @@ private protected GenericScorer GetGenericScorer() public sealed class AnomalyPredictionTransformer : SingleFeaturePredictionTransformerBase where TModel : class { - public readonly string ThresholdColumn; - public readonly float Threshold; + internal readonly string ThresholdColumn; + internal readonly float Threshold; [BestFriend] internal AnomalyPredictionTransformer(IHostEnvironment env, TModel model, Schema inputSchema, string featureColumn, @@ -326,8 +330,8 @@ private static VersionInfo GetVersionInfo() public sealed class BinaryPredictionTransformer : SingleFeaturePredictionTransformerBase where TModel : class { - public readonly string ThresholdColumn; - public readonly float Threshold; + internal readonly string ThresholdColumn; + internal readonly float Threshold; [BestFriend] internal BinaryPredictionTransformer(IHostEnvironment env, TModel model, Schema inputSchema, string featureColumn, diff --git 
a/src/Microsoft.ML.FastTree/FastTreeRegression.cs b/src/Microsoft.ML.FastTree/FastTreeRegression.cs index 1399b8bebe..31abba62a8 100644 --- a/src/Microsoft.ML.FastTree/FastTreeRegression.cs +++ b/src/Microsoft.ML.FastTree/FastTreeRegression.cs @@ -36,7 +36,7 @@ namespace Microsoft.ML.Trainers.FastTree public sealed partial class FastTreeRegressionTrainer : BoostingFastTreeTrainerBase, FastTreeRegressionModelParameters> { - public const string LoadNameValue = "FastTreeRegression"; + internal const string LoadNameValue = "FastTreeRegression"; internal const string UserNameValue = "FastTree (Boosted Trees) Regression"; internal const string Summary = "Trains gradient boosted decision trees to fit target values using least-squares."; internal const string ShortName = "ftr"; @@ -142,7 +142,7 @@ private protected override OptimizationAlgorithm ConstructOptimizationAlgorithm( /// /// The dataset /// The list of regression targets, or null if was null - public static float[] GetDatasetRegressionLabels(Dataset set) + internal static float[] GetDatasetRegressionLabels(Dataset set) { if (set == null) return null; diff --git a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs index 547d48f7ca..f31c82afd1 100644 --- a/src/Microsoft.ML.FastTree/FastTreeTweedie.cs +++ b/src/Microsoft.ML.FastTree/FastTreeTweedie.cs @@ -149,7 +149,7 @@ private protected override OptimizationAlgorithm ConstructOptimizationAlgorithm( /// /// The dataset /// The list of regression targets, or null if was null - public static float[] GetDatasetRegressionLabels(Dataset set) + internal static float[] GetDatasetRegressionLabels(Dataset set) { if (set == null) return null; diff --git a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs index 559ed6b5e1..baa871f2fa 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmRankingTrainer.cs @@ -75,9 +75,9 @@ private 
static LightGbmRankingModelParameters Create(IHostEnvironment env, Model /// public sealed class LightGbmRankingTrainer : LightGbmTrainerBase, LightGbmRankingModelParameters> { - public const string UserName = "LightGBM Ranking"; - public const string LoadNameValue = "LightGBMRanking"; - public const string ShortName = "LightGBMRank"; + internal const string UserName = "LightGBM Ranking"; + internal const string LoadNameValue = "LightGBMRanking"; + internal const string ShortName = "LightGBMRank"; public override PredictionKind PredictionKind => PredictionKind.Ranking; diff --git a/src/Microsoft.ML.OnnxTransform/DnnImageFeaturizerTransform.cs b/src/Microsoft.ML.OnnxTransform/DnnImageFeaturizerTransform.cs index dbac203908..87cdb09dc7 100644 --- a/src/Microsoft.ML.OnnxTransform/DnnImageFeaturizerTransform.cs +++ b/src/Microsoft.ML.OnnxTransform/DnnImageFeaturizerTransform.cs @@ -61,7 +61,7 @@ public sealed class DnnImageFeaturizerEstimator : IEstimator /// Name of the column resulting from the transformation of . /// Name of column to transform. If set to , the value of the will be used as source. - public DnnImageFeaturizerEstimator(IHostEnvironment env, string outputColumnName, Func> modelFactory, string inputColumnName = null) + internal DnnImageFeaturizerEstimator(IHostEnvironment env, string outputColumnName, Func> modelFactory, string inputColumnName = null) { _modelChain = modelFactory(new DnnImageFeaturizerInput(outputColumnName, inputColumnName ?? outputColumnName, env, new DnnImageModelSelector())); } diff --git a/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs b/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs index f045ab2fa1..a960891d1a 100644 --- a/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs +++ b/src/Microsoft.ML.OnnxTransform/OnnxCatalog.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using Microsoft.ML.Data; using Microsoft.ML.Transforms; @@ -61,5 +62,19 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog bool fallbackToCpu = false) => new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu); + /// + /// Creates a new instance of which applies a pre-trained DNN model to featurize an image. + /// + /// The transform's catalog. + /// The name of the column resulting from the transformation of . + /// An extension method on the that creates a chain of two + /// (one for preprocessing and one with a pretrained image DNN) with specific models + /// included in a package together with that extension method. + /// Name of column to transform. If set to , the value of the will be used as source. + public static DnnImageFeaturizerEstimator DnnFeaturizeImage(this TransformsCatalog catalog, + string outputColumnName, + Func> modelFactory, + string inputColumnName = null) + => new DnnImageFeaturizerEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, modelFactory, inputColumnName); } } diff --git a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs index 9aac84dae1..0871709db1 100644 --- a/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs +++ b/src/Microsoft.ML.StandardLearners/FactorizationMachine/FieldAwareFactorizationMachineModelParameters.cs @@ -285,17 +285,17 @@ public float[] GetLatentWeights() public sealed class FieldAwareFactorizationMachinePredictionTransformer : PredictionTransformerBase { - public const string LoaderSignature = "FAFMPredXfer"; + internal const string LoaderSignature = "FAFMPredXfer"; /// /// The name of the feature column used by the prediction transformer. 
/// - public IReadOnlyList FeatureColumns { get; } + internal IReadOnlyList FeatureColumns { get; } /// /// The type of the feature columns. /// - public IReadOnlyList FeatureColumnTypes { get; } + internal IReadOnlyList FeatureColumnTypes { get; } private readonly string _thresholdColumn; private readonly float _threshold; diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs index d5fe66bdd7..b7cbed8841 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/LogisticRegression.cs @@ -35,7 +35,7 @@ public sealed partial class LogisticRegression : LbfgsTrainerBase>, CalibratedModelParametersBase> { - public const string LoadNameValue = "LogisticRegression"; + internal const string LoadNameValue = "LogisticRegression"; internal const string UserNameValue = "Logistic Regression"; internal const string ShortName = "lr"; internal const string Summary = "Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can " diff --git a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs index 9d809dd3fa..38a81a125d 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/LogisticRegression/MulticlassLogisticRegression.cs @@ -41,7 +41,7 @@ namespace Microsoft.ML.Trainers public sealed class MulticlassLogisticRegression : LbfgsTrainerBase, MulticlassLogisticRegressionModelParameters> { - public const string LoadNameValue = "MultiClassLogisticRegression"; + internal const string LoadNameValue = "MultiClassLogisticRegression"; internal const string UserNameValue = 
"Multi-class Logistic Regression"; internal const string ShortName = "mlr"; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs index eae9af3062..75fda7a77f 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/MultiClassNaiveBayesTrainer.cs @@ -15,7 +15,7 @@ using Microsoft.ML.Trainers; using Microsoft.ML.Training; -[assembly: LoadableClass(MultiClassNaiveBayesTrainer.Summary, typeof(MultiClassNaiveBayesTrainer), typeof(MultiClassNaiveBayesTrainer.Arguments), +[assembly: LoadableClass(MultiClassNaiveBayesTrainer.Summary, typeof(MultiClassNaiveBayesTrainer), typeof(MultiClassNaiveBayesTrainer.Options), new[] { typeof(SignatureMultiClassClassifierTrainer), typeof(SignatureTrainer) }, MultiClassNaiveBayesTrainer.UserName, MultiClassNaiveBayesTrainer.LoadName, @@ -30,18 +30,24 @@ namespace Microsoft.ML.Trainers { public sealed class MultiClassNaiveBayesTrainer : TrainerEstimatorBase, MultiClassNaiveBayesModelParameters> { - public const string LoadName = "MultiClassNaiveBayes"; + internal const string LoadName = "MultiClassNaiveBayes"; internal const string UserName = "Multiclass Naive Bayes"; internal const string ShortName = "MNB"; internal const string Summary = "Trains a multiclass Naive Bayes predictor that supports binary feature values."; - public sealed class Arguments : LearnerInputBaseWithLabel + internal sealed class Options : LearnerInputBaseWithLabel { } + /// Return the type of prediction task. public override PredictionKind PredictionKind => PredictionKind.MultiClassClassification; private static readonly TrainerInfo _info = new TrainerInfo(normalization: false, caching: false); + + /// + /// Auxiliary information about the trainer in terms of its capabilities + /// and requirements. 
+ /// public override TrainerInfo Info => _info; /// @@ -50,7 +56,7 @@ public sealed class Arguments : LearnerInputBaseWithLabel /// The environment to use. /// The name of the label column. /// The name of the feature column. - public MultiClassNaiveBayesTrainer(IHostEnvironment env, + internal MultiClassNaiveBayesTrainer(IHostEnvironment env, string labelColumn = DefaultColumnNames.Label, string featureColumn = DefaultColumnNames.Features) : base(Contracts.CheckRef(env, nameof(env)).Register(LoadName), TrainerUtils.MakeR4VecFeature(featureColumn), @@ -63,11 +69,11 @@ public MultiClassNaiveBayesTrainer(IHostEnvironment env, /// /// Initializes a new instance of /// - internal MultiClassNaiveBayesTrainer(IHostEnvironment env, Arguments args) - : base(Contracts.CheckRef(env, nameof(env)).Register(LoadName), TrainerUtils.MakeR4VecFeature(args.FeatureColumn), - TrainerUtils.MakeU4ScalarColumn(args.LabelColumn)) + internal MultiClassNaiveBayesTrainer(IHostEnvironment env, Options options) + : base(Contracts.CheckRef(env, nameof(env)).Register(LoadName), TrainerUtils.MakeR4VecFeature(options.FeatureColumn), + TrainerUtils.MakeU4ScalarColumn(options.LabelColumn)) { - Host.CheckValue(args, nameof(args)); + Host.CheckValue(options, nameof(options)); } protected override SchemaShape.Column[] GetOutputColumnsCore(SchemaShape inputSchema) @@ -163,14 +169,14 @@ private protected override MultiClassNaiveBayesModelParameters TrainModelCore(Tr Desc = "Train a MultiClassNaiveBayesTrainer.", UserName = UserName, ShortName = ShortName)] - internal static CommonOutputs.MulticlassClassificationOutput TrainMultiClassNaiveBayesTrainer(IHostEnvironment env, Arguments input) + internal static CommonOutputs.MulticlassClassificationOutput TrainMultiClassNaiveBayesTrainer(IHostEnvironment env, Options input) { Contracts.CheckValue(env, nameof(env)); var host = env.Register("TrainMultiClassNaiveBayes"); host.CheckValue(input, nameof(input)); EntryPointUtils.CheckInputArgs(host, input); - 
return LearnerEntryPointsUtils.Train(host, input, + return LearnerEntryPointsUtils.Train(host, input, () => new MultiClassNaiveBayesTrainer(host, input), () => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn)); } @@ -201,6 +207,7 @@ private static VersionInfo GetVersionInfo() private readonly VectorType _inputType; private readonly VectorType _outputType; + /// Return the type of prediction task. public override PredictionKind PredictionKind => PredictionKind.MultiClassClassification; ColumnType IValueMapper.InputType => _inputType; diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs index 53f0f35fca..09b7e219ea 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Ova.cs @@ -11,6 +11,7 @@ using Microsoft.Data.DataView; using Microsoft.ML; using Microsoft.ML.CommandLine; +using Microsoft.ML.Core.Data; using Microsoft.ML.Data; using Microsoft.ML.EntryPoints; using Microsoft.ML.Internal.Calibration; @@ -22,7 +23,7 @@ using Microsoft.ML.Training; using Newtonsoft.Json.Linq; -[assembly: LoadableClass(Ova.Summary, typeof(Ova), typeof(Ova.Arguments), +[assembly: LoadableClass(Ova.Summary, typeof(Ova), typeof(Ova.Options), new[] { typeof(SignatureMultiClassClassifierTrainer), typeof(SignatureTrainer) }, Ova.UserNameValue, Ova.LoadNameValue, DocName = "trainer/OvaPkpd.md")] @@ -47,13 +48,16 @@ public sealed class Ova : MetaMulticlassTrainer - /// Arguments passed to OVA. + /// Options passed to OVA. /// - public sealed class Arguments : ArgumentsBase + internal sealed class Options : ArgumentsBase { + /// + /// Whether to use probabilities (vs. raw outputs) to identify top-score category. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Use probability or margins to determine max", ShortName = "useprob")] [TGUI(Label = "Use Probability", Description = "Use probabilities (vs. raw outputs) to identify top-score category")] public bool UseProbabilities = true; @@ -61,16 +65,16 @@ public sealed class Arguments : ArgumentsBase /// /// Legacy constructor that builds the trainer supplying the base trainer to use, for the classification task - /// through the arguments. + /// through the . /// Developers should instantiate OVA by supplying the trainer argument directly to the OVA constructor /// using the other public constructor. /// /// The private for this estimator. - /// The legacy - internal Ova(IHostEnvironment env, Arguments args) - : base(env, args, LoadNameValue) + /// The legacy + internal Ova(IHostEnvironment env, Options options) + : base(env, options, LoadNameValue) { - _args = args; + _options = options; } /// @@ -91,7 +95,7 @@ internal Ova(IHostEnvironment env, int maxCalibrationExamples = 1000000000, bool useProbabilities = true) : base(env, - new Arguments + new Options { ImputeMissingLabelsAsNegative = imputeMissingLabelsAsNegative, MaxCalibrationExamples = maxCalibrationExamples, @@ -99,8 +103,8 @@ internal Ova(IHostEnvironment env, LoadNameValue, labelColumn, binaryEstimator, calibrator) { Host.CheckValue(labelColumn, nameof(labelColumn), "Label column should not be null."); - _args = (Arguments)Args; - _args.UseProbabilities = useProbabilities; + _options = (Options)Args; + _options.UseProbabilities = useProbabilities; } private protected override OvaModelParameters TrainCore(IChannel ch, RoleMappedData data, int count) @@ -112,7 +116,7 @@ private protected override OvaModelParameters TrainCore(IChannel ch, RoleMappedD ch.Info($"Training learner {i}"); predictors[i] = TrainOne(ch, Trainer, data, i).Model; } - return OvaModelParameters.Create(Host, _args.UseProbabilities, predictors); + return OvaModelParameters.Create(Host, 
_options.UseProbabilities, predictors); } private ISingleFeaturePredictionTransformer TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls) @@ -125,7 +129,7 @@ private ISingleFeaturePredictionTransformer TrainOne(IChannel // this is currently unsupported. var transformer = trainer.Fit(view); - if (_args.UseProbabilities) + if (_options.UseProbabilities) { var calibratedModel = transformer.Model as TDistPredictor; @@ -169,6 +173,7 @@ private IDataView MapLabels(RoleMappedData data, int cls) throw Host.ExceptNotSupp($"Label column type is not supported by OVA: {lab.Type}"); } + /// Trains and returns a . public override MulticlassPredictionTransformer Fit(IDataView input) { var roles = new KeyValuePair[1]; @@ -196,7 +201,7 @@ public override MulticlassPredictionTransformer Fit(IDataVie } } - return new MulticlassPredictionTransformer(Host, OvaModelParameters.Create(Host, _args.UseProbabilities, predictors), input.Schema, featureColumn, LabelColumn.Name); + return new MulticlassPredictionTransformer(Host, OvaModelParameters.Create(Host, _options.UseProbabilities, predictors), input.Schema, featureColumn, LabelColumn.Name); } } @@ -227,6 +232,7 @@ private static VersionInfo GetVersionInfo() public ImmutableArray SubModelParameters => _impl.Predictors.Cast().ToImmutableArray(); + /// Return the type of prediction task. 
public override PredictionKind PredictionKind => PredictionKind.MultiClassClassification; /// @@ -265,7 +271,7 @@ internal static OvaModelParameters Create(IHost host, OutputFormula outputFormu ivmd.OutputType != NumberType.Float || ivmd.DistType != NumberType.Float)) { - ch.Warning($"{nameof(Ova.Arguments.UseProbabilities)} specified with {nameof(Ova.Arguments.PredictorType)} that can't produce probabilities."); + ch.Warning($"{nameof(Ova.Options.UseProbabilities)} specified with {nameof(Ova.Options.PredictorType)} that can't produce probabilities."); ivmd = null; } diff --git a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs index 344f9e5f6a..991e51a2e9 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/MultiClass/Pkpd.cs @@ -15,7 +15,7 @@ using Microsoft.ML.Trainers; using Microsoft.ML.Training; -[assembly: LoadableClass(Pkpd.Summary, typeof(Pkpd), typeof(Pkpd.Arguments), +[assembly: LoadableClass(Pkpd.Summary, typeof(Pkpd), typeof(Pkpd.Options), new[] { typeof(SignatureMultiClassClassifierTrainer), typeof(SignatureTrainer) }, Pkpd.UserNameValue, Pkpd.LoadNameValue, DocName = "trainer/OvaPkpd.md")] @@ -62,19 +62,19 @@ public sealed class Pkpd : MetaMulticlassTrainer - /// Arguments passed to PKPD. + /// Options passed to PKPD. /// - public sealed class Arguments : ArgumentsBase + internal sealed class Options : ArgumentsBase { } /// /// Legacy constructor that builds the trainer supplying the base trainer to use, for the classification task - /// through the arguments. + /// through the Options. /// Developers should instantiate by supplying the trainer argument directly to the constructor /// using the other public constructor. 
/// - internal Pkpd(IHostEnvironment env, Arguments args) - : base(env, args, LoadNameValue) + internal Pkpd(IHostEnvironment env, Options options) + : base(env, options, LoadNameValue) { } @@ -94,7 +94,7 @@ internal Pkpd(IHostEnvironment env, ICalibratorTrainer calibrator = null, int maxCalibrationExamples = 1000000000) : base(env, - new Arguments + new Options { ImputeMissingLabelsAsNegative = imputeMissingLabelsAsNegative, MaxCalibrationExamples = maxCalibrationExamples, @@ -242,6 +242,7 @@ private static VersionInfo GetVersionInfo() private readonly TDistPredictor[] _predictors; private readonly IValueMapperDist[] _mappers; + /// Return the type of prediction task. public override PredictionKind PredictionKind => PredictionKind.MultiClassClassification; private readonly VectorType _inputType; private readonly ColumnType _outputType; diff --git a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs index 6f7bb7909d..35432221bb 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Online/AveragedPerceptron.cs @@ -32,7 +32,7 @@ namespace Microsoft.ML.Trainers.Online /// public sealed class AveragedPerceptronTrainer : AveragedLinearTrainer, LinearBinaryModelParameters> { - public const string LoadNameValue = "AveragedPerceptron"; + internal const string LoadNameValue = "AveragedPerceptron"; internal const string UserNameValue = "Averaged Perceptron"; internal const string ShortName = "ap"; internal const string Summary = "Averaged Perceptron Binary Classifier."; diff --git a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs index ff49788028..6a0bde0213 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/SdcaMultiClass.cs @@ -31,9 +31,9 @@ namespace 
Microsoft.ML.Trainers /// public class SdcaMultiClassTrainer : SdcaTrainerBase, MulticlassLogisticRegressionModelParameters> { - public const string LoadNameValue = "SDCAMC"; - public const string UserNameValue = "Fast Linear Multi-class Classification (SA-SDCA)"; - public const string ShortName = "sasdcamc"; + internal const string LoadNameValue = "SDCAMC"; + internal const string UserNameValue = "Fast Linear Multi-class Classification (SA-SDCA)"; + internal const string ShortName = "sasdcamc"; internal const string Summary = "The SDCA linear multi-class classification trainer."; public sealed class Options : ArgumentsBase diff --git a/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs b/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs index a1f461c6a4..431a1aa147 100644 --- a/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs +++ b/src/Microsoft.ML.StandardLearners/Standard/Simple/SimpleTrainers.cs @@ -14,13 +14,13 @@ using Microsoft.ML.Trainers; using Microsoft.ML.Training; -[assembly: LoadableClass(RandomTrainer.Summary, typeof(RandomTrainer), typeof(RandomTrainer.Arguments), +[assembly: LoadableClass(RandomTrainer.Summary, typeof(RandomTrainer), typeof(RandomTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer) }, RandomTrainer.UserNameValue, RandomTrainer.LoadNameValue, "random")] -[assembly: LoadableClass(RandomTrainer.Summary, typeof(PriorTrainer), typeof(PriorTrainer.Arguments), +[assembly: LoadableClass(RandomTrainer.Summary, typeof(PriorTrainer), typeof(PriorTrainer.Options), new[] { typeof(SignatureBinaryClassifierTrainer), typeof(SignatureTrainer) }, PriorTrainer.UserNameValue, PriorTrainer.LoadNameValue, @@ -38,7 +38,6 @@ namespace Microsoft.ML.Trainers /// /// A trainer that trains a predictor that returns random values /// - public sealed class RandomTrainer : TrainerBase, ITrainerEstimator, RandomModelParameters> { @@ -46,29 +45,38 @@ public sealed class 
RandomTrainer : TrainerBase, internal const string UserNameValue = "Random Predictor"; internal const string Summary = "A toy predictor that returns a random value."; - public sealed class Arguments + internal sealed class Options { } + /// Return the type of prediction task. public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; private static readonly TrainerInfo _info = new TrainerInfo(normalization: false, caching: false); + + /// + /// Auxiliary information about the trainer in terms of its capabilities + /// and requirements. + /// public override TrainerInfo Info => _info; /// /// Initializes RandomTrainer object. /// - public RandomTrainer(IHostEnvironment env) + internal RandomTrainer(IHostEnvironment env) : base(env, LoadNameValue) { } - public RandomTrainer(IHostEnvironment env, Arguments args) + internal RandomTrainer(IHostEnvironment env, Options options) : base(env, LoadNameValue) { - Host.CheckValue(args, nameof(args)); + Host.CheckValue(options, nameof(options)); } + /// + /// Trains and returns a . + /// public BinaryPredictionTransformer Fit(IDataView input) { RoleMappedData trainRoles = new RoleMappedData(input); @@ -82,6 +90,10 @@ private protected override RandomModelParameters Train(TrainContext context) return new RandomModelParameters(Host, Host.Rand.Next()); } + /// + /// Returns the of the schema which will be produced by the transformer. + /// Used for schema propagation and verification in a pipeline. + /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { Host.CheckValue(inputSchema, nameof(inputSchema)); @@ -127,6 +139,7 @@ private static VersionInfo GetVersionInfo() private readonly object _instanceLock; private readonly Random _random; + /// Return the type of prediction task. 
public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; private readonly ColumnType _inputType; @@ -139,7 +152,7 @@ private static VersionInfo GetVersionInfo() /// /// The host environment. /// The random seed. - public RandomModelParameters(IHostEnvironment env, int seed) + internal RandomModelParameters(IHostEnvironment env, int seed) : base(env, LoaderSignature) { _seed = seed; @@ -234,7 +247,7 @@ private void MapDist(in VBuffer src, ref float score, ref float prob) } /// - /// Learns the prior distribution for 0/1 class labels and just outputs that. + /// Learns the prior distribution for 0/1 class labels and outputs that. /// public sealed class PriorTrainer : TrainerBase, ITrainerEstimator, PriorModelParameters> @@ -242,29 +255,35 @@ public sealed class PriorTrainer : TrainerBase, internal const string LoadNameValue = "PriorPredictor"; internal const string UserNameValue = "Prior Predictor"; - public sealed class Arguments + internal sealed class Options { } private readonly String _labelColumnName; private readonly String _weightColumnName; + /// Return the type of prediction task. public override PredictionKind PredictionKind => PredictionKind.BinaryClassification; private static readonly TrainerInfo _info = new TrainerInfo(normalization: false, caching: false); + + /// + /// Auxiliary information about the trainer in terms of its capabilities + /// and requirements. + /// public override TrainerInfo Info => _info; - public PriorTrainer(IHostEnvironment env, Arguments args) + internal PriorTrainer(IHostEnvironment env, Options options) : base(env, LoadNameValue) { - Host.CheckValue(args, nameof(args)); + Host.CheckValue(options, nameof(options)); } /// /// Initializes PriorTrainer object. 
/// - public PriorTrainer(IHost host, String labelColumn, String weightColunn = null) - : base(host, LoadNameValue) + internal PriorTrainer(IHostEnvironment env, String labelColumn, String weightColunn = null) + : base(env, LoadNameValue) { Contracts.CheckValue(labelColumn, nameof(labelColumn)); Contracts.CheckValueOrNull(weightColunn); @@ -273,6 +292,9 @@ public PriorTrainer(IHost host, String labelColumn, String weightColunn = null) _weightColumnName = weightColunn != null ? weightColunn : null; } + /// + /// Trains and returns a . + /// public BinaryPredictionTransformer Fit(IDataView input) { RoleMappedData trainRoles = new RoleMappedData(input, feature: null, label: _labelColumnName, weight: _weightColumnName); @@ -332,6 +354,10 @@ private static SchemaShape.Column MakeFeatureColumn(string featureColumn) private static SchemaShape.Column MakeLabelColumn(string labelColumn) => new SchemaShape.Column(labelColumn, SchemaShape.Column.VectorKind.Scalar, NumberType.R4, false); + /// + /// Returns the of the schema which will be produced by the transformer. + /// Used for schema propagation and verification in a pipeline. + /// public SchemaShape GetOutputSchema(SchemaShape inputSchema) { Host.CheckValue(inputSchema, nameof(inputSchema)); @@ -376,7 +402,7 @@ private static VersionInfo GetVersionInfo() /// /// The host environment. /// The probability of the positive class. 
- public PriorModelParameters(IHostEnvironment env, float prob) + internal PriorModelParameters(IHostEnvironment env, float prob) : base(env, LoaderSignature) { Host.Check(!float.IsNaN(prob)); diff --git a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs index 88af8fc3cc..2250830b27 100644 --- a/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs +++ b/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs @@ -578,5 +578,48 @@ public static LinearSvmTrainer LinearSupportVectorMachines(this BinaryClassifica return new LinearSvmTrainer(CatalogUtils.GetEnvironment(catalog), options); } + + /// + /// Predict a target using the random binary classification model . + /// + /// + /// This trainer can be used as a baseline for other more sophisticated models. + /// + /// The . + /// + /// + /// + /// + public static RandomTrainer Random(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog) + { + Contracts.CheckValue(catalog, nameof(catalog)); + return new RandomTrainer(CatalogUtils.GetEnvironment(catalog), new RandomTrainer.Options()); + } + + /// + /// Predict a target using a binary classification model trained with trainer. + /// + /// + /// This trainer uses the proportion of a label in the training set as the probability of that label. + /// This trainer is often used as a baseline for other more sophisticated models. + /// + /// The . + /// The name of the label column. + /// The optional name of the weights column. 
+ /// + /// + /// + /// + public static PriorTrainer Prior(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog, + string labelColumn = DefaultColumnNames.Label, + string weightsColumn = null) + { + Contracts.CheckValue(catalog, nameof(catalog)); + return new PriorTrainer(CatalogUtils.GetEnvironment(catalog), labelColumn, weightsColumn); + } } } diff --git a/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs b/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs index 74efc10689..4adbe22299 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs @@ -85,23 +85,23 @@ public static TensorFlowEstimator ScoreTensorFlowModel(this TransformsCatalog ca => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, tensorFlowModel); /// - /// Score or Retrain a tensorflow model (based on setting of the ) setting. - /// The model is specified in the . + /// Score or Retrain a tensorflow model (based on setting of the ) setting. + /// The model is specified in the . /// /// The transform's catalog. - /// The specifying the inputs and the settings of the . + /// The specifying the inputs and the settings of the . public static TensorFlowEstimator TensorFlow(this TransformsCatalog catalog, - TensorFlowTransformer.Options options) + TensorFlowEstimator.Options options) => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), options); /// - /// Scores or retrains (based on setting of the ) a pre-traiend TensorFlow model specified via . + /// Scores or retrains (based on setting of the ) a pre-trained TensorFlow model specified via . /// /// The transform's catalog. - /// The specifying the inputs and the settings of the . + /// The specifying the inputs and the settings of the . /// The pre-loaded TensorFlow model. 
public static TensorFlowEstimator TensorFlow(this TransformsCatalog catalog, - TensorFlowTransformer.Options options, + TensorFlowEstimator.Options options, TensorFlowModelInfo tensorFlowModel) => new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), options, tensorFlowModel); } diff --git a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs index cfe041445e..36d680acf9 100644 --- a/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs +++ b/src/Microsoft.ML.TensorFlow/TensorflowTransform.cs @@ -19,7 +19,7 @@ using Microsoft.ML.Transforms.TensorFlow; [assembly: LoadableClass(TensorFlowTransformer.Summary, typeof(IDataTransform), typeof(TensorFlowTransformer), - typeof(TensorFlowTransformer.Options), typeof(SignatureDataTransform), TensorFlowTransformer.UserName, TensorFlowTransformer.ShortName)] + typeof(TensorFlowEstimator.Options), typeof(SignatureDataTransform), TensorFlowTransformer.UserName, TensorFlowTransformer.ShortName)] [assembly: LoadableClass(TensorFlowTransformer.Summary, typeof(IDataTransform), typeof(TensorFlowTransformer), null, typeof(SignatureLoadDataTransform), TensorFlowTransformer.UserName, TensorFlowTransformer.LoaderSignature)] @@ -37,110 +37,6 @@ namespace Microsoft.ML.Transforms /// public sealed class TensorFlowTransformer : RowToRowTransformerBase { - /// - /// The options for the . - /// - public sealed class Options : TransformInputBase - { - /// - /// Location of the TensorFlow model. - /// - [Argument(ArgumentType.Required, HelpText = "TensorFlow model used by the transform. Please see https://www.tensorflow.org/mobile/prepare_models for more details.", SortOrder = 0)] - public string ModelLocation; - - /// - /// The names of the model inputs. - /// - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "The names of the model inputs", ShortName = "inputs", SortOrder = 1)] - public string[] InputColumns; - - /// - /// The names of the requested model outputs. 
- /// - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "The name of the outputs", ShortName = "outputs", SortOrder = 2)] - public string[] OutputColumns; - - /// - /// The name of the label column in that will be mapped to label node in TensorFlow model. - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Training labels.", ShortName = "label", SortOrder = 4)] - public string LabelColumn; - - /// - /// The name of the label in TensorFlow model. - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "TensorFlow label node.", ShortName = "TFLabel", SortOrder = 5)] - public string TensorFlowLabel; - - /// - /// Name of the operation in TensorFlow graph that is used for optimizing parameters in the graph. - /// Usually it is the name specified in the minimize method of optimizer in python - /// e.g. optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, name = "SGDOptimizer"). - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the optimization operation in the TensorFlow graph.", ShortName = "OptimizationOp", SortOrder = 6)] - public string OptimizationOperation; - - /// - /// The name of the operation in the TensorFlow graph to compute training loss (Optional). - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the operation in the TensorFlow graph to compute training loss (Optional)", ShortName = "LossOp", SortOrder = 7)] - public string LossOperation; - - /// - /// The name of the operation in the TensorFlow graph to compute performance metric during training (Optional). - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the operation in the TensorFlow graph to compute performance metric during training (Optional)", ShortName = "MetricOp", SortOrder = 8)] - public string MetricOperation; - - /// - /// Number of samples to use for mini-batch training. 
- /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Number of samples to use for mini-batch training.", SortOrder = 9)] - public int BatchSize = 64; - - /// - /// Number of training iterations. - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations.", SortOrder = 10)] - public int Epoch = 5; - - /// - /// The name of the operation in the TensorFlow graph which sets optimizer learning rate (Optional). - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the operation in the TensorFlow graph which sets optimizer learning rate (Optional).", SortOrder = 11)] - public string LearningRateOperation; - - /// - /// Learning rate to use during optimization. - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate to use during optimization.", SortOrder = 12)] - public float LearningRate = 0.01f; - - /// - /// Name of the input in TensorFlow graph that specifiy the location for saving/restoring models to/from disk. - /// This parameter is set by different kinds of 'Savers' in TensorFlow and users don't have control over this. - /// Therefore, its highly unlikely that this parameter is changed from its default value of 'save/Const'. - /// Please change it cautiously if you need to. - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the input in TensorFlow graph that specifiy the location for saving/restoring models from disk.", SortOrder = 13)] - public string SaveLocationOperation = "save/Const"; - - /// - /// Name of the operation in TensorFlow graph that is used for saving/restoring models to/from disk. - /// This parameter is set by different kinds of 'Savers' in TensorFlow and users don't have control over this. - /// Therefore, its highly unlikely that this parameter is changed from its default value of 'save/control_dependency'. - /// Please change it cautiously if you need to. 
- /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the input in TensorFlow graph that specifiy the location for saving/restoring models from disk.", SortOrder = 14)] - public string SaveOperation = "save/control_dependency"; - - /// - /// Needed for command line to specify if retraining is requested. - /// - [Argument(ArgumentType.AtMostOnce, HelpText = "Retrain TensorFlow model.", SortOrder = 15)] - public bool ReTrain = false; - } - private readonly string _savedModelPath; private readonly bool _isTemporarySavedModel; @@ -300,7 +196,7 @@ private static TensorFlowTransformer Create(IHostEnvironment env, ModelLoadConte } // Factory method for SignatureDataTransform. - internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) + internal static IDataTransform Create(IHostEnvironment env, TensorFlowEstimator.Options options, IDataView input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(options, nameof(options)); @@ -311,12 +207,12 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa return new TensorFlowTransformer(env, options, input).MakeDataTransform(input); } - internal TensorFlowTransformer(IHostEnvironment env, Options options, IDataView input) + internal TensorFlowTransformer(IHostEnvironment env, TensorFlowEstimator.Options options, IDataView input) : this(env, options, TensorFlowUtils.LoadTensorFlowModel(env, options.ModelLocation), input) { } - internal TensorFlowTransformer(IHostEnvironment env, Options options, TensorFlowModelInfo tensorFlowModel, IDataView input) + internal TensorFlowTransformer(IHostEnvironment env, TensorFlowEstimator.Options options, TensorFlowModelInfo tensorFlowModel, IDataView input) : this(env, tensorFlowModel.Session, options.OutputColumns, options.InputColumns, TensorFlowUtils.IsSavedModel(env, options.ModelLocation) ? 
options.ModelLocation : null, false) { @@ -335,7 +231,7 @@ internal TensorFlowTransformer(IHostEnvironment env, Options options, TensorFlow } } - private void CheckTrainingParameters(Options options) + private void CheckTrainingParameters(TensorFlowEstimator.Options options) { Host.CheckNonWhiteSpace(options.LabelColumn, nameof(options.LabelColumn)); Host.CheckNonWhiteSpace(options.OptimizationOperation, nameof(options.OptimizationOperation)); @@ -404,7 +300,7 @@ private void CheckTrainingParameters(Options options) return (inputColIndex, isInputVector, tfInputType, tfInputShape); } - private void TrainCore(Options options, IDataView input) + private void TrainCore(TensorFlowEstimator.Options options, IDataView input) { var inputsForTraining = new string[Inputs.Length + 1]; var inputColIndices = new int[inputsForTraining.Length]; @@ -482,7 +378,7 @@ private void TrainCore(Options options, IDataView input) string[] inputsForTraining, ITensorValueGetter[] srcTensorGetters, List fetchList, - Options options) + TensorFlowEstimator.Options options) { float loss = 0; float metric = 0; @@ -512,7 +408,7 @@ private void TrainCore(Options options, IDataView input) /// After retraining Session and Graphs are both up-to-date /// However model on disk is not which is used to serialzed to ML.Net stream /// - private void UpdateModelOnDisk(string modelDir, Options options) + private void UpdateModelOnDisk(string modelDir, TensorFlowEstimator.Options options) { try { @@ -955,7 +851,7 @@ protected override Schema.DetachedColumn[] GetOutputColumnsCore() Desc = Summary, UserName = UserName, ShortName = ShortName)] - internal static CommonOutputs.TransformOutput TensorFlowScorer(IHostEnvironment env, Options input) + internal static CommonOutputs.TransformOutput TensorFlowScorer(IHostEnvironment env, TensorFlowEstimator.Options input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(input, nameof(input)); @@ -1080,8 +976,112 @@ public TFTensor GetBufferedBatchTensor() /// 
public sealed class TensorFlowEstimator : IEstimator { + /// + /// The options for the . + /// + public sealed class Options : TransformInputBase + { + /// + /// Location of the TensorFlow model. + /// + [Argument(ArgumentType.Required, HelpText = "TensorFlow model used by the transform. Please see https://www.tensorflow.org/mobile/prepare_models for more details.", SortOrder = 0)] + public string ModelLocation; + + /// + /// The names of the model inputs. + /// + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "The names of the model inputs", ShortName = "inputs", SortOrder = 1)] + public string[] InputColumns; + + /// + /// The names of the requested model outputs. + /// + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "The name of the outputs", ShortName = "outputs", SortOrder = 2)] + public string[] OutputColumns; + + /// + /// The name of the label column in that will be mapped to label node in TensorFlow model. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Training labels.", ShortName = "label", SortOrder = 4)] + public string LabelColumn; + + /// + /// The name of the label in TensorFlow model. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "TensorFlow label node.", ShortName = "TFLabel", SortOrder = 5)] + public string TensorFlowLabel; + + /// + /// Name of the operation in TensorFlow graph that is used for optimizing parameters in the graph. + /// Usually it is the name specified in the minimize method of optimizer in python + /// e.g. optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost, name = "SGDOptimizer"). + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the optimization operation in the TensorFlow graph.", ShortName = "OptimizationOp", SortOrder = 6)] + public string OptimizationOperation; + + /// + /// The name of the operation in the TensorFlow graph to compute training loss (Optional). 
+ /// + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the operation in the TensorFlow graph to compute training loss (Optional)", ShortName = "LossOp", SortOrder = 7)] + public string LossOperation; + + /// + /// The name of the operation in the TensorFlow graph to compute performance metric during training (Optional). + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the operation in the TensorFlow graph to compute performance metric during training (Optional)", ShortName = "MetricOp", SortOrder = 8)] + public string MetricOperation; + + /// + /// Number of samples to use for mini-batch training. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Number of samples to use for mini-batch training.", SortOrder = 9)] + public int BatchSize = 64; + + /// + /// Number of training iterations. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Number of training iterations.", SortOrder = 10)] + public int Epoch = 5; + + /// + /// The name of the operation in the TensorFlow graph which sets optimizer learning rate (Optional). + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "The name of the operation in the TensorFlow graph which sets optimizer learning rate (Optional).", SortOrder = 11)] + public string LearningRateOperation; + + /// + /// Learning rate to use during optimization. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Learning rate to use during optimization.", SortOrder = 12)] + public float LearningRate = 0.01f; + + /// + /// Name of the input in TensorFlow graph that specifiy the location for saving/restoring models to/from disk. + /// This parameter is set by different kinds of 'Savers' in TensorFlow and users don't have control over this. + /// Therefore, its highly unlikely that this parameter is changed from its default value of 'save/Const'. + /// Please change it cautiously if you need to. 
+ /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the input in TensorFlow graph that specifiy the location for saving/restoring models from disk.", SortOrder = 13)] + public string SaveLocationOperation = "save/Const"; + + /// + /// Name of the operation in TensorFlow graph that is used for saving/restoring models to/from disk. + /// This parameter is set by different kinds of 'Savers' in TensorFlow and users don't have control over this. + /// Therefore, its highly unlikely that this parameter is changed from its default value of 'save/control_dependency'. + /// Please change it cautiously if you need to. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the input in TensorFlow graph that specifiy the location for saving/restoring models from disk.", SortOrder = 14)] + public string SaveOperation = "save/control_dependency"; + + /// + /// Needed for command line to specify if retraining is requested. + /// + [Argument(ArgumentType.AtMostOnce, HelpText = "Retrain TensorFlow model.", SortOrder = 15)] + public bool ReTrain = false; + } + private readonly IHost _host; - private readonly TensorFlowTransformer.Options _options; + private readonly Options _options; private readonly TensorFlowModelInfo _tensorFlowModel; private readonly TFDataType[] _tfInputTypes; private readonly ColumnType[] _outputTypes; @@ -1098,12 +1098,12 @@ internal TensorFlowEstimator(IHostEnvironment env, string[] outputColumnNames, s { } - internal TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Options options) + internal TensorFlowEstimator(IHostEnvironment env, Options options) : this(env, options, TensorFlowUtils.LoadTensorFlowModel(env, options.ModelLocation)) { } - internal TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Options options, TensorFlowModelInfo tensorFlowModel) + internal TensorFlowEstimator(IHostEnvironment env, Options options, TensorFlowModelInfo tensorFlowModel) { _host = Contracts.CheckRef(env, 
nameof(env)).Register(nameof(TensorFlowEstimator)); _options = options; @@ -1114,9 +1114,9 @@ internal TensorFlowEstimator(IHostEnvironment env, TensorFlowTransformer.Options _outputTypes = outputTuple.outputTypes; } - private static TensorFlowTransformer.Options CreateArguments(TensorFlowModelInfo tensorFlowModel, string[] outputColumnNames, string[] inputColumnName) + private static Options CreateArguments(TensorFlowModelInfo tensorFlowModel, string[] outputColumnNames, string[] inputColumnName) { - var options = new TensorFlowTransformer.Options(); + var options = new Options(); options.ModelLocation = tensorFlowModel.ModelPath; options.InputColumns = inputColumnName; options.OutputColumns = outputColumnNames; diff --git a/src/Microsoft.ML.Transforms/CustomMappingCatalog.cs b/src/Microsoft.ML.Transforms/CustomMappingCatalog.cs index 5cb04bddf1..a74e80f800 100644 --- a/src/Microsoft.ML.Transforms/CustomMappingCatalog.cs +++ b/src/Microsoft.ML.Transforms/CustomMappingCatalog.cs @@ -23,6 +23,12 @@ public static class CustomMappingCatalog /// The contract name, used by ML.NET for loading the model. If null is specified, such a trained model would not be save-able. /// Additional parameters for schema mapping between and input data. /// Additional parameters for schema mapping between and output data. + /// + /// + /// + /// public static CustomMappingEstimator CustomMapping(this TransformsCatalog catalog, Action mapAction, string contractName, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class, new() @@ -41,6 +47,12 @@ public static CustomMappingEstimator CustomMapping(this /// The contract name, used by ML.NET for loading the model. If null is specified, such a trained model would not be save-able. /// Additional parameters for schema mapping between and input data. /// Additional parameters for schema mapping between and output data. 
+ /// + /// + /// + /// public static CustomMappingTransformer CustomMappingTransformer(this TransformsCatalog catalog, Action mapAction, string contractName, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) where TSrc : class, new() diff --git a/src/Microsoft.ML.Transforms/CustomMappingTransformer.cs b/src/Microsoft.ML.Transforms/CustomMappingTransformer.cs index 609d9c842f..be201c0b30 100644 --- a/src/Microsoft.ML.Transforms/CustomMappingTransformer.cs +++ b/src/Microsoft.ML.Transforms/CustomMappingTransformer.cs @@ -30,7 +30,12 @@ public sealed class CustomMappingTransformer : ITransformer internal InternalSchemaDefinition AddedSchema { get; } internal SchemaDefinition InputSchemaDefinition { get; } + /// + /// Whether a call to should succeed, on an + /// appropriate schema. + /// public bool IsRowToRowMapper => true; + /// /// Create a custom mapping of input columns to output columns. /// @@ -39,7 +44,7 @@ public sealed class CustomMappingTransformer : ITransformer /// The name of the action (will be saved to the model). /// Additional parameters for schema mapping between and input data. /// Additional parameters for schema mapping between and output data. 
- public CustomMappingTransformer(IHostEnvironment env, Action mapAction, string contractName, + internal CustomMappingTransformer(IHostEnvironment env, Action mapAction, string contractName, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) { Contracts.CheckValue(env, nameof(env)); @@ -61,6 +66,7 @@ public CustomMappingTransformer(IHostEnvironment env, Action mapActi } void ICanSaveModel.Save(ModelSaveContext ctx) => SaveModel(ctx); + internal void SaveModel(ModelSaveContext ctx) { if (_contractName == null) @@ -68,6 +74,10 @@ internal void SaveModel(ModelSaveContext ctx) LambdaTransform.SaveCustomTransformer(_host, ctx, _contractName); } + /// + /// Returns the which would be produced by the transformer applied to + /// an input data with schema . + /// public Schema GetOutputSchema(Schema inputSchema) { _host.CheckValue(inputSchema, nameof(inputSchema)); @@ -75,12 +85,21 @@ public Schema GetOutputSchema(Schema inputSchema) return RowToRowMapperTransform.GetOutputSchema(inputSchema, mapper); } + /// + /// Take the data in, make transformations, output the data. + /// Note that 's are lazy, so no actual transformations happen here, just schema validation. + /// public IDataView Transform(IDataView input) { _host.CheckValue(input, nameof(input)); return new RowToRowMapperTransform(_host, input, MakeRowMapper(input.Schema), MakeRowMapper); } + /// + /// Constructs a row-to-row mapper based on an input schema. If + /// is false, then an exception is thrown. If the is in any way + /// unsuitable for constructing the mapper, an exception is likewise thrown. + /// public IRowToRowMapper GetRowToRowMapper(Schema inputSchema) { _host.CheckValue(inputSchema, nameof(inputSchema)); @@ -185,6 +204,13 @@ public ITransformer GetTransformer() } } + /// + /// The to define a custom mapping of rows of an . + /// For usage details, please see + /// + /// + /// Calling in this estimator, produces an . 
+ /// public sealed class CustomMappingEstimator : TrivialEstimator> where TSrc : class, new() where TDst : class, new() @@ -197,13 +223,17 @@ public sealed class CustomMappingEstimator : TrivialEstimatorThe contract name, used by ML.NET for loading the model. If null is specified, such a trained model would not be save-able. /// Additional parameters for schema mapping between and input data. /// Additional parameters for schema mapping between and output data. - public CustomMappingEstimator(IHostEnvironment env, Action mapAction, string contractName, + internal CustomMappingEstimator(IHostEnvironment env, Action mapAction, string contractName, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(CustomMappingEstimator)), new CustomMappingTransformer(env, mapAction, contractName, inputSchemaDefinition, outputSchemaDefinition)) { } + /// + /// Returns the of the schema which will be produced by the transformer. + /// Used for schema propagation and verification in a pipeline. + /// public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { var addedCols = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema); diff --git a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv index 0c60d474be..34822a5f0f 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv +++ b/test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv @@ -60,7 +60,7 @@ Trainers.LightGbmRegressor LightGBM Regression Microsoft.ML.LightGBM.LightGbm Tr Trainers.LinearSvmBinaryClassifier Train a linear SVM. 
Microsoft.ML.Trainers.Online.LinearSvmTrainer TrainLinearSvm Microsoft.ML.Trainers.Online.LinearSvmTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.LogisticRegressionBinaryClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Trainers.LogisticRegression TrainBinary Microsoft.ML.Trainers.LogisticRegression+Options Microsoft.ML.EntryPoints.CommonOutputs+BinaryClassificationOutput Trainers.LogisticRegressionClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Trainers.LogisticRegression TrainMultiClass Microsoft.ML.Trainers.MulticlassLogisticRegression+Options Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput -Trainers.NaiveBayesClassifier Train a MultiClassNaiveBayesTrainer. Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer TrainMultiClassNaiveBayesTrainer Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput +Trainers.NaiveBayesClassifier Train a MultiClassNaiveBayesTrainer. Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer TrainMultiClassNaiveBayesTrainer Microsoft.ML.Trainers.MultiClassNaiveBayesTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+MulticlassClassificationOutput Trainers.OnlineGradientDescentRegressor Train a Online gradient descent perceptron. 
Microsoft.ML.Trainers.Online.OnlineGradientDescentTrainer TrainRegression Microsoft.ML.Trainers.Online.OnlineGradientDescentTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.OrdinaryLeastSquaresRegressor Train an OLS regression model. Microsoft.ML.Trainers.HalLearners.OlsLinearRegressionTrainer TrainRegression Microsoft.ML.Trainers.HalLearners.OlsLinearRegressionTrainer+Options Microsoft.ML.EntryPoints.CommonOutputs+RegressionOutput Trainers.PcaAnomalyDetector Train an PCA Anomaly model. Microsoft.ML.Trainers.PCA.RandomizedPcaTrainer TrainPcaAnomaly Microsoft.ML.Trainers.PCA.RandomizedPcaTrainer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+AnomalyDetectionOutput @@ -126,7 +126,7 @@ Transforms.ScoreColumnSelector Selects only the last score columns and the extra Transforms.Scorer Turn the predictor model into a transform model Microsoft.ML.EntryPoints.ScoreModel MakeScoringTransform Microsoft.ML.EntryPoints.ScoreModel+ModelInput Microsoft.ML.EntryPoints.ScoreModel+Output Transforms.Segregator Un-groups vector columns into sequences of rows, inverse of Group transform Microsoft.ML.Transforms.GroupingOperations Ungroup Microsoft.ML.Transforms.UngroupTransform+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.SentimentAnalyzer Uses a pretrained sentiment model to score input strings Microsoft.ML.Transforms.Text.TextAnalytics AnalyzeSentiment Microsoft.ML.Transforms.Text.SentimentAnalyzingTransformer+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput -Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. Microsoft.ML.Transforms.TensorFlowTransformer TensorFlowScorer Microsoft.ML.Transforms.TensorFlowTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput +Transforms.TensorFlowScorer Transforms the data using the TensorFlow model. 
Microsoft.ML.Transforms.TensorFlowTransformer TensorFlowScorer Microsoft.ML.Transforms.TensorFlowEstimator+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.TextFeaturizer A transform that turns a collection of text documents into numerical feature vectors. The feature vectors are normalized counts of (word and/or character) ngrams in a given tokenized text. Microsoft.ML.Transforms.Text.TextAnalytics TextTransform Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Arguments Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.TextToKeyConverter Converts input values (words, numbers, etc.) to index in a dictionary. Microsoft.ML.Transforms.Categorical.Categorical TextToKey Microsoft.ML.Transforms.Conversions.ValueToKeyMappingTransformer+Options Microsoft.ML.EntryPoints.CommonOutputs+TransformOutput Transforms.TrainTestDatasetSplitter Split the dataset into train and test sets Microsoft.ML.EntryPoints.TrainTestSplit Split Microsoft.ML.EntryPoints.TrainTestSplit+Input Microsoft.ML.EntryPoints.TrainTestSplit+Output diff --git a/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs b/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs index 2ebb55ee4d..edc7ff8451 100644 --- a/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs +++ b/test/Microsoft.ML.OnnxTransformTest/DnnImageFeaturizerTest.cs @@ -74,7 +74,7 @@ void TestDnnImageFeaturizer() var xyData = new List { new TestDataXY() { A = new float[inputSize] } }; var stringData = new List { new TestDataDifferntType() { data_0 = new string[inputSize] } }; var sizeData = new List { new TestDataSize() { data_0 = new float[2] } }; - var pipe = new DnnImageFeaturizerEstimator(Env, "output_1", m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0"); + var pipe = ML.Transforms.DnnFeaturizeImage("output_1", m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), "data_0"); var invalidDataWrongNames = 
ML.Data.ReadFromEnumerable(xyData); var invalidDataWrongTypes = ML.Data.ReadFromEnumerable(stringData); @@ -146,7 +146,7 @@ public void TestOldSavingAndLoading() var inputNames = "data_0"; var outputNames = "output_1"; - var est = new DnnImageFeaturizerEstimator(Env, outputNames, m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), inputNames); + var est = ML.Transforms.DnnFeaturizeImage(outputNames, m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), inputNames); var transformer = est.Fit(dataView); var result = transformer.Transform(dataView); var resultRoles = new RoleMappedData(result); diff --git a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs index f006c5a7f0..ab1bb4ee54 100644 --- a/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs +++ b/test/Microsoft.ML.Tests/ScenariosWithDirectInstantiation/TensorflowTests.cs @@ -545,7 +545,7 @@ public void TensorFlowTransformMNISTLRTrainingTest() var pipe = mlContext.Transforms.Categorical.OneHotEncoding("OneHotLabel", "Label") .Append(mlContext.Transforms.Normalize(new NormalizingEstimator.MinMaxColumn("Features", "Placeholder"))) - .Append(mlContext.Transforms.TensorFlow(new TensorFlowTransformer.Options() + .Append(mlContext.Transforms.TensorFlow(new TensorFlowEstimator.Options() { ModelLocation = model_location, InputColumns = new[] { "Features" }, @@ -659,7 +659,7 @@ private void ExecuteTFTransformMNISTConvTrainingTest(bool shuffle, int? 
shuffleS } var pipe = mlContext.Transforms.CopyColumns(("Features", "Placeholder")) - .Append(mlContext.Transforms.TensorFlow(new TensorFlowTransformer.Options() + .Append(mlContext.Transforms.TensorFlow(new TensorFlowEstimator.Options() { ModelLocation = modelLocation, InputColumns = new[] { "Features" }, diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs b/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs index 870ac49e61..b862809cc8 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/PriorRandomTests.cs @@ -32,7 +32,7 @@ private IDataView GetBreastCancerDataviewWithTextColumns() public void TestEstimatorRandom() { var dataView = GetBreastCancerDataviewWithTextColumns(); - var pipe = new RandomTrainer(Env); + var pipe = ML.BinaryClassification.Trainers.Random(); // Test only that the schema propagation works. // REVIEW: the save/load is not preserving the full state of the random predictor. This is unfortunate, but we don't care too much at this point. 
@@ -45,7 +45,7 @@ public void TestEstimatorPrior() { var dataView = GetBreastCancerDataviewWithTextColumns(); - var pipe = new PriorTrainer(Contracts.CheckRef(Env, nameof(Env)).Register("PriorPredictor"), "Label"); + var pipe = ML.BinaryClassification.Trainers.Prior("Label"); TestEstimatorCore(pipe, dataView); Done(); } diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs index cc75f6cdeb..ca45eb38a8 100644 --- a/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs +++ b/test/Microsoft.ML.Tests/TrainerEstimators/TrainerEstimators.cs @@ -108,7 +108,7 @@ public void TestEstimatorHogwildSGD() public void TestEstimatorMultiClassNaiveBayesTrainer() { (IEstimator pipe, IDataView dataView) = GetMultiClassPipeline(); - pipe = pipe.Append(new MultiClassNaiveBayesTrainer(Env, "Label", "Features")); + pipe = pipe.Append(ML.MulticlassClassification.Trainers.NaiveBayes("Label", "Features")); TestEstimatorCore(pipe, dataView); Done(); }