diff --git a/ZBaselines/Common/EntryPoints/core_ep-list.tsv b/ZBaselines/Common/EntryPoints/core_ep-list.tsv index 7fc82434b4..61f2604a8d 100644 --- a/ZBaselines/Common/EntryPoints/core_ep-list.tsv +++ b/ZBaselines/Common/EntryPoints/core_ep-list.tsv @@ -1,8 +1,9 @@ Data.CustomTextLoader Import a dataset from a text file Microsoft.ML.Runtime.EntryPoints.ImportTextData ImportText Microsoft.ML.Runtime.EntryPoints.ImportTextData+Input Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output Data.DataViewReference Pass dataview from memory to experiment Microsoft.ML.Runtime.EntryPoints.DataViewReference ImportData Microsoft.ML.Runtime.EntryPoints.DataViewReference+Input Microsoft.ML.Runtime.EntryPoints.DataViewReference+Output -Data.IDataViewArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput -Data.PredictorModelArrayConverter Create and array variable Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput +Data.IDataViewArrayConverter Create an array variable of IDataView Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIDataViewOutput +Data.PredictorModelArrayConverter Create an array variable of IPredictorModel Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayIPredictorModelOutput Data.TextLoader Import a dataset from a text file 
Microsoft.ML.Runtime.EntryPoints.ImportTextData TextLoader Microsoft.ML.Runtime.EntryPoints.ImportTextData+LoaderInput Microsoft.ML.Runtime.EntryPoints.ImportTextData+Output +Data.TransformModelArrayConverter Create an array variable of ITransformModel Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro MakeArray Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayITransformModelInput Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+ArrayITransformModelOutput Models.AnomalyDetectionEvaluator Evaluates an anomaly detection scored dataset. Microsoft.ML.Runtime.Data.Evaluate AnomalyDetection Microsoft.ML.Runtime.Data.AnomalyDetectionMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+CommonEvaluateOutput Models.BinaryClassificationEvaluator Evaluates a binary classification scored dataset. Microsoft.ML.Runtime.Data.Evaluate Binary Microsoft.ML.Runtime.Data.BinaryClassifierMamlEvaluator+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+ClassificationEvaluateOutput Models.BinaryCrossValidator Cross validation for binary classification Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro CrossValidateBinary Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MacroOutput`1[Microsoft.ML.Runtime.EntryPoints.CrossValidationBinaryMacro+Output] diff --git a/ZBaselines/Common/EntryPoints/core_manifest.json b/ZBaselines/Common/EntryPoints/core_manifest.json index 05b7965842..010d4a0afa 100644 --- a/ZBaselines/Common/EntryPoints/core_manifest.json +++ b/ZBaselines/Common/EntryPoints/core_manifest.json @@ -63,7 +63,7 @@ }, { "Name": "Data.IDataViewArrayConverter", - "Desc": "Create and array variable", + "Desc": "Create an array variable of IDataView", "FriendlyName": null, "ShortName": null, "Inputs": [ @@ -92,7 +92,7 @@ }, { "Name": "Data.PredictorModelArrayConverter", - "Desc": "Create and array variable", + "Desc": "Create an array variable of 
IPredictorModel", "FriendlyName": null, "ShortName": null, "Inputs": [ @@ -469,6 +469,35 @@ "ILearningPipelineLoader" ] }, + { + "Name": "Data.TransformModelArrayConverter", + "Desc": "Create an array variable of ITransformModel", + "FriendlyName": null, + "ShortName": null, + "Inputs": [ + { + "Name": "TransformModel", + "Type": { + "Kind": "Array", + "ItemType": "TransformModel" + }, + "Desc": "The models", + "Required": true, + "SortOrder": 1.0, + "IsNullable": false + } + ], + "Outputs": [ + { + "Name": "OutputModel", + "Type": { + "Kind": "Array", + "ItemType": "TransformModel" + }, + "Desc": "The model array" + } + ] + }, { "Name": "Models.AnomalyDetectionEvaluator", "Desc": "Evaluates an anomaly detection scored dataset.", @@ -1300,7 +1329,7 @@ "Label" ], "Required": false, - "SortOrder": 6.0, + "SortOrder": 5.0, "IsNullable": false, "Default": "Label" }, @@ -1320,7 +1349,7 @@ }, "Desc": "Specifies the trainer kind, which determines the evaluator to be used.", "Required": true, - "SortOrder": 7.0, + "SortOrder": 6.0, "IsNullable": false, "Default": "SignatureBinaryClassifierTrainer" } @@ -1408,12 +1437,22 @@ "Kind": "Struct", "Fields": [ { - "Name": "Model", + "Name": "PredictorModel", "Type": "PredictorModel", - "Desc": "The model", - "Required": true, + "Desc": "The predictor model", + "Required": false, "SortOrder": 1.0, - "IsNullable": false + "IsNullable": false, + "Default": null + }, + { + "Name": "TransformModel", + "Type": "TransformModel", + "Desc": "The transform model", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null } ] }, @@ -1430,7 +1469,7 @@ "strat" ], "Required": false, - "SortOrder": 7.0, + "SortOrder": 6.0, "IsNullable": false, "Default": null }, @@ -1442,7 +1481,7 @@ "k" ], "Required": false, - "SortOrder": 8.0, + "SortOrder": 7.0, "IsNullable": false, "Default": 2 }, @@ -1462,7 +1501,7 @@ }, "Desc": "Specifies the trainer kind, which determines the evaluator to be used.", "Required": true, - "SortOrder": 
9.0, + "SortOrder": 8.0, "IsNullable": false, "Default": "SignatureBinaryClassifierTrainer" } @@ -1476,6 +1515,14 @@ }, "Desc": "The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel." }, + { + "Name": "TransformModel", + "Type": { + "Kind": "Array", + "ItemType": "TransformModel" + }, + "Desc": "The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel." + }, { "Name": "Warnings", "Type": "DataView", @@ -2999,12 +3046,22 @@ "Kind": "Struct", "Fields": [ { - "Name": "Model", + "Name": "PredictorModel", "Type": "PredictorModel", - "Desc": "The model", - "Required": true, + "Desc": "The predictor model", + "Required": false, "SortOrder": 1.0, - "IsNullable": false + "IsNullable": false, + "Default": null + }, + { + "Name": "TransformModel", + "Type": "TransformModel", + "Desc": "Transform model", + "Required": false, + "SortOrder": 2.0, + "IsNullable": false, + "Default": null } ] }, @@ -3058,6 +3115,11 @@ "Type": "PredictorModel", "Desc": "The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel." }, + { + "Name": "TransformModel", + "Type": "TransformModel", + "Desc": "The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel." 
+ }, { "Name": "Warnings", "Type": "DataView", diff --git a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs index 662a16798f..c6b4de44fa 100644 --- a/src/Microsoft.ML.PipelineInference/PipelinePattern.cs +++ b/src/Microsoft.ML.PipelineInference/PipelinePattern.cs @@ -152,7 +152,7 @@ public Experiment CreateTrainTestExperiment(IDataView trainData, IDataView testD }, Outputs = { - Model = finalOutput + PredictorModel = finalOutput }, PipelineId = UniqueId.ToString("N"), Kind = MacroUtils.TrainerKindApiValue(trainerKind), @@ -189,7 +189,7 @@ public Models.TrainTestEvaluator.Output AddAsTrainTest(Var trainData, }, Outputs = { - Model = finalOutput + PredictorModel = finalOutput }, TrainingData = trainData, TestingData = testData, diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs index 5bb4782599..058e8bafe3 100644 --- a/src/Microsoft.ML/CSharpApi.cs +++ b/src/Microsoft.ML/CSharpApi.cs @@ -82,6 +82,18 @@ public void Add(Microsoft.ML.Data.TextLoader input, Microsoft.ML.Data.TextLoader _jsonNodes.Add(Serialize("Data.TextLoader", input, output)); } + public Microsoft.ML.Data.TransformModelArrayConverter.Output Add(Microsoft.ML.Data.TransformModelArrayConverter input) + { + var output = new Microsoft.ML.Data.TransformModelArrayConverter.Output(); + Add(input, output); + return output; + } + + public void Add(Microsoft.ML.Data.TransformModelArrayConverter input, Microsoft.ML.Data.TransformModelArrayConverter.Output output) + { + _jsonNodes.Add(Serialize("Data.TransformModelArrayConverter", input, output)); + } + public Microsoft.ML.Models.AnomalyDetectionEvaluator.Output Add(Microsoft.ML.Models.AnomalyDetectionEvaluator input) { var output = new Microsoft.ML.Models.AnomalyDetectionEvaluator.Output(); @@ -1396,7 +1408,7 @@ namespace Data { /// - /// Create and array variable + /// Create an array variable of IDataView /// public sealed partial class IDataViewArrayConverter { @@ -1423,7 
+1435,7 @@ namespace Data { /// - /// Create and array variable + /// Create an array variable of IPredictorModel /// public sealed partial class PredictorModelArrayConverter { @@ -1599,6 +1611,8 @@ public void SetInput(IHostEnvironment env, Experiment experiment) experiment.SetInput(InputFile, inputFile); } + public Var GetInputData() => null; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { Contracts.Assert(previousStep == null); @@ -1640,6 +1654,33 @@ public sealed class Output } } + namespace Data + { + + /// + /// Create an array variable of ITransformModel + /// + public sealed partial class TransformModelArrayConverter + { + + + /// + /// The models + /// + public ArrayVar TransformModel { get; set; } = new ArrayVar(); + + + public sealed class Output + { + /// + /// The model array + /// + public ArrayVar OutputModel { get; set; } = new ArrayVar(); + + } + } + } + namespace Models { @@ -2171,9 +2212,14 @@ public sealed partial class CrossValidationMacroSubGraphInput public sealed partial class CrossValidationMacroSubGraphOutput { /// - /// The model + /// The predictor model /// - public Var Model { get; set; } = new Var(); + public Var PredictorModel { get; set; } = new Var(); + + /// + /// The transform model + /// + public Var TransformModel { get; set; } = new Var(); } @@ -2232,6 +2278,11 @@ public sealed class Output /// public ArrayVar PredictorModel { get; set; } = new ArrayVar(); + /// + /// The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel. 
+ /// + public ArrayVar TransformModel { get; set; } = new ArrayVar(); + /// /// Warning dataset /// @@ -2327,14 +2378,19 @@ public sealed class Output public Var OutputData { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(DatasetTransformer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(DatasetTransformer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new DatasetTransformerPipelineStep(output); } @@ -2397,14 +2453,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ICal public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FixedPlattCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FixedPlattCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new FixedPlattCalibratorPipelineStep(output); } @@ -2529,14 +2590,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ICal public Var PredictorModel { get; set; } = new Var(); } + public Var 
GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(NaiveCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(NaiveCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new NaiveCalibratorPipelineStep(output); } @@ -2642,14 +2708,19 @@ public sealed class Output public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(OneVersusAll)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new OneVersusAllPipelineStep(output); } @@ -2725,14 +2796,19 @@ public sealed class Output public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(OvaModelCombiner)} only supports an { 
nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(OvaModelCombiner)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new OvaModelCombinerPipelineStep(output); } @@ -2784,14 +2860,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ICal public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(PAVCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(PAVCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new PAVCalibratorPipelineStep(output); } @@ -2901,14 +2982,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ICal public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(PlattCalibrator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(PlattCalibrator)} only supports an { 
nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new PlattCalibratorPipelineStep(output); } @@ -3307,9 +3393,14 @@ public sealed partial class TrainTestMacroSubGraphInput public sealed partial class TrainTestMacroSubGraphOutput { /// - /// The model + /// The predictor model /// - public Var Model { get; set; } = new Var(); + public Var PredictorModel { get; set; } = new Var(); + + /// + /// Transform model + /// + public Var TransformModel { get; set; } = new Var(); } @@ -3373,6 +3464,11 @@ public sealed class Output /// public Var PredictorModel { get; set; } = new Var(); + /// + /// The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel. + /// + public Var TransformModel { get; set; } = new Var(); + /// /// Warning dataset /// @@ -3554,14 +3650,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(AveragedPerceptronBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(AveragedPerceptronBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new AveragedPerceptronBinaryClassifierPipelineStep(output); } @@ -3849,14 +3950,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var 
PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FastForestBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FastForestBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new FastForestBinaryClassifierPipelineStep(output); } @@ -4126,14 +4232,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FastForestRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FastForestRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new FastForestRegressorPipelineStep(output); } @@ -4519,14 +4630,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep 
previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FastTreeBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FastTreeBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new FastTreeBinaryClassifierPipelineStep(output); } @@ -4940,14 +5056,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRan public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FastTreeRanker)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FastTreeRanker)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new FastTreeRankerPipelineStep(output); } @@ -5321,14 +5442,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ 
nameof(FastTreeRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FastTreeRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new FastTreeRegressorPipelineStep(output); } @@ -5707,14 +5833,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FastTreeTweedieRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FastTreeTweedieRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new FastTreeTweedieRegressorPipelineStep(output); } @@ -5859,14 +5990,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(GeneralizedAdditiveModelBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is 
ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(GeneralizedAdditiveModelBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new GeneralizedAdditiveModelBinaryClassifierPipelineStep(output); } @@ -5995,14 +6131,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(GeneralizedAdditiveModelRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(GeneralizedAdditiveModelRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new GeneralizedAdditiveModelRegressorPipelineStep(output); } @@ -6096,14 +6237,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IClu public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(KMeansPlusPlusClusterer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ 
nameof(KMeansPlusPlusClusterer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new KMeansPlusPlusClustererPipelineStep(output); } @@ -6226,14 +6372,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LinearSvmBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LinearSvmBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new LinearSvmBinaryClassifierPipelineStep(output); } @@ -6371,14 +6522,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LogisticRegressionBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LogisticRegressionBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData 
= dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new LogisticRegressionBinaryClassifierPipelineStep(output); } @@ -6516,14 +6672,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IMul public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LogisticRegressionClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LogisticRegressionClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new LogisticRegressionClassifierPipelineStep(output); } @@ -6584,14 +6745,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IMul public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(NaiveBayesClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(NaiveBayesClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new 
NaiveBayesClassifierPipelineStep(output); } @@ -6734,14 +6900,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(OnlineGradientDescentRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(OnlineGradientDescentRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new OnlineGradientDescentRegressorPipelineStep(output); } @@ -6818,14 +6989,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(OrdinaryLeastSquaresRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(OrdinaryLeastSquaresRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new OrdinaryLeastSquaresRegressorPipelineStep(output); } @@ -6909,14 +7085,19 @@ public sealed class Output : 
Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IAno public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(PcaAnomalyDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(PcaAnomalyDetector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new PcaAnomalyDetectorPipelineStep(output); } @@ -7049,14 +7230,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(PoissonRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(PoissonRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new PoissonRegressorPipelineStep(output); } @@ -7185,14 +7371,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep 
ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new StochasticDualCoordinateAscentBinaryClassifierPipelineStep(output); } @@ -7305,14 +7496,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IMul public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new StochasticDualCoordinateAscentClassifierPipelineStep(output); } @@ -7425,14 +7621,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IReg public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep 
ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(StochasticDualCoordinateAscentRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new StochasticDualCoordinateAscentRegressorPipelineStep(output); } @@ -7559,14 +7760,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBin public Var PredictorModel { get; set; } = new Var(); } + public Var GetInputData() => TrainingData; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(StochasticGradientDescentBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(StochasticGradientDescentBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - TrainingData = dataStep.Data; + TrainingData = dataStep.Data; + } Output output = experiment.Add(this); return new StochasticGradientDescentBinaryClassifierPipelineStep(output); } @@ -7632,14 +7838,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment 
experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ApproximateBootstrapSampler)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ApproximateBootstrapSampler)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ApproximateBootstrapSamplerPipelineStep(output); } @@ -7692,14 +7903,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(BinaryPredictionScoreColumnsRenamer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(BinaryPredictionScoreColumnsRenamer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new BinaryPredictionScoreColumnsRenamerPipelineStep(output); } @@ -7836,14 +8052,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ 
nameof(BinNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(BinNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new BinNormalizerPipelineStep(output); } @@ -8008,14 +8229,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(CategoricalHashOneHotVectorizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(CategoricalHashOneHotVectorizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new CategoricalHashOneHotVectorizerPipelineStep(output); } @@ -8178,14 +8404,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(CategoricalOneHotVectorizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ 
nameof(CategoricalOneHotVectorizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new CategoricalOneHotVectorizerPipelineStep(output); } @@ -8297,14 +8528,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(CharacterTokenizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(CharacterTokenizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new CharacterTokenizerPipelineStep(output); } @@ -8387,14 +8623,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ColumnConcatenator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ColumnConcatenator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new 
ColumnConcatenatorPipelineStep(output); } @@ -8501,14 +8742,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ColumnCopier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ColumnCopier)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ColumnCopierPipelineStep(output); } @@ -8561,14 +8807,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ColumnDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ColumnDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ColumnDropperPipelineStep(output); } @@ -8621,14 +8872,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep 
ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ColumnSelector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ColumnSelector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ColumnSelectorPipelineStep(output); } @@ -8783,14 +9039,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ColumnTypeConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ColumnTypeConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ColumnTypeConverterPipelineStep(output); } @@ -8848,14 +9109,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(CombinerByContiguousGroupId)} only supports 
an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(CombinerByContiguousGroupId)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new CombinerByContiguousGroupIdPipelineStep(output); } @@ -8982,14 +9248,19 @@ public sealed class Output public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ConditionalNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ConditionalNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ConditionalNormalizerPipelineStep(output); } @@ -9043,14 +9314,19 @@ public sealed class Output public Var OutputData { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(DataCache)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(DataCache)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = 
experiment.Add(this); return new DataCachePipelineStep(output); } @@ -9275,14 +9551,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(Dictionarizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(Dictionarizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new DictionarizerPipelineStep(output); } @@ -9335,14 +9616,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FeatureCombiner)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FeatureCombiner)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new FeatureCombinerPipelineStep(output); } @@ -9400,14 +9686,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public 
ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FeatureSelectorByCount)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FeatureSelectorByCount)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new FeatureSelectorByCountPipelineStep(output); } @@ -9475,14 +9766,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(FeatureSelectorByMutualInformation)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(FeatureSelectorByMutualInformation)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new FeatureSelectorByMutualInformationPipelineStep(output); } @@ -9619,14 +9915,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - 
throw new InvalidOperationException($"{ nameof(GlobalContrastNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(GlobalContrastNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new GlobalContrastNormalizerPipelineStep(output); } @@ -9778,14 +10079,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(HashConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(HashConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new HashConverterPipelineStep(output); } @@ -9892,14 +10198,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(KeyToTextConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new 
InvalidOperationException($"{ nameof(KeyToTextConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new KeyToTextConverterPipelineStep(output); } @@ -9957,14 +10268,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LabelColumnKeyBooleanConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LabelColumnKeyBooleanConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new LabelColumnKeyBooleanConverterPipelineStep(output); } @@ -10081,14 +10397,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LabelIndicator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LabelIndicator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = 
experiment.Add(this); return new LabelIndicatorPipelineStep(output); } @@ -10141,14 +10462,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LabelToFloatConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LabelToFloatConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new LabelToFloatConverterPipelineStep(output); } @@ -10270,14 +10596,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LogMeanVarianceNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LogMeanVarianceNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new LogMeanVarianceNormalizerPipelineStep(output); } @@ -10412,14 +10743,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = 
new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(LpNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(LpNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new LpNormalizerPipelineStep(output); } @@ -10559,14 +10895,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(MeanVarianceNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(MeanVarianceNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new MeanVarianceNormalizerPipelineStep(output); } @@ -10669,14 +11010,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - 
throw new InvalidOperationException($"{ nameof(MinMaxNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(MinMaxNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new MinMaxNormalizerPipelineStep(output); } @@ -10825,14 +11171,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(MissingValueHandler)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(MissingValueHandler)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new MissingValueHandlerPipelineStep(output); } @@ -10939,14 +11290,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(MissingValueIndicator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new 
InvalidOperationException($"{ nameof(MissingValueIndicator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new MissingValueIndicatorPipelineStep(output); } @@ -11053,14 +11409,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(MissingValuesDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(MissingValuesDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new MissingValuesDropperPipelineStep(output); } @@ -11118,14 +11479,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(MissingValuesRowDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(MissingValuesRowDropper)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = 
experiment.Add(this); return new MissingValuesRowDropperPipelineStep(output); } @@ -11272,14 +11638,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(MissingValueSubstitutor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(MissingValueSubstitutor)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new MissingValueSubstitutorPipelineStep(output); } @@ -11470,14 +11841,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(NGramTranslator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(NGramTranslator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new NGramTranslatorPipelineStep(output); } @@ -11525,14 +11901,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + 
public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(NoOperation)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(NoOperation)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new NoOperationPipelineStep(output); } @@ -11585,14 +11966,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(OptionalColumnCreator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(OptionalColumnCreator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new OptionalColumnCreatorPipelineStep(output); } @@ -11749,14 +12135,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new 
InvalidOperationException($"{ nameof(PcaCalculator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(PcaCalculator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new PcaCalculatorPipelineStep(output); } @@ -11809,14 +12200,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(PredictedLabelColumnOriginalValueConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(PredictedLabelColumnOriginalValueConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new PredictedLabelColumnOriginalValueConverterPipelineStep(output); } @@ -11898,14 +12294,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(RandomNumberGenerator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep 
dataStep)) + { + throw new InvalidOperationException($"{ nameof(RandomNumberGenerator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new RandomNumberGeneratorPipelineStep(output); } @@ -11983,14 +12384,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(RowRangeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(RowRangeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new RowRangeFilterPipelineStep(output); } @@ -12048,14 +12454,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(RowSkipAndTakeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(RowSkipAndTakeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = 
experiment.Add(this); return new RowSkipAndTakeFilterPipelineStep(output); } @@ -12108,14 +12519,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(RowSkipFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(RowSkipFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new RowSkipFilterPipelineStep(output); } @@ -12168,14 +12584,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(RowTakeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(RowTakeFilter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new RowTakeFilterPipelineStep(output); } @@ -12228,14 +12649,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + 
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(ScoreColumnSelector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(ScoreColumnSelector)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new ScoreColumnSelectorPipelineStep(output); } @@ -12332,14 +12758,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(Segregator)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new SegregatorPipelineStep(output); } @@ -12397,14 +12828,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(SentimentAnalyzer)} 
only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(SentimentAnalyzer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new SentimentAnalyzerPipelineStep(output); } @@ -12522,14 +12958,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(SupervisedBinNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(SupervisedBinNormalizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new SupervisedBinNormalizerPipelineStep(output); } @@ -12713,14 +13154,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(TextFeaturizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(TextFeaturizer)} only supports an { 
nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new TextFeaturizerPipelineStep(output); } @@ -12833,14 +13279,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(TextToKeyConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(TextToKeyConverter)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new TextToKeyConverterPipelineStep(output); } @@ -12945,14 +13396,19 @@ public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(TreeLeafFeaturizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(TreeLeafFeaturizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new TreeLeafFeaturizerPipelineStep(output); } @@ -13101,14 +13557,19 @@ public sealed class 
Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITra public Var Model { get; set; } = new Var(); } + public Var GetInputData() => Data; + public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment) { - if (!(previousStep is ILearningPipelineDataStep dataStep)) + if (previousStep != null) { - throw new InvalidOperationException($"{ nameof(WordTokenizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); - } + if (!(previousStep is ILearningPipelineDataStep dataStep)) + { + throw new InvalidOperationException($"{ nameof(WordTokenizer)} only supports an { nameof(ILearningPipelineDataStep)} as an input."); + } - Data = dataStep.Data; + Data = dataStep.Data; + } Output output = experiment.Add(this); return new WordTokenizerPipelineStep(output); } diff --git a/src/Microsoft.ML/Data/CollectionDataSource.cs b/src/Microsoft.ML/Data/CollectionDataSource.cs index 56523fc994..8551079d30 100644 --- a/src/Microsoft.ML/Data/CollectionDataSource.cs +++ b/src/Microsoft.ML/Data/CollectionDataSource.cs @@ -52,6 +52,8 @@ public void SetInput(IHostEnvironment environment, Experiment experiment) experiment.SetInput(_dataViewEntryPoint.Data, _dataView); } + public Var GetInputData() => null; + public abstract IDataView GetDataView(IHostEnvironment environment); } diff --git a/src/Microsoft.ML/ILearningPipelineItem.cs b/src/Microsoft.ML/ILearningPipelineItem.cs index d0430b711d..c36f890c57 100644 --- a/src/Microsoft.ML/ILearningPipelineItem.cs +++ b/src/Microsoft.ML/ILearningPipelineItem.cs @@ -14,6 +14,12 @@ namespace Microsoft.ML public interface ILearningPipelineItem { ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment); + + /// + /// Returns the place holder for input IDataView object for the node in the execution graph. 
+ /// + /// + Var GetInputData(); } /// diff --git a/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs b/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs index e0a4eae826..1a670fc854 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationEvaluator.cs @@ -4,7 +4,6 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Transforms; namespace Microsoft.ML.Models @@ -66,7 +65,11 @@ public BinaryClassificationMetrics Evaluate(PredictionModel model, ILearningPipe throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(BinaryClassificationEvaluator)} Evaluate."); } - return BinaryClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); + var metric = BinaryClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); + + Contracts.Check(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); + + return metric[0]; } } } diff --git a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs index aa3a94f3a9..f536f30ed0 100644 --- a/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/BinaryClassificationMetrics.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using System; +using System.Collections.Generic; namespace Microsoft.ML.Models { @@ -18,41 +19,50 @@ private BinaryClassificationMetrics() { } - internal static BinaryClassificationMetrics FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix) + internal static List FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, int confusionMatriceStartIndex = 0) { Contracts.AssertValue(env); env.AssertValue(overallMetrics); env.AssertValue(confusionMatrix); var metricsEnumerable = 
overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - var enumerator = metricsEnumerable.GetEnumerator(); - if (!enumerator.MoveNext()) + if (!metricsEnumerable.GetEnumerator().MoveNext()) { throw env.Except("The overall RegressionMetrics didn't have any rows."); } - SerializationClass metrics = enumerator.Current; + List metrics = new List(); + var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); - if (enumerator.MoveNext()) + int Index = 0; + foreach(var metric in metricsEnumerable) { - throw env.Except("The overall RegressionMetrics contained more than 1 row."); + + if (Index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) + { + throw env.Except("Confusion matrices didn't have enough matrices."); + } + + metrics.Add( + new BinaryClassificationMetrics() + { + Auc = metric.Auc, + Accuracy = metric.Accuracy, + PositivePrecision = metric.PositivePrecision, + PositiveRecall = metric.PositiveRecall, + NegativePrecision = metric.NegativePrecision, + NegativeRecall = metric.NegativeRecall, + LogLoss = metric.LogLoss, + LogLossReduction = metric.LogLossReduction, + Entropy = metric.Entropy, + F1Score = metric.F1Score, + Auprc = metric.Auprc, + ConfusionMatrix = confusionMatrices.Current, + }); + } - return new BinaryClassificationMetrics() - { - Auc = metrics.Auc, - Accuracy = metrics.Accuracy, - PositivePrecision = metrics.PositivePrecision, - PositiveRecall = metrics.PositiveRecall, - NegativePrecision = metrics.NegativePrecision, - NegativeRecall = metrics.NegativeRecall, - LogLoss = metrics.LogLoss, - LogLossReduction = metrics.LogLossReduction, - Entropy = metrics.Entropy, - F1Score = metrics.F1Score, - Auprc = metrics.Auprc, - ConfusionMatrix = ConfusionMatrix.Create(env, confusionMatrix), - }; + return metrics; } /// @@ -155,7 +165,7 @@ internal static BinaryClassificationMetrics FromMetrics(IHostEnvironment env, ID /// /// This class contains the public fields necessary to deserialize from 
IDataView. /// - private class SerializationClass + private sealed class SerializationClass { #pragma warning disable 649 // never assigned [ColumnName(BinaryClassifierEvaluator.Auc)] diff --git a/src/Microsoft.ML/Models/ClassificationEvaluator.cs b/src/Microsoft.ML/Models/ClassificationEvaluator.cs index c8bec8642f..8fedc3fb4f 100644 --- a/src/Microsoft.ML/Models/ClassificationEvaluator.cs +++ b/src/Microsoft.ML/Models/ClassificationEvaluator.cs @@ -66,7 +66,11 @@ public ClassificationMetrics Evaluate(PredictionModel model, ILearningPipelineLo throw environment.Except($"Could not find ConfusionMatrix in the results returned in {nameof(BinaryClassificationEvaluator)} Evaluate."); } - return ClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); + var metric = ClassificationMetrics.FromMetrics(environment, overallMetrics, confusionMatrix); + + Contracts.Check(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); + + return metric[0]; } } } diff --git a/src/Microsoft.ML/Models/ClassificationMetrics.cs b/src/Microsoft.ML/Models/ClassificationMetrics.cs index 81c0f91d7b..f3a2416bca 100644 --- a/src/Microsoft.ML/Models/ClassificationMetrics.cs +++ b/src/Microsoft.ML/Models/ClassificationMetrics.cs @@ -5,6 +5,7 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; +using System.Collections.Generic; namespace Microsoft.ML.Models { @@ -17,36 +18,45 @@ private ClassificationMetrics() { } - internal static ClassificationMetrics FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix) + internal static List FromMetrics(IHostEnvironment env, IDataView overallMetrics, IDataView confusionMatrix, + int confusionMatriceStartIndex = 0) { Contracts.AssertValue(env); env.AssertValue(overallMetrics); env.AssertValue(confusionMatrix); var metricsEnumerable = overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - var enumerator = 
metricsEnumerable.GetEnumerator(); - if (!enumerator.MoveNext()) + if (!metricsEnumerable.GetEnumerator().MoveNext()) { throw env.Except("The overall RegressionMetrics didn't have any rows."); } - SerializationClass metrics = enumerator.Current; + List metrics = new List(); + var confusionMatrices = ConfusionMatrix.Create(env, confusionMatrix).GetEnumerator(); - if (enumerator.MoveNext()) + int Index = 0; + foreach (var metric in metricsEnumerable) { - throw env.Except("The overall RegressionMetrics contained more than 1 row."); + if (Index++ >= confusionMatriceStartIndex && !confusionMatrices.MoveNext()) + { + throw env.Except("Confusion matrices didn't have enough matrices."); + } + + metrics.Add( + new ClassificationMetrics() + { + AccuracyMicro = metric.AccuracyMicro, + AccuracyMacro = metric.AccuracyMacro, + LogLoss = metric.LogLoss, + LogLossReduction = metric.LogLossReduction, + TopKAccuracy = metric.TopKAccuracy, + PerClassLogLoss = metric.PerClassLogLoss, + ConfusionMatrix = confusionMatrices.Current + }); + } - return new ClassificationMetrics() - { - AccuracyMicro = metrics.AccuracyMicro, - AccuracyMacro = metrics.AccuracyMacro, - LogLoss = metrics.LogLoss, - LogLossReduction = metrics.LogLossReduction, - TopKAccuracy = metrics.TopKAccuracy, - PerClassLogLoss = metrics.PerClassLogLoss, - ConfusionMatrix = ConfusionMatrix.Create(env, confusionMatrix) - }; + return metrics; } /// @@ -125,7 +135,7 @@ internal static ClassificationMetrics FromMetrics(IHostEnvironment env, IDataVie /// /// This class contains the public fields necessary to deserialize from IDataView. 
/// - private class SerializationClass + private sealed class SerializationClass { #pragma warning disable 649 // never assigned [ColumnName(MultiClassClassifierEvaluator.AccuracyMicro)] diff --git a/src/Microsoft.ML/Models/ConfusionMatrix.cs b/src/Microsoft.ML/Models/ConfusionMatrix.cs index 2040fc8331..72aa5061dc 100644 --- a/src/Microsoft.ML/Models/ConfusionMatrix.cs +++ b/src/Microsoft.ML/Models/ConfusionMatrix.cs @@ -41,7 +41,7 @@ private ConfusionMatrix(double[,] elements, string[] classNames) }); } - internal static ConfusionMatrix Create(IHostEnvironment env, IDataView confusionMatrix) + internal static List Create(IHostEnvironment env, IDataView confusionMatrix) { Contracts.AssertValue(env); env.AssertValue(confusionMatrix); @@ -51,18 +51,28 @@ internal static ConfusionMatrix Create(IHostEnvironment env, IDataView confusion env.Except($"ConfusionMatrix data view did not contain a {nameof(MetricKinds.ColumnNames.Count)} column."); } + IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn); + var slots = default(VBuffer); + confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots); + string[] classNames = new string[slots.Count]; + for (int i = 0; i < slots.Count; i++) + { + classNames[i] = slots.Values[i].ToString(); + } + ColumnType type = confusionMatrix.Schema.GetColumnType(countColumn); env.Assert(type.IsVector); - - double[,] elements = new double[type.VectorSize, type.VectorSize]; - - IRowCursor cursor = confusionMatrix.GetRowCursor(col => col == countColumn); ValueGetter> countGetter = cursor.GetGetter>(countColumn); VBuffer countValues = default; - + List confusionMatrices = new List(); + int valuesRowIndex = 0; + double[,] elements = null; while (cursor.MoveNext()) { + if(valuesRowIndex == 0) + elements = new double[type.VectorSize, type.VectorSize]; + countGetter(ref countValues); for (int i = 0; i < countValues.Length; i++) { @@ -70,17 +80,15 @@ internal static ConfusionMatrix 
Create(IHostEnvironment env, IDataView confusion } valuesRowIndex++; - } - var slots = default(VBuffer); - confusionMatrix.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, countColumn, ref slots); - string[] classNames = new string[slots.Count]; - for (int i = 0; i < slots.Count; i++) - { - classNames[i] = slots.Values[i].ToString(); + if(valuesRowIndex == type.VectorSize) + { + valuesRowIndex = 0; + confusionMatrices.Add(new ConfusionMatrix(elements, classNames)); + } } - return new ConfusionMatrix(elements, classNames); + return confusionMatrices; } /// diff --git a/src/Microsoft.ML/Models/CrossValidator.cs b/src/Microsoft.ML/Models/CrossValidator.cs new file mode 100644 index 0000000000..173e03916c --- /dev/null +++ b/src/Microsoft.ML/Models/CrossValidator.cs @@ -0,0 +1,182 @@ +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.EntryPoints; +using System.Collections.Generic; +using System.IO; + +namespace Microsoft.ML.Models +{ + /// + /// Performs cross-validation on a pipeline. + /// + public sealed partial class CrossValidator + { + /// + /// Performs cross validation on a pipeline. + /// + /// Class type that represents input schema. + /// Class type that represents prediction schema. + /// Machine learning pipeline may contain loader, transforms and at least one trainer. 
+ /// List containing metrics and predictor model for each fold + public CrossValidationOutput CrossValidate(LearningPipeline pipeline) + where TInput : class + where TOutput : class, new() + { + using (var environment = new TlcEnvironment()) + { + Experiment subGraph = environment.CreateExperiment(); + ILearningPipelineStep step = null; + List loaders = new List(); + List> transformModels = new List>(); + Var lastTransformModel = null; + Var firstPipelineDataStep = null; + Var firstModel = null; + ILearningPipelineItem firstTransform = null; + foreach (ILearningPipelineItem currentItem in pipeline) + { + if (currentItem is ILearningPipelineLoader loader) + { + loaders.Add(loader); + continue; + } + + step = currentItem.ApplyStep(step, subGraph); + + if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) + { + transformModels.Add(dataStep.Model); + if (firstPipelineDataStep == null) + { + firstPipelineDataStep = dataStep.Data; + firstTransform = currentItem; + } + } + else if (step is ILearningPipelinePredictorStep predictorDataStep) + { + if (lastTransformModel != null) + transformModels.Insert(0, lastTransformModel); + + Var predictorModel; + if (transformModels.Count != 0) + { + var localModelInput = new Transforms.ManyHeterogeneousModelCombiner + { + PredictorModel = predictorDataStep.Model, + TransformModels = new ArrayVar(transformModels.ToArray()) + }; + var localModelOutput = subGraph.Add(localModelInput); + predictorModel = localModelOutput.PredictorModel; + } + else + predictorModel = predictorDataStep.Model; + firstModel = predictorModel; + + var scorer = new Transforms.Scorer + { + PredictorModel = predictorModel + }; + + var scorerOutput = subGraph.Add(scorer); + lastTransformModel = scorerOutput.ScoringTransform; + step = new ScorerPipelineStep(scorerOutput.ScoredData, scorerOutput.ScoringTransform); + transformModels.Clear(); + } + } + + if (transformModels.Count > 0) + { + if (lastTransformModel != null) + 
transformModels.Insert(0, lastTransformModel); + + var modelInput = new Transforms.ModelCombiner + { + Models = new ArrayVar(transformModels.ToArray()) + }; + + var modelOutput = subGraph.Add(modelInput); + lastTransformModel = modelOutput.OutputModel; + } + + var experiment = environment.CreateExperiment(); + var importTextOutput = loaders[0].ApplyStep(null, experiment); + + Data = (importTextOutput as ILearningPipelineDataStep).Data; + Nodes = subGraph; + TransformModel = null; + Inputs.Data = firstTransform.GetInputData(); + Outputs.PredictorModel = null; + Outputs.TransformModel = lastTransformModel; + var crossValidateOutput = experiment.Add(this); + experiment.Compile(); + foreach (ILearningPipelineLoader loader in loaders) + { + loader.SetInput(environment, experiment); + } + + experiment.Run(); + + var cvOutput = new CrossValidationOutput(); + cvOutput.PredictorModels = new PredictionModel[NumFolds]; + + for (int Index = 0; Index < NumFolds; Index++) + { + + if (Kind == MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer) + { + cvOutput.BinaryClassificationMetrics = BinaryClassificationMetrics.FromMetrics( + environment, + experiment.GetOutput(crossValidateOutput.OverallMetrics), + experiment.GetOutput(crossValidateOutput.ConfusionMatrix), 2); + } + else if(Kind == MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer) + { + cvOutput.ClassificationMetrics = ClassificationMetrics.FromMetrics( + environment, + experiment.GetOutput(crossValidateOutput.OverallMetrics), + experiment.GetOutput(crossValidateOutput.ConfusionMatrix), 2); + } + else if (Kind == MacroUtilsTrainerKinds.SignatureRegressorTrainer) + { + cvOutput.RegressionMetrics = RegressionMetrics.FromOverallMetrics( + environment, + experiment.GetOutput(crossValidateOutput.OverallMetrics)); + } + else + { + //Implement metrics for ranking, clustering and anomaly detection. 
+ throw Contracts.Except($"{Kind.ToString()} is not supported at the moment."); + } + + ITransformModel model = experiment.GetOutput(crossValidateOutput.TransformModel[Index]); + BatchPredictionEngine predictor; + using (var memoryStream = new MemoryStream()) + { + model.Save(environment, memoryStream); + + memoryStream.Position = 0; + + predictor = environment.CreateBatchPredictionEngine(memoryStream); + + cvOutput.PredictorModels[Index] = new PredictionModel(predictor, memoryStream); + } + } + + return cvOutput; + } + } + } + + public class CrossValidationOutput + where TInput : class + where TOutput : class, new() + { + public List BinaryClassificationMetrics; + public List ClassificationMetrics; + public List RegressionMetrics; + public PredictionModel[] PredictorModels; + + //REVIEW: Add warnings and per instance results and implement + //metrics for ranking, clustering and anomaly detection. + } +} diff --git a/src/Microsoft.ML/Models/RegressionEvaluator.cs b/src/Microsoft.ML/Models/RegressionEvaluator.cs index 8c2daa53f0..2cb05ee092 100644 --- a/src/Microsoft.ML/Models/RegressionEvaluator.cs +++ b/src/Microsoft.ML/Models/RegressionEvaluator.cs @@ -4,7 +4,6 @@ using Microsoft.ML.Runtime; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.EntryPoints; using Microsoft.ML.Transforms; namespace Microsoft.ML.Models @@ -60,8 +59,12 @@ public RegressionMetrics Evaluate(PredictionModel model, ILearningPipelineLoader { throw environment.Except($"Could not find OverallMetrics in the results returned in {nameof(RegressionEvaluator)} Evaluate."); } + + var metric = RegressionMetrics.FromOverallMetrics(environment, overallMetrics); - return RegressionMetrics.FromOverallMetrics(environment, overallMetrics); + Contracts.Assert(metric.Count == 1, $"Exactly one metric set was expected but found {metric.Count} metrics"); + + return metric[0]; } } } diff --git a/src/Microsoft.ML/Models/RegressionMetrics.cs b/src/Microsoft.ML/Models/RegressionMetrics.cs index 
f5a5122242..68f9af2feb 100644 --- a/src/Microsoft.ML/Models/RegressionMetrics.cs +++ b/src/Microsoft.ML/Models/RegressionMetrics.cs @@ -6,6 +6,7 @@ using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; using System; +using System.Collections.Generic; namespace Microsoft.ML.Models { @@ -18,33 +19,31 @@ private RegressionMetrics() { } - internal static RegressionMetrics FromOverallMetrics(IHostEnvironment env, IDataView overallMetrics) + internal static List FromOverallMetrics(IHostEnvironment env, IDataView overallMetrics) { Contracts.AssertValue(env); env.AssertValue(overallMetrics); var metricsEnumerable = overallMetrics.AsEnumerable(env, true, ignoreMissingColumns: true); - var enumerator = metricsEnumerable.GetEnumerator(); - if (!enumerator.MoveNext()) + if (!metricsEnumerable.GetEnumerator().MoveNext()) { throw env.Except("The overall RegressionMetrics didn't have any rows."); } - SerializationClass metrics = enumerator.Current; - - if (enumerator.MoveNext()) + List metrics = new List(); + foreach (var metric in metricsEnumerable) { - throw env.Except("The overall RegressionMetrics contained more than 1 row."); + metrics.Add(new RegressionMetrics() + { + L1 = metric.L1, + L2 = metric.L2, + Rms = metric.Rms, + LossFn = metric.LossFn, + RSquared = metric.RSquared, + }); } - return new RegressionMetrics() - { - L1 = metrics.L1, - L2 = metrics.L2, - Rms = metrics.Rms, - LossFn = metrics.LossFn, - RSquared = metrics.RSquared, - }; + return metrics; } /// @@ -94,7 +93,7 @@ internal static RegressionMetrics FromOverallMetrics(IHostEnvironment env, IData /// /// This class contains the public fields necessary to deserialize from IDataView. 
/// - private class SerializationClass + private sealed class SerializationClass { #pragma warning disable 649 // never assigned [ColumnName(Runtime.Data.RegressionEvaluator.L1)] diff --git a/src/Microsoft.ML/Models/TrainTestEvaluator.cs b/src/Microsoft.ML/Models/TrainTestEvaluator.cs new file mode 100644 index 0000000000..19261e82de --- /dev/null +++ b/src/Microsoft.ML/Models/TrainTestEvaluator.cs @@ -0,0 +1,179 @@ +using Microsoft.ML.Runtime; +using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.EntryPoints; +using System.Collections.Generic; +using System.IO; +using System.Linq; + +namespace Microsoft.ML.Models +{ + /// + /// Performs Train-Test on a pipeline. + /// + public sealed partial class TrainTestEvaluator + { + /// + /// Performs train-test on a pipeline. + /// + /// Class type that represents input schema. + /// Class type that represents prediction schema. + /// Machine learning pipeline that contains , + /// transforms and at least one trainer. + /// that represents the test dataset. + /// Metrics and predictor model. 
+ public TrainTestEvaluatorOutput TrainTestEvaluate(LearningPipeline pipeline, ILearningPipelineLoader testData) + where TInput : class + where TOutput : class, new() + { + using (var environment = new TlcEnvironment()) + { + Experiment subGraph = environment.CreateExperiment(); + ILearningPipelineStep step = null; + List loaders = new List(); + List> transformModels = new List>(); + Var lastTransformModel = null; + Var firstPipelineDataStep = null; + Var firstModel = null; + ILearningPipelineItem firstTransform = null; + foreach (ILearningPipelineItem currentItem in pipeline) + { + if (currentItem is ILearningPipelineLoader loader) + { + loaders.Add(loader); + continue; + } + + step = currentItem.ApplyStep(step, subGraph); + + if (step is ILearningPipelineDataStep dataStep && dataStep.Model != null) + { + transformModels.Add(dataStep.Model); + if (firstPipelineDataStep == null) + { + firstPipelineDataStep = dataStep.Data; + firstTransform = currentItem; + } + } + else if (step is ILearningPipelinePredictorStep predictorDataStep) + { + if (lastTransformModel != null) + transformModels.Insert(0, lastTransformModel); + + Var predictorModel; + if (transformModels.Count != 0) + { + var localModelInput = new Transforms.ManyHeterogeneousModelCombiner + { + PredictorModel = predictorDataStep.Model, + TransformModels = new ArrayVar(transformModels.ToArray()) + }; + var localModelOutput = subGraph.Add(localModelInput); + predictorModel = localModelOutput.PredictorModel; + } + else + predictorModel = predictorDataStep.Model; + firstModel = predictorModel; + + var scorer = new Transforms.Scorer + { + PredictorModel = predictorModel + }; + + var scorerOutput = subGraph.Add(scorer); + lastTransformModel = scorerOutput.ScoringTransform; + step = new ScorerPipelineStep(scorerOutput.ScoredData, scorerOutput.ScoringTransform); + transformModels.Clear(); + } + } + + if (transformModels.Count > 0) + { + if (lastTransformModel != null) + transformModels.Insert(0, lastTransformModel); 
+ + var modelInput = new Transforms.ModelCombiner + { + Models = new ArrayVar(transformModels.ToArray()) + }; + + var modelOutput = subGraph.Add(modelInput); + lastTransformModel = modelOutput.OutputModel; + } + + var experiment = environment.CreateExperiment(); + + TrainingData = (loaders[0].ApplyStep(null, experiment) as ILearningPipelineDataStep).Data; + TestingData = (testData.ApplyStep(null, experiment) as ILearningPipelineDataStep).Data; + Nodes = subGraph; + TransformModel = null; + Inputs.Data = firstTransform.GetInputData(); + Outputs.PredictorModel = null; + Outputs.TransformModel = lastTransformModel; + var crossValidateOutput = experiment.Add(this); + experiment.Compile(); + foreach (ILearningPipelineLoader loader in loaders) + loader.SetInput(environment, experiment); + + testData.SetInput(environment, experiment); + + experiment.Run(); + + var trainTestOutput = new TrainTestEvaluatorOutput(); + if (Kind == MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer) + { + trainTestOutput.BinaryClassificationMetrics = BinaryClassificationMetrics.FromMetrics( + environment, + experiment.GetOutput(crossValidateOutput.OverallMetrics), + experiment.GetOutput(crossValidateOutput.ConfusionMatrix)).FirstOrDefault(); + } + else if (Kind == MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer) + { + trainTestOutput.ClassificationMetrics = ClassificationMetrics.FromMetrics( + environment, + experiment.GetOutput(crossValidateOutput.OverallMetrics), + experiment.GetOutput(crossValidateOutput.ConfusionMatrix)).FirstOrDefault(); + } + else if (Kind == MacroUtilsTrainerKinds.SignatureRegressorTrainer) + { + trainTestOutput.RegressionMetrics = RegressionMetrics.FromOverallMetrics( + environment, + experiment.GetOutput(crossValidateOutput.OverallMetrics)).FirstOrDefault(); + } + else + { + //Implement metrics for ranking, clustering and anomaly detection. 
+ throw Contracts.Except($"{Kind.ToString()} is not supported at the moment."); + } + + ITransformModel model = experiment.GetOutput(crossValidateOutput.TransformModel); + BatchPredictionEngine predictor; + using (var memoryStream = new MemoryStream()) + { + model.Save(environment, memoryStream); + + memoryStream.Position = 0; + + predictor = environment.CreateBatchPredictionEngine(memoryStream); + + trainTestOutput.PredictorModels = new PredictionModel(predictor, memoryStream); + } + + return trainTestOutput; + } + } + } + + public class TrainTestEvaluatorOutput + where TInput : class + where TOutput : class, new() + { + public BinaryClassificationMetrics BinaryClassificationMetrics; + public ClassificationMetrics ClassificationMetrics; + public RegressionMetrics RegressionMetrics; + public PredictionModel PredictorModels; + + //REVIEW: Add warnings and per instance results and implement + //metrics for ranking, clustering and anomaly detection. + } +} diff --git a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs index 302a71245c..fca8d3ac5b 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationBinaryMacro.cs @@ -224,7 +224,7 @@ public sealed class ArrayIPredictorModelOutput public IPredictorModel[] OutputModel; } - [TlcModule.EntryPoint(Desc = "Create and array variable", Name = "Data.PredictorModelArrayConverter")] + [TlcModule.EntryPoint(Desc = "Create an array variable of IPredictorModel", Name = "Data.PredictorModelArrayConverter")] public static ArrayIPredictorModelOutput MakeArray(IHostEnvironment env, ArrayIPredictorModelInput input) { var result = new ArrayIPredictorModelOutput @@ -234,6 +234,29 @@ public static ArrayIPredictorModelOutput MakeArray(IHostEnvironment env, ArrayIP return result; } + public sealed class ArrayITransformModelInput + { + [Argument(ArgumentType.Required, 
HelpText = "The models", SortOrder = 1)] + public ITransformModel[] TransformModel; + } + + public sealed class ArrayITransformModelOutput + { + [TlcModule.Output(Desc = "The model array", SortOrder = 1)] + public ITransformModel[] OutputModel; + } + + [TlcModule.EntryPoint(Desc = "Create an array variable of ITransformModel", Name = "Data.TransformModelArrayConverter")] + public static ArrayITransformModelOutput MakeArray(IHostEnvironment env, ArrayITransformModelInput input) + { + var result = new ArrayITransformModelOutput + { + OutputModel = input.TransformModel + }; + return result; + } + + public sealed class ArrayIDataViewInput { [Argument(ArgumentType.Required, HelpText = "The data sets", SortOrder = 1)] @@ -246,7 +269,7 @@ public sealed class ArrayIDataViewOutput public IDataView[] OutputData; } - [TlcModule.EntryPoint(Desc = "Create and array variable", Name = "Data.IDataViewArrayConverter")] + [TlcModule.EntryPoint(Desc = "Create an array variable of IDataView", Name = "Data.IDataViewArrayConverter")] public static ArrayIDataViewOutput MakeArray(IHostEnvironment env, ArrayIDataViewInput input) { var result = new ArrayIDataViewOutput diff --git a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs index f39dd2ec3f..569d0b3571 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/CrossValidationMacro.cs @@ -29,8 +29,11 @@ public sealed class SubGraphInput public sealed class SubGraphOutput { - [Argument(ArgumentType.Required, HelpText = "The model", SortOrder = 1)] - public Var Model; + [Argument(ArgumentType.AtMostOnce, HelpText = "The predictor model", SortOrder = 1)] + public Var PredictorModel; + + [Argument(ArgumentType.AtMostOnce, HelpText = "The transform model", SortOrder = 2)] + public Var TransformModel; } public sealed class Arguments @@ -42,7 +45,8 @@ public sealed class Arguments public IDataView Data; 
[TlcModule.OptionalInput] - [Argument(ArgumentType.AtMostOnce, HelpText = "The transform model from the pipeline before this command. It gets included in the Output.PredictorModel.", SortOrder = 2)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The transform model from the pipeline before this command. " + + "It gets included in the Output.PredictorModel.", SortOrder = 2)] public ITransformModel TransformModel; // This is the subgraph that describes how to train a model for each fold. It should @@ -62,16 +66,16 @@ public sealed class Arguments // For splitting the data into folds, this column is used for grouping rows and makes sure // that a group of rows is not split among folds. - [Argument(ArgumentType.LastOccurenceWins, HelpText = "Column to use for stratification", ShortName = "strat", SortOrder = 7)] + [Argument(ArgumentType.LastOccurenceWins, HelpText = "Column to use for stratification", ShortName = "strat", SortOrder = 6)] public string StratificationColumn; // The number of folds to generate. - [Argument(ArgumentType.LastOccurenceWins, HelpText = "Number of folds in k-fold cross-validation", ShortName = "k", SortOrder = 8)] + [Argument(ArgumentType.LastOccurenceWins, HelpText = "Number of folds in k-fold cross-validation", ShortName = "k", SortOrder = 7)] public int NumFolds = 2; // REVIEW: suggest moving to subcomponents for evaluators, to allow for different parameters on the evaluators // (and the same for the TrainTest macro). I currently do not know how to do this, so this should be revisited in the future. 
- [Argument(ArgumentType.Required, HelpText = "Specifies the trainer kind, which determines the evaluator to be used.", SortOrder = 9)] + [Argument(ArgumentType.Required, HelpText = "Specifies the trainer kind, which determines the evaluator to be used.", SortOrder = 8)] public MacroUtils.TrainerKinds Kind = MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer; } @@ -79,22 +83,28 @@ public sealed class Arguments // but that requires changes in the entry points infrastructure to support structs in the output classes. public sealed class Output { - [TlcModule.Output(Desc = "The final model including the trained predictor model and the model from the transforms, provided as the Input.TransformModel.", SortOrder = 1)] + [TlcModule.Output(Desc = "The final model including the trained predictor model and the model from the transforms, " + + "provided as the Input.TransformModel.", SortOrder = 1)] public IPredictorModel[] PredictorModel; - [TlcModule.Output(Desc = "Warning dataset", SortOrder = 2)] + [TlcModule.Output(Desc = "The final model including the trained predictor model and the model from the transforms, " + + "provided as the Input.TransformModel.", SortOrder = 2)] + public ITransformModel[] TransformModel; + + [TlcModule.Output(Desc = "Warning dataset", SortOrder = 3)] public IDataView Warnings; - [TlcModule.Output(Desc = "Overall metrics dataset", SortOrder = 3)] + [TlcModule.Output(Desc = "Overall metrics dataset", SortOrder = 4)] public IDataView OverallMetrics; - [TlcModule.Output(Desc = "Per instance metrics dataset", SortOrder = 4)] + [TlcModule.Output(Desc = "Per instance metrics dataset", SortOrder = 5)] public IDataView PerInstanceMetrics; - [TlcModule.Output(Desc = "Confusion matrix dataset", SortOrder = 5)] + [TlcModule.Output(Desc = "Confusion matrix dataset", SortOrder = 6)] public IDataView ConfusionMatrix; } + public sealed class CombineMetricsInput { [Argument(ArgumentType.Multiple, HelpText = "Overall metrics datasets", SortOrder = 1)] @@ 
-109,25 +119,25 @@ public sealed class CombineMetricsInput [Argument(ArgumentType.Multiple, HelpText = "Warning datasets", SortOrder = 4)] public IDataView[] Warnings; - [Argument(ArgumentType.AtMostOnce, HelpText = "The label column name", ShortName = "Label", SortOrder = 6)] + [Argument(ArgumentType.AtMostOnce, HelpText = "The label column name", ShortName = "Label", SortOrder = 5)] public string LabelColumn = DefaultColumnNames.Label; - [Argument(ArgumentType.Required, HelpText = "Specifies the trainer kind, which determines the evaluator to be used.", SortOrder = 7)] + [Argument(ArgumentType.Required, HelpText = "Specifies the trainer kind, which determines the evaluator to be used.", SortOrder = 6)] public MacroUtils.TrainerKinds Kind = MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer; } public sealed class CombinedOutput { - [TlcModule.Output(Desc = "Warning dataset", SortOrder = 2)] + [TlcModule.Output(Desc = "Warning dataset", SortOrder = 1)] public IDataView Warnings; - [TlcModule.Output(Desc = "Overall metrics dataset", SortOrder = 3)] + [TlcModule.Output(Desc = "Overall metrics dataset", SortOrder = 2)] public IDataView OverallMetrics; - [TlcModule.Output(Desc = "Per instance metrics dataset", SortOrder = 4)] + [TlcModule.Output(Desc = "Per instance metrics dataset", SortOrder = 3)] public IDataView PerInstanceMetrics; - [TlcModule.Output(Desc = "Confusion matrix dataset", SortOrder = 5)] + [TlcModule.Output(Desc = "Confusion matrix dataset", SortOrder = 4)] public IDataView ConfusionMatrix; } @@ -157,6 +167,7 @@ public static CommonOutputs.MacroOutput CrossValidate( subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); var predModelVars = new Var[input.NumFolds]; + var transformModelVars = new Var[input.NumFolds]; var inputTransformModelVars = new Var[input.NumFolds]; var warningsVars = new Var[input.NumFolds]; var overallMetricsVars = new Var[input.NumFolds]; @@ -188,11 +199,27 @@ public static 
CommonOutputs.MacroOutput CrossValidate( { VarName = mapping[input.Inputs.Data.VarName] }; - args.Outputs.Model = new Var + + if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName)) { - VarName = mapping[input.Outputs.Model.VarName] - }; + args.Outputs.PredictorModel = new Var + { + VarName = mapping[input.Outputs.PredictorModel.VarName] + }; + } + else + args.Outputs.PredictorModel = null; + if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName)) + { + args.Outputs.TransformModel = new Var + { + VarName = mapping[input.Outputs.TransformModel.VarName] + }; + } + else + args.Outputs.TransformModel = null; + // Set train/test trainer kind to match. args.Kind = input.Kind; @@ -206,23 +233,48 @@ public static CommonOutputs.MacroOutput CrossValidate( inputBindingMap.Add(nameof(args.TestingData), new List { testingData }); inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k)); var outputMap = new Dictionary(); + var transformModelVar = new Var(); var predModelVar = new Var(); - outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName); - predModelVars[k] = predModelVar; - - ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null; - if (transformModelVarName != null && transformModelVarName.VariableName != null) + if (input.Outputs.PredictorModel == null) { - var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner + outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName); + transformModelVars[k] = transformModelVar; + ML.Transforms.ModelCombiner.Output modelCombineOutput = null; + if (transformModelVarName != null && transformModelVarName.VariableName != null) { - TransformModel = { VarName = transformModelVarName.VariableName }, - PredictorModel = predModelVar - }; - - exp.Reset(); - modelCombineOutput = exp.Add(modelCombine); - 
subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); - predModelVars[k] = modelCombineOutput.PredictorModel; + var modelCombine = new ML.Transforms.ModelCombiner + { + Models = new ArrayVar( + new Var[] { + new Var { VarName = transformModelVarName.VariableName }, + transformModelVar } + ) + }; + + exp.Reset(); + modelCombineOutput = exp.Add(modelCombine); + subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); + transformModelVars[k] = modelCombineOutput.OutputModel; + } + } + else + { + outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName); + predModelVars[k] = predModelVar; + ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null; + if (transformModelVarName != null && transformModelVarName.VariableName != null) + { + var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner + { + TransformModel = { VarName = transformModelVarName.VariableName }, + PredictorModel = predModelVar + }; + + exp.Reset(); + modelCombineOutput = exp.Add(modelCombine); + subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); + predModelVars[k] = modelCombineOutput.PredictorModel; + } } var warningVar = new Var(); @@ -237,19 +289,34 @@ public static CommonOutputs.MacroOutput CrossValidate( var confusionMatrix = new Var(); outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName); confusionMatrixVars[k] = confusionMatrix; - subGraphNodes.Add(EntryPointNode.Create(env, "Models.TrainTestEvaluator", args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap)); + const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator"; + subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap)); } exp.Reset(); // Convert predictors from all folds into an 
array of predictors. - var outModels = new ML.Data.PredictorModelArrayConverter + + if (input.Outputs.PredictorModel == null) { - Model = new ArrayVar(predModelVars) - }; - var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output(); - outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel)); - exp.Add(outModels, outModelsOutput); + var outModels = new ML.Data.TransformModelArrayConverter + { + TransformModel = new ArrayVar(transformModelVars) + }; + var outModelsOutput = new ML.Data.TransformModelArrayConverter.Output(); + outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel)); + exp.Add(outModels, outModelsOutput); + } + else + { + var outModels = new ML.Data.PredictorModelArrayConverter + { + Model = new ArrayVar(predModelVars) + }; + var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output(); + outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel)); + exp.Add(outModels, outModelsOutput); + } // Convert warnings data views from all folds into an array of data views. 
var warnings = new ML.Data.IDataViewArrayConverter @@ -330,6 +397,7 @@ public static CommonOutputs.MacroOutput CrossValidate( public static CombinedOutput CombineMetrics(IHostEnvironment env, CombineMetricsInput input) { var eval = GetEvaluator(env, input.Kind); + var perInst = EvaluateUtils.ConcatenatePerInstanceDataViews(env, eval, true, true, input.PerInstanceMetrics.Select( idv => RoleMappedData.Create(idv, RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Label, input.LabelColumn))).ToArray(), out var variableSizeVectorColumnNames); @@ -369,6 +437,7 @@ public static CombinedOutput CombineMetrics(IHostEnvironment env, CombineMetrics } } } + conf = EvaluateUtils.ConcatenateOverallMetrics(env, input.ConfusionMatrix); } diff --git a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs index b05b5e5c69..edd4cf6e5b 100644 --- a/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs +++ b/src/Microsoft.ML/Runtime/EntryPoints/TrainTestMacro.cs @@ -7,6 +7,7 @@ using Microsoft.ML.Runtime.CommandLine; using Microsoft.ML.Runtime.Data; using Microsoft.ML.Runtime.EntryPoints; +using Microsoft.ML.Transforms; using Newtonsoft.Json.Linq; [assembly: LoadableClass(typeof(void), typeof(TrainTestMacro), null, typeof(SignatureEntryPointModule), "TrainTestMacro")] @@ -23,8 +24,11 @@ public sealed class SubGraphInput public sealed class SubGraphOutput { - [Argument(ArgumentType.Required, HelpText = "The model", SortOrder = 1)] - public Var Model; + [Argument(ArgumentType.AtMostOnce, HelpText = "The predictor model", SortOrder = 1)] + public Var PredictorModel; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Transform model", SortOrder = 2)] + public Var TransformModel; } public sealed class Arguments @@ -62,31 +66,36 @@ public sealed class Arguments public sealed class Output { - [TlcModule.Output(Desc = "The final model including the trained predictor model and the model from the transforms, provided as the 
Input.TransformModel.", SortOrder = 1)] + [TlcModule.Output(Desc = "The final model including the trained predictor model and the model from the transforms, " + + "provided as the Input.TransformModel.", SortOrder = 1)] public IPredictorModel PredictorModel; - [TlcModule.Output(Desc = "Warning dataset", SortOrder = 2)] + [TlcModule.Output(Desc = "The final model including the trained predictor model and the model from the transforms, " + + "provided as the Input.TransformModel.", SortOrder = 2)] + public ITransformModel TransformModel; + + [TlcModule.Output(Desc = "Warning dataset", SortOrder = 3)] public IDataView Warnings; - [TlcModule.Output(Desc = "Overall metrics dataset", SortOrder = 3)] + [TlcModule.Output(Desc = "Overall metrics dataset", SortOrder = 4)] public IDataView OverallMetrics; - [TlcModule.Output(Desc = "Per instance metrics dataset", SortOrder = 4)] + [TlcModule.Output(Desc = "Per instance metrics dataset", SortOrder = 5)] public IDataView PerInstanceMetrics; - [TlcModule.Output(Desc = "Confusion matrix dataset", SortOrder = 5)] + [TlcModule.Output(Desc = "Confusion matrix dataset", SortOrder = 6)] public IDataView ConfusionMatrix; - [TlcModule.Output(Desc = "Warning dataset for training", SortOrder = 6)] + [TlcModule.Output(Desc = "Warning dataset for training", SortOrder = 7)] public IDataView TrainingWarnings; - [TlcModule.Output(Desc = "Overall metrics dataset for training", SortOrder = 7)] + [TlcModule.Output(Desc = "Overall metrics dataset for training", SortOrder = 8)] public IDataView TrainingOverallMetrics; - [TlcModule.Output(Desc = "Per instance metrics dataset for training", SortOrder = 8)] + [TlcModule.Output(Desc = "Per instance metrics dataset for training", SortOrder = 9)] public IDataView TrainingPerInstanceMetrics; - [TlcModule.Output(Desc = "Confusion matrix dataset for training", SortOrder = 9)] + [TlcModule.Output(Desc = "Confusion matrix dataset for training", SortOrder = 10)] public IDataView TrainingConfusionMatrix; } @@ 
-117,10 +126,13 @@ public static CommonOutputs.MacroOutput TrainTest( subGraphRunContext.RemoveVariable(dataVariable); // Change the subgraph to use the model variable as output. - varName = input.Outputs.Model.VarName; + varName = input.Outputs.PredictorModel == null ? input.Outputs.TransformModel.VarName : input.Outputs.PredictorModel.VarName; if (!subGraphRunContext.TryGetVariable(varName, out dataVariable)) throw env.Except($"Invalid variable name '{varName}'."); - string outputVarName = node.GetOutputVariableName(nameof(Output.PredictorModel)); + + string outputVarName = input.Outputs.PredictorModel == null ? node.GetOutputVariableName(nameof(Output.TransformModel)) : + node.GetOutputVariableName(nameof(Output.PredictorModel)); + foreach (var subGraphNode in subGraphNodes) subGraphNode.RenameOutputVariable(dataVariable.Name, outputVarName); subGraphRunContext.RemoveVariable(dataVariable); @@ -136,26 +148,59 @@ public static CommonOutputs.MacroOutput TrainTest( var testingVar = node.GetInputVariable(nameof(input.TestingData)); var exp = new Experiment(env); - //combine the predictor model with any potential transfrom model passed from the outer graph - if (transformModelVarName != null && transformModelVarName.VariableName != null) + DatasetScorer.Output scoreNodeOutput = null; + ML.Models.DatasetTransformer.Output datasetTransformNodeOutput = null; + if (input.Outputs.PredictorModel == null) + { + //combine the predictor model with any potential transfrom model passed from the outer graph + if (transformModelVarName != null && transformModelVarName.VariableName != null) + { + var modelCombine = new ML.Transforms.ModelCombiner + { + Models = new ArrayVar( + new Var[] { + new Var { VarName = transformModelVarName.VariableName }, + new Var { VarName = outputVarName} } + ) + }; + + var modelCombineOutput = exp.Add(modelCombine); + outputVarName = modelCombineOutput.OutputModel.VarName; + } + + var datasetTransformerNode = new Models.DatasetTransformer + { + Data = 
{ VarName = testingVar.ToJson() }, + TransformModel = { VarName = outputVarName } + }; + + datasetTransformNodeOutput = exp.Add(datasetTransformerNode); + } + else { - var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner + //combine the predictor model with any potential transfrom model passed from the outer graph + if (transformModelVarName != null && transformModelVarName.VariableName != null) + { + var modelCombine = new TwoHeterogeneousModelCombiner + { + TransformModel = { VarName = transformModelVarName.VariableName }, + PredictorModel = { VarName = outputVarName } + }; + + var modelCombineOutput = exp.Add(modelCombine); + outputVarName = modelCombineOutput.PredictorModel.VarName; + } + + // Add the scoring node for testing. + var scoreNode = new DatasetScorer { - TransformModel = { VarName = transformModelVarName.VariableName }, + Data = { VarName = testingVar.ToJson() }, PredictorModel = { VarName = outputVarName } }; - var modelCombineOutput = exp.Add(modelCombine); - outputVarName = modelCombineOutput.PredictorModel.VarName; + scoreNodeOutput = exp.Add(scoreNode); } - // Add the scoring node for testing. - var scoreNode = new ML.Transforms.DatasetScorer - { - Data = { VarName = testingVar.ToJson() }, - PredictorModel = { VarName = outputVarName } - }; - var scoreNodeOutput = exp.Add(scoreNode); subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); // Do not double-add previous nodes. @@ -172,23 +217,40 @@ public static CommonOutputs.MacroOutput TrainTest( if (input.IncludeTrainingMetrics) { - // Add the scoring node for training. 
- var scoreNodeTraining = new ML.Transforms.DatasetScorer + DatasetScorer.Output scoreNodeTrainingOutput = null; + ML.Models.DatasetTransformer.Output datasetTransformNodeTrainingOutput = null; + if (input.Outputs.PredictorModel == null) { - Data = { VarName = trainingVar.ToJson() }, - PredictorModel = { VarName = outputVarName } - }; - var scoreNodeTrainingOutput = exp.Add(scoreNodeTraining); + var datasetTransformerNode = new Models.DatasetTransformer + { + Data = { VarName = testingVar.ToJson() }, + TransformModel = { VarName = outputVarName } + }; + + datasetTransformNodeTrainingOutput = exp.Add(datasetTransformerNode); + } + else + { + // Add the scoring node for training. + var scoreNodeTraining = new DatasetScorer + { + Data = { VarName = trainingVar.ToJson() }, + PredictorModel = { VarName = outputVarName } + }; + scoreNodeTrainingOutput = exp.Add(scoreNodeTraining); + } + subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog)); // Do not double-add previous nodes. exp.Reset(); - // Add the evaluator node for training. + // Add the evaluator node for training. var evalInputOutputTraining = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings); var evalNodeTraining = evalInputOutputTraining.Item1; var evalOutputTraining = evalInputOutputTraining.Item2; - evalNodeTraining.Data.VarName = scoreNodeTrainingOutput.ScoredData.VarName; + evalNodeTraining.Data.VarName = input.Outputs.PredictorModel == null ? 
datasetTransformNodeTrainingOutput.OutputData.VarName : + scoreNodeTrainingOutput.ScoredData.VarName; if (node.OutputMap.TryGetValue(nameof(Output.TrainingWarnings), out outVariableName)) evalOutputTraining.Warnings.VarName = outVariableName; @@ -211,7 +273,7 @@ public static CommonOutputs.MacroOutput TrainTest( var evalInputOutput = MacroUtils.GetEvaluatorInputOutput(input.Kind, settings); var evalNode = evalInputOutput.Item1; var evalOutput = evalInputOutput.Item2; - evalNode.Data.VarName = scoreNodeOutput.ScoredData.VarName; + evalNode.Data.VarName = input.Outputs.PredictorModel == null ? datasetTransformNodeOutput.OutputData.VarName : scoreNodeOutput.ScoredData.VarName; if (node.OutputMap.TryGetValue(nameof(Output.Warnings), out outVariableName)) evalOutput.Warnings.VarName = outVariableName; diff --git a/src/Microsoft.ML/Runtime/Experiment/Experiment.cs b/src/Microsoft.ML/Runtime/Experiment/Experiment.cs index 9fb0560701..108befb74b 100644 --- a/src/Microsoft.ML/Runtime/Experiment/Experiment.cs +++ b/src/Microsoft.ML/Runtime/Experiment/Experiment.cs @@ -34,7 +34,6 @@ private sealed class SerializationHelper private readonly JsonSerializer _serializer; private readonly SerializationHelper _helper; private EntryPointGraph _graph; - public Experiment(Runtime.IHostEnvironment env) { _env = env; diff --git a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs index 5fabb15840..7f5114b185 100644 --- a/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs +++ b/src/Microsoft.ML/Runtime/Internal/Tools/CSharpApiGenerator.cs @@ -750,6 +750,10 @@ private void GenerateLoaderAddInputMethod(IndentingTextWriter writer, string cla writer.WriteLine("}"); writer.WriteLine(""); + //GetInputData + writer.WriteLine("public Var GetInputData() => null;"); + writer.WriteLine(""); + //Apply. 
writer.WriteLine($"public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)"); writer.WriteLine("{"); @@ -955,27 +959,41 @@ private void GenerateInput(IndentingTextWriter writer, writer.WriteLine(); GenerateOutput(writer, entryPointInfo, out HashSet outputVariableNames); - GenerateApplyFunction(writer, entryPointInfo, transformType, classBase, outputVariableNames); + GenerateApplyFunction(writer, entryPointInfo, transformType, outputVariableNames, entryPointInfo.InputKinds); writer.Outdent(); writer.WriteLine("}"); } private static void GenerateApplyFunction(IndentingTextWriter writer, ModuleCatalog.EntryPointInfo entryPointInfo, - Type type, string classBase, HashSet outputVariableNames) + Type type, HashSet outputVariableNames, Type[] inputKinds) { + if (inputKinds == null) + return; + bool isTransform = false; bool isCalibrator = false; - if (classBase.Contains("ITransformInput")) + + if (inputKinds.Any(t => typeof(ITransformInput).IsAssignableFrom(t))) isTransform = true; - else if (!classBase.Contains("ITrainerInput")) + else if (!inputKinds.Any(t => typeof(ITrainerInput).IsAssignableFrom(t))) return; - if (classBase.Contains("ICalibratorInput")) + if (inputKinds.Any(t => typeof(ICalibratorInput).IsAssignableFrom(t))) isCalibrator = true; + if (isTransform) + writer.WriteLine("public Var GetInputData() => Data;"); + else + writer.WriteLine("public Var GetInputData() => TrainingData;"); + + writer.WriteLine(""); string className = GeneratorUtils.GetClassAndMethodNames(entryPointInfo).Item2; writer.WriteLine("public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)"); writer.WriteLine("{"); + + writer.Indent(); + writer.WriteLine("if (previousStep != null)"); + writer.WriteLine("{"); writer.Indent(); writer.WriteLine("if (!(previousStep is ILearningPipelineDataStep dataStep))"); writer.WriteLine("{"); @@ -992,6 +1010,9 @@ private static void 
GenerateApplyFunction(IndentingTextWriter writer, ModuleCata else writer.WriteLine("TrainingData = dataStep.Data;"); + writer.Outdent(); + writer.WriteLine("}"); + string pipelineStep = $"{className}PipelineStep"; writer.WriteLine($"Output output = experiment.Add(this);"); writer.WriteLine($"return new {pipelineStep}(output);"); diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs index c7c199f2d1..a385917367 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestCSharpApi.cs @@ -319,7 +319,7 @@ public void TestCrossValidationMacro() TransformModel = null }; crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.Model = modelCombineOutput.PredictorModel; + crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; var crossValidateOutput = experiment.Add(crossValidate); experiment.Compile(); @@ -410,7 +410,7 @@ public void TestCrossValidationMacroWithMultiClass() TransformModel = null }; crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.Model = modelCombineOutput.PredictorModel; + crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; var crossValidateOutput = experiment.Add(crossValidate); experiment.Compile(); @@ -541,7 +541,7 @@ public void TestCrossValidationMacroWithStratification() StratificationColumn = "Strat" }; crossValidate.Inputs.Data = nop.Data; - crossValidate.Outputs.Model = modelCombineOutput.PredictorModel; + crossValidate.Outputs.PredictorModel = modelCombineOutput.PredictorModel; var crossValidateOutput = experiment.Add(crossValidate); experiment.Compile(); experiment.SetInput(importInput.InputFile, new SimpleFileHandle(env, dataPath, false, false)); diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index 34b9317176..2343c7949a 100644 --- 
a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -1875,7 +1875,7 @@ public void EntryPointTrainTestMacroNoTransformInput() 'Data': '$data1' }, 'Outputs': { - 'Model': '$model' + 'PredictorModel': '$model' } }, 'Outputs': { @@ -1980,7 +1980,7 @@ public void EntryPointTrainTestMacro() 'Data': '$data1' }, 'Outputs': { - 'Model': '$model' + 'PredictorModel': '$model' } }, 'Outputs': { @@ -2108,7 +2108,7 @@ public void EntryPointChainedTrainTestMacros() 'Data': '$data1' }, 'Outputs': { - 'Model': '$model' + 'PredictorModel': '$model' } }, 'Outputs': { @@ -2141,7 +2141,7 @@ public void EntryPointChainedTrainTestMacros() 'Data': '$data4' }, 'Outputs': { - 'Model': '$model2' + 'PredictorModel': '$model2' } }, 'Outputs': { @@ -2274,7 +2274,7 @@ public void EntryPointChainedCrossValMacros() 'Data': '$data6' }, 'Outputs': { - 'Model': '$model' + 'PredictorModel': '$model' } }, 'Outputs': { @@ -2336,7 +2336,7 @@ public void EntryPointChainedCrossValMacros() 'Data': '$data4' }, 'Outputs': { - 'Model': '$model2' + 'PredictorModel': '$model2' } }, 'Outputs': { diff --git a/test/Microsoft.ML.TestFramework/ModelHelper.cs b/test/Microsoft.ML.TestFramework/ModelHelper.cs index 1b0ab4eb8e..edf4408bcb 100644 --- a/test/Microsoft.ML.TestFramework/ModelHelper.cs +++ b/test/Microsoft.ML.TestFramework/ModelHelper.cs @@ -58,7 +58,6 @@ public static IDataView GetKcHouseDataView(string dataPath) private static ITransformModel CreateKcHousePricePredictorModel(string dataPath) { Experiment experiment = s_environment.CreateExperiment(); - var importData = new Data.TextLoader(dataPath) { Arguments = new TextLoaderArguments diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs index 392462a0eb..81a2d950b5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs +++ 
b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs @@ -2,11 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.TestFramework; -using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; using Xunit; using Xunit.Abstractions; diff --git a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs index 31fc4fdd6d..df529f04a7 100644 --- a/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/HousePriceTrainAndPredictionTests.cs @@ -4,12 +4,9 @@ using Microsoft.ML.Data; using Microsoft.ML.Models; -using Microsoft.ML.Runtime.Api; -using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers; using Microsoft.ML.Transforms; using Xunit; -using Xunit.Abstractions; namespace Microsoft.ML.Scenarios { diff --git a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index 80947644e9..f99fefe378 100644 --- a/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -67,7 +67,6 @@ public void TrainAndPredictSentimentModelTest() pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); PredictionModel model = pipeline.Train(); - IEnumerable sentiments = new[] { new SentimentData @@ -111,6 +110,7 @@ public void TrainAndPredictSentimentModelTest() } } }; + var evaluator = new BinaryClassificationEvaluator(); BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData); @@ -143,6 +143,306 @@ public void TrainAndPredictSentimentModelTest() Assert.Equal(1, matrix["negative", "negative"]); } + [Fact] + public void TrainTestPredictSentimentModelTest() 
+ { + string dataPath = GetDataPath(SentimentDataPath); + var pipeline = new LearningPipeline(); + + pipeline.Add(new Data.TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { '\t' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(0) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange(1) }, + Type = Runtime.Data.DataKind.Text + } + } + } + }); + + pipeline.Add(new TextFeaturizer("Features", "SentimentText") + { + KeepDiacritics = false, + KeepPunctuations = false, + TextCase = TextNormalizerTransformCaseNormalizationMode.Lower, + OutputTokens = true, + StopWordsRemover = new PredefinedStopWordsRemover(), + VectorNormalizer = TextTransformTextNormKind.L2, + CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, + WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } + }); + + pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); + pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); + + PredictionModel model = pipeline.Train(); + IEnumerable sentiments = new[] + { + new SentimentData + { + SentimentText = "Please refrain from adding nonsense to Wikipedia." + }, + new SentimentData + { + SentimentText = "He is a CHEATER, and the article should say that." 
+ } + }; + + string testDataPath = GetDataPath(SentimentTestPath); + var testData = new Data.TextLoader(testDataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { '\t' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(0) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange(1) }, + Type = Runtime.Data.DataKind.Text + } + } + } + }; + + var tt = new TrainTestEvaluator().TrainTestEvaluate(pipeline, testData); + + Assert.Null(tt.ClassificationMetrics); + Assert.Null(tt.RegressionMetrics); + Assert.NotNull(tt.BinaryClassificationMetrics); + Assert.NotNull(tt.PredictorModels); + + BinaryClassificationMetrics metrics = tt.BinaryClassificationMetrics; + Assert.Equal(.5556, metrics.Accuracy, 4); + Assert.Equal(.8, metrics.Auc, 1); + Assert.Equal(.87, metrics.Auprc, 2); + Assert.Equal(1, metrics.Entropy, 3); + Assert.Equal(.6923, metrics.F1Score, 4); + Assert.Equal(.969, metrics.LogLoss, 3); + Assert.Equal(3.083, metrics.LogLossReduction, 3); + Assert.Equal(1, metrics.NegativePrecision, 3); + Assert.Equal(.111, metrics.NegativeRecall, 3); + Assert.Equal(.529, metrics.PositivePrecision, 3); + Assert.Equal(1, metrics.PositiveRecall); + + ConfusionMatrix matrix = metrics.ConfusionMatrix; + Assert.Equal(2, matrix.Order); + Assert.Equal(2, matrix.ClassNames.Count); + Assert.Equal("positive", matrix.ClassNames[0]); + Assert.Equal("negative", matrix.ClassNames[1]); + + Assert.Equal(9, matrix[0, 0]); + Assert.Equal(9, matrix["positive", "positive"]); + Assert.Equal(0, matrix[0, 1]); + Assert.Equal(0, matrix["positive", "negative"]); + + Assert.Equal(8, matrix[1, 0]); + Assert.Equal(8, matrix["negative", "positive"]); + Assert.Equal(1, matrix[1, 1]); + Assert.Equal(1, matrix["negative", "negative"]); + + IEnumerable predictions = tt.PredictorModels.Predict(sentiments); + Assert.Equal(2, 
predictions.Count()); + Assert.True(predictions.ElementAt(0).Sentiment.IsFalse); + Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); + + predictions = tt.PredictorModels.Predict(sentiments); + Assert.Equal(2, predictions.Count()); + Assert.True(predictions.ElementAt(0).Sentiment.IsFalse); + Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); + } + + [Fact] + public void CrossValidateSentimentModelTest() + { + string dataPath = GetDataPath(SentimentDataPath); + var pipeline = new LearningPipeline(); + + pipeline.Add(new Data.TextLoader(dataPath) + { + Arguments = new TextLoaderArguments + { + Separator = new[] { '\t' }, + HasHeader = true, + Column = new[] + { + new TextLoaderColumn() + { + Name = "Label", + Source = new [] { new TextLoaderRange(0) }, + Type = Runtime.Data.DataKind.Num + }, + + new TextLoaderColumn() + { + Name = "SentimentText", + Source = new [] { new TextLoaderRange(1) }, + Type = Runtime.Data.DataKind.Text + } + } + } + }); + + pipeline.Add(new TextFeaturizer("Features", "SentimentText") + { + KeepDiacritics = false, + KeepPunctuations = false, + TextCase = TextNormalizerTransformCaseNormalizationMode.Lower, + OutputTokens = true, + StopWordsRemover = new PredefinedStopWordsRemover(), + VectorNormalizer = TextTransformTextNormKind.L2, + CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false }, + WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 2, AllLengths = true } + }); + + pipeline.Add(new FastTreeBinaryClassifier() { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }); + pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }); + + IEnumerable sentiments = new[] + { + new SentimentData + { + SentimentText = "Please refrain from adding nonsense to Wikipedia." + }, + new SentimentData + { + SentimentText = "He is a CHEATER, and the article should say that." 
+ } + }; + + var cv = new CrossValidator().CrossValidate(pipeline); + + //First two items are average and std. deviation of metrics from the folds. + Assert.Equal(2, cv.PredictorModels.Count()); + Assert.Null(cv.ClassificationMetrics); + Assert.Null(cv.RegressionMetrics); + Assert.NotNull(cv.BinaryClassificationMetrics); + Assert.Equal(4, cv.BinaryClassificationMetrics.Count()); + + //Average of all folds. + BinaryClassificationMetrics metrics = cv.BinaryClassificationMetrics[0]; + Assert.Equal(0.57023626091422708, metrics.Accuracy, 4); + Assert.Equal(0.54960689910161487, metrics.Auc, 1); + Assert.Equal(0.67048277219704255, metrics.Auprc, 2); + Assert.Equal(0, metrics.Entropy, 3); + Assert.Equal(0.68942642723130532, metrics.F1Score, 4); + Assert.Equal(0.97695909611968434, metrics.LogLoss, 3); + Assert.Equal(-3.050726259114541, metrics.LogLossReduction, 3); + Assert.Equal(0.37553879310344829, metrics.NegativePrecision, 3); + Assert.Equal(0.25683962264150945, metrics.NegativeRecall, 3); + Assert.Equal(0.63428539173628362, metrics.PositivePrecision, 3); + Assert.Equal(0.75795196364816619, metrics.PositiveRecall); + Assert.Null(metrics.ConfusionMatrix); + + //Std. Deviation. + metrics = cv.BinaryClassificationMetrics[1]; + Assert.Equal(0.039933230611196011, metrics.Accuracy, 4); + Assert.Equal(0.021066177821462407, metrics.Auc, 1); + Assert.Equal(0.045842033921572725, metrics.Auprc, 2); + Assert.Equal(0, metrics.Entropy, 3); + Assert.Equal(0.030085767890644915, metrics.F1Score, 4); + Assert.Equal(0.032906777175141941, metrics.LogLoss, 3); + Assert.Equal(0.86311349745170118, metrics.LogLossReduction, 3); + Assert.Equal(0.030711206896551647, metrics.NegativePrecision, 3); + Assert.Equal(0.068160377358490579, metrics.NegativeRecall, 3); + Assert.Equal(0.051761119891622735, metrics.PositivePrecision, 3); + Assert.Equal(0.0015417072379052127, metrics.PositiveRecall); + Assert.Null(metrics.ConfusionMatrix); + + //Fold 1. 
+ metrics = cv.BinaryClassificationMetrics[2]; + Assert.Equal(0.53030303030303028, metrics.Accuracy, 4); + Assert.Equal(0.52854072128015284, metrics.Auc, 1); + Assert.Equal(0.62464073827546951, metrics.Auprc, 2); + Assert.Equal(0, metrics.Entropy, 3); + Assert.Equal(0.65934065934065933, metrics.F1Score, 4); + Assert.Equal(1.0098658732948276, metrics.LogLoss, 3); + Assert.Equal(-3.9138397565662424, metrics.LogLossReduction, 3); + Assert.Equal(0.34482758620689657, metrics.NegativePrecision, 3); + Assert.Equal(0.18867924528301888, metrics.NegativeRecall, 3); + Assert.Equal(0.58252427184466016, metrics.PositivePrecision, 3); + Assert.Equal(0.759493670886076, metrics.PositiveRecall); + + ConfusionMatrix matrix = metrics.ConfusionMatrix; + Assert.Equal(2, matrix.Order); + Assert.Equal(2, matrix.ClassNames.Count); + Assert.Equal("positive", matrix.ClassNames[0]); + Assert.Equal("negative", matrix.ClassNames[1]); + + Assert.Equal(60, matrix[0, 0]); + Assert.Equal(60, matrix["positive", "positive"]); + Assert.Equal(19, matrix[0, 1]); + Assert.Equal(19, matrix["positive", "negative"]); + + Assert.Equal(43, matrix[1, 0]); + Assert.Equal(43, matrix["negative", "positive"]); + Assert.Equal(10, matrix[1, 1]); + Assert.Equal(10, matrix["negative", "negative"]); + + //Fold 2. 
+ metrics = cv.BinaryClassificationMetrics[3]; + Assert.Equal(0.61016949152542377, metrics.Accuracy, 4); + Assert.Equal(0.57067307692307689, metrics.Auc, 1); + Assert.Equal(0.71632480611861549, metrics.Auprc, 2); + Assert.Equal(0, metrics.Entropy, 3); + Assert.Equal(0.71951219512195119, metrics.F1Score, 4); + Assert.Equal(0.94405231894454111, metrics.LogLoss, 3); + Assert.Equal(-2.1876127616628396, metrics.LogLossReduction, 3); + Assert.Equal(0.40625, metrics.NegativePrecision, 3); + Assert.Equal(0.325, metrics.NegativeRecall, 3); + Assert.Equal(0.686046511627907, metrics.PositivePrecision, 3); + Assert.Equal(0.75641025641025639, metrics.PositiveRecall); + + matrix = metrics.ConfusionMatrix; + Assert.Equal(2, matrix.Order); + Assert.Equal(2, matrix.ClassNames.Count); + Assert.Equal("positive", matrix.ClassNames[0]); + Assert.Equal("negative", matrix.ClassNames[1]); + + Assert.Equal(59, matrix[0, 0]); + Assert.Equal(59, matrix["positive", "positive"]); + Assert.Equal(19, matrix[0, 1]); + Assert.Equal(19, matrix["positive", "negative"]); + + Assert.Equal(27, matrix[1, 0]); + Assert.Equal(27, matrix["negative", "positive"]); + Assert.Equal(13, matrix[1, 1]); + Assert.Equal(13, matrix["negative", "negative"]); + + IEnumerable predictions = cv.PredictorModels[0].Predict(sentiments); + Assert.Equal(2, predictions.Count()); + Assert.True(predictions.ElementAt(0).Sentiment.IsTrue); + Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); + + predictions = cv.PredictorModels[1].Predict(sentiments); + Assert.Equal(2, predictions.Count()); + Assert.True(predictions.ElementAt(0).Sentiment.IsTrue); + Assert.True(predictions.ElementAt(1).Sentiment.IsTrue); + } + public class SentimentData { [Column(ordinal: "0", name: "Label")]