From 2de024f38bed0bf93c517abee2d0cfafd098a404 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 8 Mar 2019 17:21:38 -0800 Subject: [PATCH 01/10] Adding tests for model files. --- .../Datasets/ScoreColumn.cs | 14 ++ .../ModelFiles.cs | 122 ++++++++++++++++++ .../Estimators/TrainSaveModelAndPredict.cs | 62 --------- 3 files changed, 136 insertions(+), 62 deletions(-) create mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs create mode 100644 test/Microsoft.ML.Functional.Tests/ModelFiles.cs delete mode 100644 test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs b/test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs new file mode 100644 index 0000000000..d4184b77c0 --- /dev/null +++ b/test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs @@ -0,0 +1,14 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace Microsoft.ML.Functional.Tests.Datasets +{ + /// + /// A class to hold a feature column. + /// + internal sealed class ScoreColumn + { + public float Score { get; set; } + } +} diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs new file mode 100644 index 0000000000..13de1c5395 --- /dev/null +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -0,0 +1,122 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; +using System.IO.Compression; +using System.Linq; +using Microsoft.ML.Functional.Tests.Datasets; +using Microsoft.ML.RunTests; +using Microsoft.ML.TestFramework; +using Microsoft.ML.Trainers; +using Microsoft.ML.Trainers.FastTree; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Functional.Tests +{ + public class ModelFiles : BaseTestClass + { + public ModelFiles(ITestOutputHelper output) : base(output) + { + } + + /// + /// Model Files: The (minimum) nuget version can be found in the model file. + /// + [Fact] + public void DetermineNugetVersionFromModel() + { + var modelFile = GetDataPath(@"backcompat\keep-model.zip"); + using (ZipArchive archive = ZipFile.OpenRead(modelFile)) + { + // The version of the entire model is kept in the version file. + var version = archive.Entries.First(x => x.FullName == @"TrainingInfo\Version.txt"); + Assert.NotNull(version); + using (var stream = version.Open()) + using (var reader = new StreamReader(stream)) + { + // The only line in the file is the version of the model. + var line = reader.ReadLine(); + Assert.Equal(@"1.0.0.0", line); + } + } + } + + /// + /// Model Files: Supported model classes can be saved as ONNX files. + /// + [Fact] + public void SaveModelAsOnnx() + { + var mlContext = new MLContext(seed: 1); + + // Get the dataset. + var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true); + + // Create a pipeline to train on the housing data. + var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) + .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent( + new SdcaRegressionTrainer.Options { NumberOfThreads = 1 })); + + // Fit the pipeline. 
+ var model = pipeline.Fit(data); + + // Save as Onnx + var modelPath = DeleteOutputPath("SaveModelAsOnnx.onnx"); + using (var file = File.Create(modelPath)) + mlContext.Model.ConvertToOnnx(model, data, file); + } + + /// + /// Model Files: Save a model, including all transforms, then load and make predictions. + /// + /// + /// Serves two scenarios: + /// 1. I can train a model and save it to a file, including transforms. + /// 2. Training and prediction happen in different processes (or even different machines). + /// The actual test will not run in different processes, but will simulate the idea that the + /// "communication pipe" is just a serialized model of some form. + /// + [Fact] + public void FitPipelineSaveModelAndPredict() + { + var mlContext = new MLContext(seed: 1); + + // Get the dataset. + var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true); + + // Create a pipeline to train on the housing data. + var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) + .Append(mlContext.Regression.Trainers.FastTree( + new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 })); + + // Fit the pipeline. + var model = pipeline.Fit(data); + + var modelPath = DeleteOutputPath("fitPipelineSaveModelAndPredict.zip"); + // Save model to a file. + using (var file = File.Create(modelPath)) + mlContext.Model.Save(model, file); + + // Load model from a file. + ITransformer serializedModel; + using (var file = File.OpenRead(modelPath)) + serializedModel = mlContext.Model.Load(file); + + // Create prediction engine and test predictions. + var originalPredictionEngine = mlContext.Model.CreatePredictionEngine(model); + var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine(serializedModel); + + // Take a handful of examples out of the dataset and compute predictions. + var dataEnumerator = mlContext.Data.CreateEnumerable(mlContext.Data.TakeRows(data, 5), false); + foreach (var row in dataEnumerator) + { + var originalPrediction = originalPredictionEngine.Predict(row); + var serializedPrediction = serializedPredictionEngine.Predict(row); + // Check that the predictions are identical. + Assert.Equal(originalPrediction.Score, serializedPrediction.Score); + } + } + } +} \ No newline at end of file diff --git a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs b/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs deleted file mode 100644 index e662657801..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/Api/Estimators/TrainSaveModelAndPredict.cs +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System.IO; -using System.Linq; -using Microsoft.ML.Data; -using Microsoft.ML.RunTests; -using Microsoft.ML.Trainers; -using Xunit; - -namespace Microsoft.ML.Tests.Scenarios.Api -{ - public partial class ApiScenariosTests - { - /// - /// Train, save/load model, predict: - /// Serve the scenario where training and prediction happen in different processes (or even different machines). - /// The actual test will not run in different processes, but will simulate the idea that the - /// "communication pipe" is just a serialized model of some form. 
- /// - [Fact] - public void TrainSaveModelAndPredict() - { - var ml = new MLContext(seed: 1); - var data = ml.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.trainFilename), hasHeader: true); - - // Pipeline. - var pipeline = ml.Transforms.Text.FeaturizeText("Features", "SentimentText") - .AppendCacheCheckpoint(ml) - .Append(ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated( - new SdcaNonCalibratedBinaryTrainer.Options { NumberOfThreads = 1 })); - - // Train. - var model = pipeline.Fit(data); - - var modelPath = GetOutputPath("temp.zip"); - // Save model. - using (var file = File.Create(modelPath)) - model.SaveTo(ml, file); - - // Load model. - ITransformer loadedModel; - using (var file = File.OpenRead(modelPath)) - loadedModel = TransformerChain.LoadFrom(ml, file); - - // Create prediction engine and test predictions. - var engine = loadedModel.CreatePredictionEngine(ml); - - // Take a couple examples out of the test data and run predictions on top. - var testData = ml.Data.CreateEnumerable( - ml.Data.LoadFromTextFile(GetDataPath(TestDatasets.Sentiment.testFilename), hasHeader: true), false); - foreach (var input in testData.Take(5)) - { - var prediction = engine.Predict(input); - // Verify that predictions match and scores are separated from zero. - Assert.Equal(input.Sentiment, prediction.Sentiment); - Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1); - } - } - } -} From 90525f9fb260135d4bd274ff9a87f58107d87bb3 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 8 Mar 2019 20:46:53 -0800 Subject: [PATCH 02/10] fixing cross-plat directory separators. --- test/Microsoft.ML.Functional.Tests/ModelFiles.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index 13de1c5395..cb54e82f12 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -27,13 +27,14 @@ public ModelFiles(ITestOutputHelper output) : base(output) [Fact] public void DetermineNugetVersionFromModel() { - var modelFile = GetDataPath(@"backcompat\keep-model.zip"); + var modelFile = GetDataPath(@"backcompat" + Path.DirectorySeparatorChar + @"keep-model.zip"); + var versionFileName = @"TrainingInfo" + Path.DirectorySeparatorChar + @"Version.txt"; using (ZipArchive archive = ZipFile.OpenRead(modelFile)) { // The version of the entire model is kept in the version file. - var version = archive.Entries.First(x => x.FullName == @"TrainingInfo\Version.txt"); - Assert.NotNull(version); - using (var stream = version.Open()) + var versionPath = archive.Entries.First(x => x.FullName == versionFileName); + Assert.NotNull(versionPath); + using (var stream = versionPath.Open()) using (var reader = new StreamReader(stream)) { // The only line in the file is the version of the model. From 121ee96a5458229ece371a21ef799b694166e253 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 8 Mar 2019 22:19:31 -0800 Subject: [PATCH 03/10] fixing cross-plat directory separators. 
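An illustrative sketch of the two path conventions in play here (helper and path names are placeholders, not part of the diff): file-system paths should be composed with the platform's separator, while a ZipArchiveEntry.FullName is matched against the literal string stored in the archive, which for these test models is written with '\' on every OS — the convention the next revision of this test settles on.

    using System.IO;
    using System.IO.Compression;
    using System.Linq;

    internal static class VersionFileSketch
    {
        // Reads the single line stored in the model's version file.
        internal static string ReadModelVersion()
        {
            // File-system path: let the OS supply the separator.
            var modelFile = Path.Combine("backcompat", "keep-model.zip");

            // Archive entry name: compared against the exact string stored in the zip,
            // which uses '\' regardless of the OS running the test.
            const string versionEntry = @"TrainingInfo\Version.txt";

            using (var archive = ZipFile.OpenRead(modelFile))
            using (var reader = new StreamReader(archive.Entries.First(e => e.FullName == versionEntry).Open()))
                return reader.ReadLine();
        }
    }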
--- test/Microsoft.ML.Functional.Tests/ModelFiles.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index cb54e82f12..bf63e78c88 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -28,7 +28,7 @@ public ModelFiles(ITestOutputHelper output) : base(output) public void DetermineNugetVersionFromModel() { var modelFile = GetDataPath(@"backcompat" + Path.DirectorySeparatorChar + @"keep-model.zip"); - var versionFileName = @"TrainingInfo" + Path.DirectorySeparatorChar + @"Version.txt"; + var versionFileName = @"TrainingInfo\Version.txt"; // Can't find this cross plat. using (ZipArchive archive = ZipFile.OpenRead(modelFile)) { // The version of the entire model is kept in the version file. From 9fb2873129af2c669909328875f2d5658cd3be7e Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Thu, 14 Mar 2019 15:48:36 -0700 Subject: [PATCH 04/10] Addressing PR comments. --- ...reContributionOutput.cs => CommonColumns.cs} | 17 ++++++++++++++++- .../Datasets/FeatureColumn.cs | 14 -------------- .../Datasets/ScoreColumn.cs | 14 -------------- .../Microsoft.ML.Functional.Tests/ModelFiles.cs | 10 +++++----- 4 files changed, 21 insertions(+), 34 deletions(-) rename test/Microsoft.ML.Functional.Tests/Datasets/{FeatureContributionOutput.cs => CommonColumns.cs} (58%) delete mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs delete mode 100644 test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureContributionOutput.cs b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs similarity index 58% rename from test/Microsoft.ML.Functional.Tests/Datasets/FeatureContributionOutput.cs rename to test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs index 6aa8dcbb11..cede38a022 100644 --- a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureContributionOutput.cs +++ b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs @@ -2,9 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. - namespace Microsoft.ML.Functional.Tests.Datasets { + /// + /// A class to hold a feature column. + /// + internal sealed class FeatureColumn + { + public float[] Features { get; set; } + } + /// /// A class to hold the output of FeatureContributionCalculator /// @@ -12,4 +19,12 @@ internal sealed class FeatureContributionOutput { public float[] FeatureContributions { get; set; } } + + /// + /// A class to hold a feature column. + /// + internal sealed class ScoreColumn + { + public float Score { get; set; } + } } diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs b/test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs deleted file mode 100644 index 090ad23646..0000000000 --- a/test/Microsoft.ML.Functional.Tests/Datasets/FeatureColumn.cs +++ /dev/null @@ -1,14 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace Microsoft.ML.Functional.Tests.Datasets -{ - /// - /// A class to hold a feature column. 
- /// - internal sealed class FeatureColumn - { - public float[] Features { get; set; } - } -} diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs b/test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs deleted file mode 100644 index d4184b77c0..0000000000 --- a/test/Microsoft.ML.Functional.Tests/Datasets/ScoreColumn.cs +++ /dev/null @@ -1,14 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace Microsoft.ML.Functional.Tests.Datasets -{ - /// - /// A class to hold a feature column. - /// - internal sealed class ScoreColumn - { - public float Score { get; set; } - } -} diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index bf63e78c88..1b902b5a18 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -27,8 +27,8 @@ public ModelFiles(ITestOutputHelper output) : base(output) [Fact] public void DetermineNugetVersionFromModel() { - var modelFile = GetDataPath(@"backcompat" + Path.DirectorySeparatorChar + @"keep-model.zip"); - var versionFileName = @"TrainingInfo\Version.txt"; // Can't find this cross plat. + var modelFile = GetDataPath($"backcompat{Path.DirectorySeparatorChar}keep-model.zip"); + var versionFileName = @"TrainingInfo\Version.txt"; // Must use '\' for cross-platform testing. using (ZipArchive archive = ZipFile.OpenRead(modelFile)) { // The version of the entire model is kept in the version file. @@ -57,7 +57,7 @@ public void SaveModelAsOnnx() // Create a pipeline to train on the housing data. var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent( + .Append(mlContext.Regression.Trainers.Sdca( new SdcaRegressionTrainer.Options { NumberOfThreads = 1 })); // Fit the pipeline. @@ -106,8 +106,8 @@ public void FitPipelineSaveModelAndPredict() serializedModel = mlContext.Model.Load(file); // Create prediction engine and test predictions. - var originalPredictionEngine = mlContext.Model.CreatePredictionEngine(model); - var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine(serializedModel); + var originalPredictionEngine = model.CreatePredictionEngine(mlContext); + var serializedPredictionEngine = serializedModel.CreatePredictionEngine(mlContext); // Take a handful of examples out of the dataset and compute predictions. var dataEnumerator = mlContext.Data.CreateEnumerable(mlContext.Data.TakeRows(data, 5), false); From 9ca8ddc67ddb4619824a409cd9717067ab6cb376 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 22 Mar 2019 10:56:53 -0700 Subject: [PATCH 05/10] In progress commit. 
--- .../ModelFiles.cs | 110 ++++++++++++++---- .../ModelLoading.cs | 64 +--------- 2 files changed, 86 insertions(+), 88 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index 8fdc07683c..8980e33b76 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -5,6 +5,8 @@ using System.IO; using System.IO.Compression; using System.Linq; +using Microsoft.ML.Calibrators; +using Microsoft.ML.Data; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -44,31 +46,6 @@ public void DetermineNugetVersionFromModel() } } - /// - /// Model Files: Supported model classes can be saved as ONNX files. - /// - [Fact] - public void SaveModelAsOnnx() - { - var mlContext = new MLContext(seed: 1); - - // Get the dataset. - var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true); - - // Create a pipeline to train on the housing data. - var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features) - .Append(mlContext.Regression.Trainers.Sdca( - new SdcaRegressionTrainer.Options { NumberOfThreads = 1 })); - - // Fit the pipeline. - var model = pipeline.Fit(data); - - // Save as Onnx - var modelPath = DeleteOutputPath("SaveModelAsOnnx.onnx"); - using (var file = File.Create(modelPath)) - mlContext.Model.ConvertToOnnx(model, data, file); - } - /// /// Model Files: Save a model, including all transforms, then load and make predictions. /// @@ -119,5 +96,88 @@ public void FitPipelineSaveModelAndPredict() Assert.Equal(originalPrediction.Score, serializedPrediction.Score); } } + + [Fact] + public void LoadModelAndExtractPredictor() + { + var mlContext = new MLContext(seed: 1); + + // Load the dataset. + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = mlContext.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.adult.trainFilename), + hasHeader: TestDatasets.adult.fileHasHeader, + separatorChar: TestDatasets.adult.fileSeparator); + + // Pipeline. + var trainerPipeline = mlContext.Transforms.Concatenate("Features", Adult.NumericalFeatures) + .Append(mlContext.BinaryClassification.Trainers.LogisticRegression()); + // Define the same pipeline starting with the loader. + var loaderAndTrainerPipeline = loader.Append(mlContext.Transforms.Concatenate("Features", Adult.NumericalFeatures)) + .Append(mlContext.BinaryClassification.Trainers.LogisticRegression()); + + // Fit the pipelines to the dataset. + var transformerModel = trainerPipeline.Fit(data); + var compositeLoaderModel = loaderAndTrainerPipeline.Fit(file); + + // Serialize the models to a stream. + // Save a transformer model with an input schema. + string modelAndSchemaPath = DeleteOutputPath(FullTestName + "-model-schema.zip"); + mlContext.Model.Save(transformerModel, data.Schema, modelAndSchemaPath); + // Save a loader model without an input schema. + string compositeLoaderModelPath = DeleteOutputPath(FullTestName + "-composite-model.zip"); + mlContext.Model.Save(compositeLoaderModel, compositeLoaderModelPath); + // Save a transformer model, specifying the loader. 
+ string loaderAndTransformerModelPath = DeleteOutputPath(FullTestName + "-loader-transformer.zip"); + mlContext.Model.Save(loader, transformerModel, loaderAndTransformerModelPath); + + // Load the serialized models back in. + ITransformer serializedTransformerModel; + IDataLoader serializedCompositeLoader; + ITransformer serializedCompositeLoaderWithSchema; + ITransformer serializedCompositeLoaderWithLoader; + IDataLoader serializedLoaderAndTransformerModel; + ITransformer serializedLoaderAndTransformerModelWithSchema; + ITransformer serializedLoaderAndTransformerModelWithLoader; + // Load the transformer model. + using (var fs = File.OpenRead(modelAndSchemaPath)) + serializedTransformerModel = mlContext.Model.Load(fs, out var loadedSchema); + using (var fs = File.OpenRead(compositeLoaderModelPath)) + { + // This model can be loaded either as a composite data loader, + // a transformer model + an input schema, or a transformer model + a data loader. + serializedCompositeLoader = mlContext.Model.Load(fs); + serializedCompositeLoaderWithLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader serializedLoader); + serializedCompositeLoaderWithSchema = mlContext.Model.Load(fs, out var schema); + Common.AssertEqual(compositeLoaderModel.GetOutputSchema(), schema); + } + using (var fs = File.OpenRead(loaderAndTransformerModelPath)) + { + // This model can be loaded either as a composite data loader, + // a transformer model + an input schema, or a transformer model + a data loader. + serializedLoaderAndTransformerModel = mlContext.Model.Load(fs); + serializedLoaderAndTransformerModelWithSchema = mlContext.Model.Load(fs, out var schema); + Common.AssertEqual(transformerModel.GetOutputSchema(data.Schema), schema); + serializedLoaderAndTransformerModelWithLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader serializedLoader); + } + + // Validate that the models contain the expected estimator. + var gam = ((serializedTransformerModel as ISingleFeaturePredictionTransformer).Model + as CalibratedModelParametersBase).SubModel + as GamBinaryModelParameters; + Assert.NotNull(gam); + + gam = (((serializedCompositeLoader as CompositeDataLoader).Transformer.LastTransformer + as ISingleFeaturePredictionTransformer).Model + as CalibratedModelParametersBase).SubModel + as GamBinaryModelParameters; + Assert.NotNull(gam); + + gam = (((serializedLoaderAndTransformerModelWithLoader as TransformerChain).LastTransformer + as ISingleFeaturePredictionTransformer).Model + as CalibratedModelParametersBase).SubModel + as GamBinaryModelParameters; + Assert.NotNull(gam); + } } } \ No newline at end of file diff --git a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs b/test/Microsoft.ML.Functional.Tests/ModelLoading.cs index 193ddedad5..06452148ba 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelLoading.cs @@ -41,69 +41,7 @@ private class InputData public float[] Features { get; set; } } - [Fact] - public void LoadModelAndExtractPredictor() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var data = loader.Load(file); - - // Pipeline. - var pipeline = _ml.BinaryClassification.Trainers.Gam(); - // Define the same pipeline starting with the loader. - var pipeline1 = loader.Append(_ml.BinaryClassification.Trainers.Gam()); - - // Train. 
- var transformerModel = pipeline.Fit(data); - var compositeLoaderModel = pipeline1.Fit(file); - - // Save and reload. - string modelAndSchemaPath = GetOutputPath(FullTestName + "-model-schema.zip"); - _ml.Model.Save(transformerModel, data.Schema, modelAndSchemaPath); - string compositeLoaderModelPath = GetOutputPath(FullTestName + "-composite-model.zip"); - _ml.Model.Save(compositeLoaderModel, compositeLoaderModelPath); - string loaderAndTransformerModelPath = GetOutputPath(FullTestName + "-loader-transformer.zip"); - _ml.Model.Save(loader, transformerModel, loaderAndTransformerModelPath); - - ITransformer loadedTransformerModel; - IDataLoader loadedCompositeLoader; - ITransformer loadedTransformerModel1; - using (var fs = File.OpenRead(modelAndSchemaPath)) - loadedTransformerModel = _ml.Model.Load(fs, out var loadedSchema); - using (var fs = File.OpenRead(compositeLoaderModelPath)) - { - // This model can be loaded either as a composite data loader, - // a transformer model + an input schema, or a transformer model + a data loader. - var t = _ml.Model.LoadWithDataLoader(fs, out IDataLoader l); - var t1 = _ml.Model.Load(fs, out var s); - loadedCompositeLoader = _ml.Model.Load(fs); - } - using (var fs = File.OpenRead(loaderAndTransformerModelPath)) - { - // This model can be loaded either as a composite data loader, - // a transformer model + an input schema, or a transformer model + a data loader. - var t = _ml.Model.Load(fs, out var s); - var c = _ml.Model.Load(fs); - loadedTransformerModel1 = _ml.Model.LoadWithDataLoader(fs, out IDataLoader l); - } - - var gam = ((loadedTransformerModel as ISingleFeaturePredictionTransformer).Model - as CalibratedModelParametersBase).SubModel - as GamBinaryModelParameters; - Assert.NotNull(gam); - - gam = (((loadedCompositeLoader as CompositeDataLoader).Transformer.LastTransformer - as ISingleFeaturePredictionTransformer).Model - as CalibratedModelParametersBase).SubModel - as GamBinaryModelParameters; - Assert.NotNull(gam); - - gam = (((loadedTransformerModel1 as TransformerChain).LastTransformer - as ISingleFeaturePredictionTransformer).Model - as CalibratedModelParametersBase).SubModel - as GamBinaryModelParameters; - Assert.NotNull(gam); - } + [Fact] public void SaveAndLoadModelWithLoader() From e3b993004462de3a0e256bdd4fe05a8ce3624057 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 22 Mar 2019 13:38:42 -0700 Subject: [PATCH 06/10] work in progress --- .../ModelFiles.cs | 4 +- .../ModelLoading.cs | 264 ++++++++++++------ 2 files changed, 174 insertions(+), 94 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index 8980e33b76..eb48b6f6da 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -149,7 +149,7 @@ public void LoadModelAndExtractPredictor() serializedCompositeLoader = mlContext.Model.Load(fs); serializedCompositeLoaderWithLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader serializedLoader); serializedCompositeLoaderWithSchema = mlContext.Model.Load(fs, out var schema); - Common.AssertEqual(compositeLoaderModel.GetOutputSchema(), schema); + Common.AssertEqual(loader.GetOutputSchema(), schema); } using (var fs = File.OpenRead(loaderAndTransformerModelPath)) { @@ -157,7 +157,7 @@ public void LoadModelAndExtractPredictor() // a transformer model + an input schema, or a transformer model + a data loader. 
serializedLoaderAndTransformerModel = mlContext.Model.Load(fs); serializedLoaderAndTransformerModelWithSchema = mlContext.Model.Load(fs, out var schema); - Common.AssertEqual(transformerModel.GetOutputSchema(data.Schema), schema); + Common.AssertEqual(data.Schema, schema); serializedLoaderAndTransformerModelWithLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader serializedLoader); } diff --git a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs b/test/Microsoft.ML.Functional.Tests/ModelLoading.cs index 06452148ba..4810ad2a09 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelLoading.cs @@ -8,7 +8,6 @@ using Microsoft.ML.Calibrators; using Microsoft.ML.Data; using Microsoft.ML.RunTests; -using Microsoft.ML.TestFramework; using Microsoft.ML.Trainers.FastTree; using Microsoft.ML.Transforms; using Xunit; @@ -16,22 +15,12 @@ namespace Microsoft.ML.Functional.Tests { - public partial class ModelLoadingTests : BaseTestClass + public partial class ModelLoadingTests : TestDataPipeBase { - private MLContext _ml; - public ModelLoadingTests(ITestOutputHelper output) : base(output) { } - protected override void Initialize() - { - base.Initialize(); - - _ml = new MLContext(42); - _ml.AddStandardComponents(); - } - private class InputData { [LoadColumn(0)] @@ -41,46 +30,137 @@ private class InputData public float[] Features { get; set; } } - + [Fact] + public void LoadModelAndExtractPredictor() + { + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var data = loader.Load(file); + + // Pipeline. + var pipeline = ML.BinaryClassification.Trainers.Gam(); + // Define the same pipeline starting with the loader. + var pipeline1 = loader.Append(ML.BinaryClassification.Trainers.Gam()); + + // Train. + var transformerModel = pipeline.Fit(data); + var compositeLoaderModel = pipeline1.Fit(file); + + // Save and reload the "same" model with some differences in structure. + + // In this case we are saving the transformer model, but *not* the loader, just the schema from that loader. + string modelAndSchemaPath = GetOutputPath(FullTestName + "-model-schema.zip"); + ML.Model.Save(transformerModel, data.Schema, modelAndSchemaPath); + + // In this case we have combined the loader with the transformer model to form a "composite" loader, and are just + // saving that one loader to this file. + string compositeLoaderModelPath = GetOutputPath(FullTestName + "-composite-model.zip"); + ML.Model.Save(null, compositeLoaderModel, compositeLoaderModelPath); + + // In this case we are saving the transformer model, as well as the associated data loader. + string loaderAndTransformerModelPath = GetOutputPath(FullTestName + "-loader-transformer.zip"); + ML.Model.Save(transformerModel, loader, loaderAndTransformerModelPath); + + ITransformer loadedTransformerModel; + IDataLoader loadedCompositeLoader; + ITransformer loadedTransformerModel1; + using (var fs = File.OpenRead(modelAndSchemaPath)) + loadedTransformerModel = ML.Model.Load(fs, out var loadedSchema); + using (var fs = File.OpenRead(compositeLoaderModelPath)) + { + // This model can be loaded either as a composite data loader, + // a transformer model + an input schema, or a transformer model + a data loader. 
+ var t = ML.Model.LoadWithDataLoader(fs, out loadedCompositeLoader); + // This is a bit strange, as it seems to test that it can reload from the same + // stream twice opened only once, which as far as I know is not really a requirement + // of the design or API, but we are nonetheless testing it. If this winds up failing, + // I'm not sure we should really insist on this as a design requirement. + var t1 = ML.Model.Load(fs, out var s); + + CheckSameSchemas(loadedCompositeLoader.GetOutputSchema(), s); + // We combined the GAM with the loader, so the remaining chain should just be empty. + Assert.Empty(Assert.IsType>(t)); + Assert.Empty(Assert.IsType>(t1)); + } + using (var fs = File.OpenRead(loaderAndTransformerModelPath)) + { + // This model can be loaded either as a composite data loader, + // a transformer model + an input schema, or a transformer model + a data loader. + var t = ML.Model.Load(fs, out var s); + CheckSameSchemas(loader.GetOutputSchema(), s); + + loadedTransformerModel1 = ML.Model.LoadWithDataLoader(fs, out var l); + } + + void AssertIsGam(ITransformer trans) + { + Assert.IsType( + Assert.IsAssignableFrom( + Assert.IsAssignableFrom>(trans).Model).SubModel); + } + + // In the case of the directly used transformer model, the thing we loaded should be itself the result from fitting GAM. + AssertIsGam(loadedTransformerModel); + + // This is quite similar, the fact that we omitted saving the loader and saved the input schema to the model itself. + AssertIsGam(loadedTransformerModel1); + + // If we had combined the transformer with the loader, and then saved *that*, then the resulting loaded "model" + // will be empty (as tested above), but the loader itself with a composite loader containing the result from + // fitting GAM as the sole item in its transformer chain. + var fromComposite = Assert.Single(Assert.IsType>( + Assert.IsType>(loadedCompositeLoader).Transformer)); + AssertIsGam(fromComposite); + + Done(); + } [Fact] public void SaveAndLoadModelWithLoader() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); var data = loader.Load(file); // Pipeline. - var pipeline = _ml.BinaryClassification.Trainers.Gam(); + var pipeline = ML.BinaryClassification.Trainers.Gam(); // Train. var model = pipeline.Fit(data); // Save and reload. string modelPath = GetOutputPath(FullTestName + "-model.zip"); - _ml.Model.Save(loader, model, modelPath); + ML.Model.Save(model, loader, modelPath); - IDataLoader loadedModel; + IDataLoader loadedLoader; ITransformer loadedModelWithoutLoader; + ITransformer loadedModelWithLoader; DataViewSchema loadedSchema; using (var fs = File.OpenRead(modelPath)) { - loadedModel = _ml.Model.Load(fs); - loadedModelWithoutLoader = _ml.Model.Load(fs, out loadedSchema); + loadedModelWithLoader = ML.Model.LoadWithDataLoader(fs, out loadedLoader); + Assert.IsAssignableFrom>(loadedModelWithLoader); + loadedModelWithoutLoader = ML.Model.Load(fs, out loadedSchema); + Assert.IsAssignableFrom>(loadedModelWithoutLoader); + + CheckSameSchemas(loadedLoader.GetOutputSchema(), loadedSchema); } - // Without deserializing the loader from the model we lose the slot names. - data = _ml.Data.LoadFromEnumerable(new[] { new InputData() }); + // When using a novel data source other than one derived from the loader, we will not have + // the slot names. 
+ data = ML.Data.LoadFromEnumerable(new[] { new InputData() }); data = loadedModelWithoutLoader.Transform(data); - Assert.True(!data.Schema["Features"].HasSlotNames()); + Assert.False(data.Schema["Features"].HasSlotNames()); + // When we plumb the loaded schema through the transformer though, we should have slot names. + var noLoaderTransformedSchema = loadedModelWithoutLoader.GetOutputSchema(loadedSchema); + Assert.True(noLoaderTransformedSchema["Features"].HasSlotNames()); - data = loadedModel.Load(file); + data = loadedLoader.Load(file); Assert.True(data.Schema["Features"].HasSlotNames()); VBuffer> slotNames = default; data.Schema["Features"].GetSlotNames(ref slotNames); var ageIndex = FindIndex(slotNames.GetValues(), "age"); - var transformer = (loadedModel as CompositeDataLoader).Transformer.LastTransformer; - var singleFeaturePredictionTransformer = transformer as ISingleFeaturePredictionTransformer; + var singleFeaturePredictionTransformer = loadedModelWithLoader as ISingleFeaturePredictionTransformer; Assert.NotNull(singleFeaturePredictionTransformer); var calibratedModelParameters = singleFeaturePredictionTransformer.Model as CalibratedModelParametersBase; Assert.NotNull(calibratedModelParameters); @@ -94,30 +174,30 @@ public void SaveAndLoadModelWithLoader() public void LoadSchemaAndCreateNewData() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); var data = loader.Load(file); // Pipeline. - var pipeline = _ml.Transforms.Normalize("Features"); + var pipeline = ML.Transforms.Normalize("Features"); // Train. var model = pipeline.Fit(data); // Save and reload. string modelPath = GetOutputPath(FullTestName + "-model.zip"); - _ml.Model.Save(loader, model, modelPath); + ML.Model.Save(model, loader, modelPath); ITransformer loadedModel; DataViewSchema loadedSchema; using (var fs = File.OpenRead(modelPath)) - loadedModel = _ml.Model.Load(fs, out loadedSchema); + loadedModel = ML.Model.Load(fs, out loadedSchema); // Without using the schema from the model we lose the slot names. 
- data = _ml.Data.LoadFromEnumerable(new[] { new InputData() }); + data = ML.Data.LoadFromEnumerable(new[] { new InputData() }); data = loadedModel.Transform(data); Assert.True(!data.Schema["Features"].HasSlotNames()); - data = _ml.Data.LoadFromEnumerable(new[] { new InputData() }, loadedSchema); + data = ML.Data.LoadFromEnumerable(new[] { new InputData() }, loadedSchema); Assert.True(data.Schema["Features"].HasSlotNames()); } @@ -125,12 +205,12 @@ public void LoadSchemaAndCreateNewData() public void SaveTextLoaderAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); string modelPath = GetOutputPath(FullTestName + "-model.zip"); - _ml.Model.Save(loader, modelPath); + ML.Model.Save(null, loader, modelPath); - Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedLoader, + Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedWithLoader, out var loadedLoaderWithTransformer); Assert.True(loadedWithSchema is TransformerChain); Assert.False((loadedWithSchema as TransformerChain).Any()); @@ -138,7 +218,6 @@ public void SaveTextLoaderAndLoad() loadedSchema.GetColumnOrNull("Label") != null && loadedSchema.GetColumnOrNull("Features") != null && loadedSchema["Features"].HasSlotNames()); - Assert.True(loadedLoader is TextLoader); Assert.True(loadedWithLoader is TransformerChain); Assert.False((loadedWithLoader as TransformerChain).Any()); Assert.True(loadedLoaderWithTransformer is TextLoader); @@ -153,103 +232,104 @@ public void SaveTextLoaderAndLoad() public void SaveCompositeLoaderAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var composite = loader.Append(_ml.Transforms.Normalize("Features")); - var model = composite.Fit(file); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var composite = loader.Append(ML.Transforms.Normalize("Features")); + var loaderWithEmbeddedModel = composite.Fit(file); string modelPath = GetOutputPath(FullTestName + "-model.zip"); - _ml.Model.Save(model, modelPath); + ML.Model.Save(null, loaderWithEmbeddedModel, modelPath); - Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedLoader, + Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedWithLoader, out var loadedLoaderWithTransformer); - Assert.True(loadedWithSchema is TransformerChain); - Assert.True((loadedWithSchema as TransformerChain).Count() == 1); - Assert.True(loadedSchema.Count == 2 && - loadedSchema.GetColumnOrNull("Label") != null - && loadedSchema.GetColumnOrNull("Features") != null - && loadedSchema["Features"].HasSlotNames()); - Assert.True(loadedLoader is CompositeDataLoader); - Assert.True(loadedWithLoader is TransformerChain); - Assert.True((loadedWithLoader as TransformerChain).Count() == 1); - Assert.True(loadedLoaderWithTransformer is TextLoader); - var schema = loadedLoaderWithTransformer.GetOutputSchema(); - Assert.True(schema.Count == 2 && - schema.GetColumnOrNull("Label") != null - && schema.GetColumnOrNull("Features") != null - && schema["Features"].HasSlotNames()); + // Because we saved the transform model as part of the composite loader, with no transforms, + // the transform that should be loaded should be an empty transformer chain, since the "model," + // 
such as it is, has been combined with the loader. + Assert.Empty(Assert.IsType>(loadedWithSchema)); + Assert.Empty(Assert.IsType>(loadedWithLoader)); + + var expectedSchema = loaderWithEmbeddedModel.GetOutputSchema(); + Assert.True(expectedSchema.Count == 3); + Assert.NotNull(expectedSchema.GetColumnOrNull("Label")); + Assert.NotNull(expectedSchema.GetColumnOrNull("Features")); + Assert.True(expectedSchema["Features"].HasSlotNames()); + + CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), loadedSchema); + var schemaFromLoadedLoader = loadedLoaderWithTransformer.GetOutputSchema(); + CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), schemaFromLoadedLoader); + + // The type of the loader itself should be a composite data loader, and its single transformer + // should be the normalizing transformer. + var compositeLoader = Assert.IsType>(loadedLoaderWithTransformer); + var chainFromLoader = compositeLoader.Transformer; + Assert.IsType(Assert.Single(compositeLoader.Transformer)); + + Done(); } [Fact] public void SaveLoaderAndTransformerAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = _ml.Transforms.Normalize("Features"); - var model = estimator.Fit(loader.Load(file)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var estimator = ML.Transforms.Normalize("Features"); + var data = loader.Load(file); + var model = estimator.Fit(data); + + // First get the input schema. + var expectedInputSchema = loader.GetOutputSchema(); + Assert.Equal(2, expectedInputSchema.Count); + Assert.NotNull(expectedInputSchema.GetColumnOrNull("Label")); + Assert.NotNull(expectedInputSchema.GetColumnOrNull("Features")); + Assert.True(expectedInputSchema["Features"].HasSlotNames()); string modelPath = GetOutputPath(FullTestName + "-model.zip"); - _ml.Model.Save(loader, model, modelPath); + ML.Model.Save(model, loader, modelPath); - Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedLoader, + // Reload the loader and schema. 
+ Load(modelPath, out var loadedWithSchema, out var loadedInputSchema, out var loadedWithLoader, out var loadedLoaderWithTransformer); - Assert.True(loadedWithSchema is TransformerChain); - Assert.True((loadedWithSchema as TransformerChain).Count() == 1); - Assert.True(loadedSchema.Count == 2 && - loadedSchema.GetColumnOrNull("Label") != null - && loadedSchema.GetColumnOrNull("Features") != null - && loadedSchema["Features"].HasSlotNames()); - Assert.True(loadedLoader is CompositeDataLoader); - Assert.True(loadedWithLoader is TransformerChain); - Assert.True((loadedWithLoader as TransformerChain).Count() == 1); - Assert.True(loadedLoaderWithTransformer is TextLoader); - var schema = loadedLoaderWithTransformer.GetOutputSchema(); - Assert.True(schema.Count == 2 && - schema.GetColumnOrNull("Label") != null - && schema.GetColumnOrNull("Features") != null - && schema["Features"].HasSlotNames()); + Assert.IsType(loadedWithSchema); + Assert.IsType(loadedWithLoader); + Assert.IsType(loadedLoaderWithTransformer); + + CheckSameSchemas(expectedInputSchema, loadedInputSchema); + var reloadedLoaderInputSchema = loadedLoaderWithTransformer.GetOutputSchema(); + CheckSameSchemas(expectedInputSchema, reloadedLoaderInputSchema); + + Done(); } [Fact] public void SaveTransformerAndSchemaAndLoad() { var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = _ml.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = _ml.Transforms.Normalize("Features"); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var estimator = ML.Transforms.Normalize("Features"); var model = estimator.Fit(loader.Load(file)); string modelPath = GetOutputPath(FullTestName + "-model.zip"); - _ml.Model.Save(model, loader.GetOutputSchema(), modelPath); + ML.Model.Save(model, loader.GetOutputSchema(), modelPath); - Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedLoader, + Load(modelPath, out var loadedWithSchema, out var loadedSchema, out var loadedWithLoader, out var loadedLoaderWithTransformer); Assert.True(loadedWithSchema is NormalizingTransformer); Assert.True(loadedSchema.Count == 2 && loadedSchema.GetColumnOrNull("Label") != null && loadedSchema.GetColumnOrNull("Features") != null && loadedSchema["Features"].HasSlotNames()); - Assert.Null(loadedLoader); Assert.Null(loadedWithLoader); Assert.Null(loadedLoaderWithTransformer); } private void Load(string filename, out ITransformer loadedWithSchema, out DataViewSchema loadedSchema, - out IDataLoader loadedLoader, out ITransformer loadedWithLoader, - out IDataLoader loadedLoaderWithTransformer) + out ITransformer loadedWithLoader, out IDataLoader loadedLoaderWithTransformer) { using (var fs = File.OpenRead(filename)) { + loadedWithSchema = ML.Model.Load(fs, out loadedSchema); try { - loadedLoader = _ml.Model.Load(fs); - } - catch (Exception) - { - loadedLoader = null; - } - loadedWithSchema = _ml.Model.Load(fs, out loadedSchema); - try - { - loadedWithLoader = _ml.Model.LoadWithDataLoader(fs, out loadedLoaderWithTransformer); + loadedWithLoader = ML.Model.LoadWithDataLoader(fs, out loadedLoaderWithTransformer); } catch (Exception) { From 982a3ba5e89e5f37aca731d2eb2dd476c5d0dfd8 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 22 Mar 2019 13:50:40 -0700 Subject: [PATCH 07/10] Adding ModelFile tests to Loading, combining into one file. 
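For reference, the three serialization shapes the consolidated tests below exercise, sketched with the same Model.Save/Load calls the tests use (an illustrative sketch only; all names and file paths are placeholders, not part of the diff):

    using System.IO;
    using Microsoft.ML;
    using Microsoft.ML.Data;

    internal static class SaveLoadShapesSketch
    {
        internal static void RoundTrip(MLContext mlContext, ITransformer model, IDataView data,
            IDataLoader<IMultiStreamSource> loader, IDataLoader<IMultiStreamSource> compositeLoaderModel)
        {
            // 1. Transformer + input schema: reload returns the model and hands back the schema.
            mlContext.Model.Save(model, data.Schema, "model-and-schema.zip");
            ITransformer fromSchema;
            using (var fs = File.OpenRead("model-and-schema.zip"))
                fromSchema = mlContext.Model.Load(fs, out DataViewSchema inputSchema);

            // 2. Transformer + data loader: reload returns the model and hands back an IDataLoader.
            mlContext.Model.Save(model, loader, "model-and-loader.zip");
            ITransformer fromLoader;
            using (var fs = File.OpenRead("model-and-loader.zip"))
                fromLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader<IMultiStreamSource> reloadedLoader);

            // 3. Composite loader with the transforms folded in, saved with a null transformer;
            //    loading it back yields an empty transformer chain alongside the reloaded loader.
            mlContext.Model.Save(null, compositeLoaderModel, "composite-loader.zip");
        }
    }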
--- .../ModelFiles.cs | 381 +++++++++++++++--- .../ModelLoading.cs | 354 ---------------- 2 files changed, 314 insertions(+), 421 deletions(-) delete mode 100644 test/Microsoft.ML.Functional.Tests/ModelLoading.cs diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs index eb48b6f6da..0dd87ab67a 100644 --- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs +++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.IO; using System.IO.Compression; using System.Linq; @@ -9,20 +10,28 @@ using Microsoft.ML.Data; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; -using Microsoft.ML.TestFramework; -using Microsoft.ML.Trainers; using Microsoft.ML.Trainers.FastTree; +using Microsoft.ML.Transforms; using Xunit; using Xunit.Abstractions; namespace Microsoft.ML.Functional.Tests { - public class ModelFiles : BaseTestClass + public partial class ModelFiles : TestDataPipeBase { public ModelFiles(ITestOutputHelper output) : base(output) { } + private class InputData + { + [LoadColumn(0)] + public bool Label { get; set; } + [LoadColumn(9, 14)] + [VectorType(6)] + public float[] Features { get; set; } + } + /// /// Model Files: The (minimum) nuget version can be found in the model file. /// @@ -85,7 +94,7 @@ public void FitPipelineSaveModelAndPredict() // Create prediction engine and test predictions. var originalPredictionEngine = mlContext.Model.CreatePredictionEngine(model); var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine(serializedModel); - + // Take a handful of examples out of the dataset and compute predictions. var dataEnumerator = mlContext.Data.CreateEnumerable(mlContext.Data.TakeRows(data, 5), false); foreach (var row in dataEnumerator) @@ -100,84 +109,322 @@ public void FitPipelineSaveModelAndPredict() [Fact] public void LoadModelAndExtractPredictor() { - var mlContext = new MLContext(seed: 1); - - // Load the dataset. var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = mlContext.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var data = mlContext.Data.LoadFromTextFile(GetDataPath(TestDatasets.adult.trainFilename), - hasHeader: TestDatasets.adult.fileHasHeader, - separatorChar: TestDatasets.adult.fileSeparator); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var data = loader.Load(file); // Pipeline. - var trainerPipeline = mlContext.Transforms.Concatenate("Features", Adult.NumericalFeatures) - .Append(mlContext.BinaryClassification.Trainers.LogisticRegression()); + var pipeline = ML.BinaryClassification.Trainers.Gam(); // Define the same pipeline starting with the loader. - var loaderAndTrainerPipeline = loader.Append(mlContext.Transforms.Concatenate("Features", Adult.NumericalFeatures)) - .Append(mlContext.BinaryClassification.Trainers.LogisticRegression()); - - // Fit the pipelines to the dataset. - var transformerModel = trainerPipeline.Fit(data); - var compositeLoaderModel = loaderAndTrainerPipeline.Fit(file); - - // Serialize the models to a stream. - // Save a transformer model with an input schema. - string modelAndSchemaPath = DeleteOutputPath(FullTestName + "-model-schema.zip"); - mlContext.Model.Save(transformerModel, data.Schema, modelAndSchemaPath); - // Save a loader model without an input schema. 
- string compositeLoaderModelPath = DeleteOutputPath(FullTestName + "-composite-model.zip"); - mlContext.Model.Save(compositeLoaderModel, compositeLoaderModelPath); - // Save a transformer model, specifying the loader. - string loaderAndTransformerModelPath = DeleteOutputPath(FullTestName + "-loader-transformer.zip"); - mlContext.Model.Save(loader, transformerModel, loaderAndTransformerModelPath); - - // Load the serialized models back in. - ITransformer serializedTransformerModel; - IDataLoader serializedCompositeLoader; - ITransformer serializedCompositeLoaderWithSchema; - ITransformer serializedCompositeLoaderWithLoader; - IDataLoader serializedLoaderAndTransformerModel; - ITransformer serializedLoaderAndTransformerModelWithSchema; - ITransformer serializedLoaderAndTransformerModelWithLoader; - // Load the transformer model. + var pipeline1 = loader.Append(ML.BinaryClassification.Trainers.Gam()); + + // Train. + var transformerModel = pipeline.Fit(data); + var compositeLoaderModel = pipeline1.Fit(file); + + // Save and reload the "same" model with some differences in structure. + + // In this case we are saving the transformer model, but *not* the loader, just the schema from that loader. + string modelAndSchemaPath = GetOutputPath(FullTestName + "-model-schema.zip"); + ML.Model.Save(transformerModel, data.Schema, modelAndSchemaPath); + + // In this case we have combined the loader with the transformer model to form a "composite" loader, and are just + // saving that one loader to this file. + string compositeLoaderModelPath = GetOutputPath(FullTestName + "-composite-model.zip"); + ML.Model.Save(null, compositeLoaderModel, compositeLoaderModelPath); + + // In this case we are saving the transformer model, as well as the associated data loader. + string loaderAndTransformerModelPath = GetOutputPath(FullTestName + "-loader-transformer.zip"); + ML.Model.Save(transformerModel, loader, loaderAndTransformerModelPath); + + ITransformer loadedTransformerModel; + IDataLoader loadedCompositeLoader; + ITransformer loadedTransformerModel1; using (var fs = File.OpenRead(modelAndSchemaPath)) - serializedTransformerModel = mlContext.Model.Load(fs, out var loadedSchema); + loadedTransformerModel = ML.Model.Load(fs, out var loadedSchema); using (var fs = File.OpenRead(compositeLoaderModelPath)) { // This model can be loaded either as a composite data loader, // a transformer model + an input schema, or a transformer model + a data loader. - serializedCompositeLoader = mlContext.Model.Load(fs); - serializedCompositeLoaderWithLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader serializedLoader); - serializedCompositeLoaderWithSchema = mlContext.Model.Load(fs, out var schema); - Common.AssertEqual(loader.GetOutputSchema(), schema); + var t = ML.Model.LoadWithDataLoader(fs, out loadedCompositeLoader); + // This is a bit strange, as it seems to test that it can reload from the same + // stream twice opened only once, which as far as I know is not really a requirement + // of the design or API, but we are nonetheless testing it. If this winds up failing, + // I'm not sure we should really insist on this as a design requirement. + var t1 = ML.Model.Load(fs, out var s); + + CheckSameSchemas(loadedCompositeLoader.GetOutputSchema(), s); + // We combined the GAM with the loader, so the remaining chain should just be empty. 
+ Assert.Empty(Assert.IsType>(t)); + Assert.Empty(Assert.IsType>(t1)); } using (var fs = File.OpenRead(loaderAndTransformerModelPath)) { // This model can be loaded either as a composite data loader, // a transformer model + an input schema, or a transformer model + a data loader. - serializedLoaderAndTransformerModel = mlContext.Model.Load(fs); - serializedLoaderAndTransformerModelWithSchema = mlContext.Model.Load(fs, out var schema); - Common.AssertEqual(data.Schema, schema); - serializedLoaderAndTransformerModelWithLoader = mlContext.Model.LoadWithDataLoader(fs, out IDataLoader serializedLoader); + var t = ML.Model.Load(fs, out var s); + CheckSameSchemas(loader.GetOutputSchema(), s); + + loadedTransformerModel1 = ML.Model.LoadWithDataLoader(fs, out var l); + } + + void AssertIsGam(ITransformer trans) + { + Assert.IsType( + Assert.IsAssignableFrom( + Assert.IsAssignableFrom>(trans).Model).SubModel); + } + + // In the case of the directly used transformer model, the thing we loaded should be itself the result from fitting GAM. + AssertIsGam(loadedTransformerModel); + + // This is quite similar, the fact that we omitted saving the loader and saved the input schema to the model itself. + AssertIsGam(loadedTransformerModel1); + + // If we had combined the transformer with the loader, and then saved *that*, then the resulting loaded "model" + // will be empty (as tested above), but the loader itself with a composite loader containing the result from + // fitting GAM as the sole item in its transformer chain. + var fromComposite = Assert.Single(Assert.IsType>( + Assert.IsType>(loadedCompositeLoader).Transformer)); + AssertIsGam(fromComposite); + + Done(); + } + + [Fact] + public void SaveAndLoadModelWithLoader() + { + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var data = loader.Load(file); + + // Pipeline. + var pipeline = ML.BinaryClassification.Trainers.Gam(); + + // Train. + var model = pipeline.Fit(data); + + // Save and reload. + string modelPath = GetOutputPath(FullTestName + "-model.zip"); + ML.Model.Save(model, loader, modelPath); + + IDataLoader loadedLoader; + ITransformer loadedModelWithoutLoader; + ITransformer loadedModelWithLoader; + DataViewSchema loadedSchema; + using (var fs = File.OpenRead(modelPath)) + { + loadedModelWithLoader = ML.Model.LoadWithDataLoader(fs, out loadedLoader); + Assert.IsAssignableFrom>(loadedModelWithLoader); + loadedModelWithoutLoader = ML.Model.Load(fs, out loadedSchema); + Assert.IsAssignableFrom>(loadedModelWithoutLoader); + + CheckSameSchemas(loadedLoader.GetOutputSchema(), loadedSchema); + } + + // When using a novel data source other than one derived from the loader, we will not have + // the slot names. + data = ML.Data.LoadFromEnumerable(new[] { new InputData() }); + data = loadedModelWithoutLoader.Transform(data); + Assert.False(data.Schema["Features"].HasSlotNames()); + // When we plumb the loaded schema through the transformer though, we should have slot names. 
+ var noLoaderTransformedSchema = loadedModelWithoutLoader.GetOutputSchema(loadedSchema); + Assert.True(noLoaderTransformedSchema["Features"].HasSlotNames()); + + data = loadedLoader.Load(file); + Assert.True(data.Schema["Features"].HasSlotNames()); + VBuffer> slotNames = default; + data.Schema["Features"].GetSlotNames(ref slotNames); + var ageIndex = FindIndex(slotNames.GetValues(), "age"); + var singleFeaturePredictionTransformer = loadedModelWithLoader as ISingleFeaturePredictionTransformer; + Assert.NotNull(singleFeaturePredictionTransformer); + var calibratedModelParameters = singleFeaturePredictionTransformer.Model as CalibratedModelParametersBase; + Assert.NotNull(calibratedModelParameters); + var gamModel = calibratedModelParameters.SubModel as GamBinaryModelParameters; + Assert.NotNull(gamModel); + var ageBinUpperBounds = gamModel.GetBinUpperBounds(ageIndex); + var ageBinEffects = gamModel.GetBinEffects(ageIndex); + } + + [Fact] + public void LoadSchemaAndCreateNewData() + { + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var data = loader.Load(file); + + // Pipeline. + var pipeline = ML.Transforms.Normalize("Features"); + + // Train. + var model = pipeline.Fit(data); + + // Save and reload. + string modelPath = GetOutputPath(FullTestName + "-model.zip"); + ML.Model.Save(model, loader, modelPath); + + ITransformer loadedModel; + DataViewSchema loadedSchema; + using (var fs = File.OpenRead(modelPath)) + loadedModel = ML.Model.Load(fs, out loadedSchema); + + // Without using the schema from the model we lose the slot names. + data = ML.Data.LoadFromEnumerable(new[] { new InputData() }); + data = loadedModel.Transform(data); + Assert.True(!data.Schema["Features"].HasSlotNames()); + + data = ML.Data.LoadFromEnumerable(new[] { new InputData() }, loadedSchema); + Assert.True(data.Schema["Features"].HasSlotNames()); + } + + [Fact] + public void SaveTextLoaderAndLoad() + { + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + + string modelPath = GetOutputPath(FullTestName + "-model.zip"); + ML.Model.Save(null, loader, modelPath); + + Load(modelPath, out var loadedWithSchema, out var loadedSchema, + out var loadedWithLoader, out var loadedLoaderWithTransformer); + Assert.True(loadedWithSchema is TransformerChain); + Assert.False((loadedWithSchema as TransformerChain).Any()); + Assert.True(loadedSchema.Count == 2 && + loadedSchema.GetColumnOrNull("Label") != null + && loadedSchema.GetColumnOrNull("Features") != null + && loadedSchema["Features"].HasSlotNames()); + Assert.True(loadedWithLoader is TransformerChain); + Assert.False((loadedWithLoader as TransformerChain).Any()); + Assert.True(loadedLoaderWithTransformer is TextLoader); + var schema = loadedLoaderWithTransformer.GetOutputSchema(); + Assert.True(schema.Count == 2 && + schema.GetColumnOrNull("Label") != null + && schema.GetColumnOrNull("Features") != null + && schema["Features"].HasSlotNames()); + } + + [Fact] + public void SaveCompositeLoaderAndLoad() + { + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var composite = loader.Append(ML.Transforms.Normalize("Features")); + var loaderWithEmbeddedModel = composite.Fit(file); + + string modelPath = GetOutputPath(FullTestName + "-model.zip"); + ML.Model.Save(null, 
loaderWithEmbeddedModel, modelPath);
+
+ Load(modelPath, out var loadedWithSchema, out var loadedSchema,
+ out var loadedWithLoader, out var loadedLoaderWithTransformer);
+ // Because we saved the transform model as part of the composite loader, with no transforms,
+ // the transform that should be loaded should be an empty transformer chain, since the "model,"
+ // such as it is, has been combined with the loader.
+ Assert.Empty(Assert.IsType<TransformerChain<ITransformer>>(loadedWithSchema));
+ Assert.Empty(Assert.IsType<TransformerChain<ITransformer>>(loadedWithLoader));
+
+ var expectedSchema = loaderWithEmbeddedModel.GetOutputSchema();
+ Assert.True(expectedSchema.Count == 3);
+ Assert.NotNull(expectedSchema.GetColumnOrNull("Label"));
+ Assert.NotNull(expectedSchema.GetColumnOrNull("Features"));
+ Assert.True(expectedSchema["Features"].HasSlotNames());
+
+ CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), loadedSchema);
+ var schemaFromLoadedLoader = loadedLoaderWithTransformer.GetOutputSchema();
+ CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), schemaFromLoadedLoader);
+
+ // The type of the loader itself should be a composite data loader, and its single transformer
+ // should be the normalizing transformer.
+ var compositeLoader = Assert.IsType<CompositeDataLoader<IMultiStreamSource, ITransformer>>(loadedLoaderWithTransformer);
+ var chainFromLoader = compositeLoader.Transformer;
+ Assert.IsType<NormalizingTransformer>(Assert.Single(compositeLoader.Transformer));
+
+ Done();
+ }
+
+ [Fact]
+ public void SaveLoaderAndTransformerAndLoad()
+ {
+ var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename));
+ var loader = ML.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
+ var estimator = ML.Transforms.Normalize("Features");
+ var data = loader.Load(file);
+ var model = estimator.Fit(data);
+
+ // First get the input schema.
+ var expectedInputSchema = loader.GetOutputSchema();
+ Assert.Equal(2, expectedInputSchema.Count);
+ Assert.NotNull(expectedInputSchema.GetColumnOrNull("Label"));
+ Assert.NotNull(expectedInputSchema.GetColumnOrNull("Features"));
+ Assert.True(expectedInputSchema["Features"].HasSlotNames());
+
+ string modelPath = GetOutputPath(FullTestName + "-model.zip");
+ ML.Model.Save(model, loader, modelPath);
+
+ // Reload the loader and schema.
+ Load(modelPath, out var loadedWithSchema, out var loadedInputSchema, + out var loadedWithLoader, out var loadedLoaderWithTransformer); + Assert.IsType(loadedWithSchema); + Assert.IsType(loadedWithLoader); + Assert.IsType(loadedLoaderWithTransformer); + + CheckSameSchemas(expectedInputSchema, loadedInputSchema); + var reloadedLoaderInputSchema = loadedLoaderWithTransformer.GetOutputSchema(); + CheckSameSchemas(expectedInputSchema, reloadedLoaderInputSchema); + + Done(); + } + + [Fact] + public void SaveTransformerAndSchemaAndLoad() + { + var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); + var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); + var estimator = ML.Transforms.Normalize("Features"); + var model = estimator.Fit(loader.Load(file)); + + string modelPath = GetOutputPath(FullTestName + "-model.zip"); + ML.Model.Save(model, loader.GetOutputSchema(), modelPath); + + Load(modelPath, out var loadedWithSchema, out var loadedSchema, + out var loadedWithLoader, out var loadedLoaderWithTransformer); + Assert.True(loadedWithSchema is NormalizingTransformer); + Assert.True(loadedSchema.Count == 2 && + loadedSchema.GetColumnOrNull("Label") != null + && loadedSchema.GetColumnOrNull("Features") != null + && loadedSchema["Features"].HasSlotNames()); + Assert.Null(loadedWithLoader); + Assert.Null(loadedLoaderWithTransformer); + } + + private void Load(string filename, out ITransformer loadedWithSchema, out DataViewSchema loadedSchema, + out ITransformer loadedWithLoader, out IDataLoader loadedLoaderWithTransformer) + { + using (var fs = File.OpenRead(filename)) + { + loadedWithSchema = ML.Model.Load(fs, out loadedSchema); + try + { + loadedWithLoader = ML.Model.LoadWithDataLoader(fs, out loadedLoaderWithTransformer); + } + catch (Exception) + { + loadedWithLoader = null; + loadedLoaderWithTransformer = null; + } } + } - // Validate that the models contain the expected estimator. - var gam = ((serializedTransformerModel as ISingleFeaturePredictionTransformer).Model - as CalibratedModelParametersBase).SubModel - as GamBinaryModelParameters; - Assert.NotNull(gam); - - gam = (((serializedCompositeLoader as CompositeDataLoader).Transformer.LastTransformer - as ISingleFeaturePredictionTransformer).Model - as CalibratedModelParametersBase).SubModel - as GamBinaryModelParameters; - Assert.NotNull(gam); - - gam = (((serializedLoaderAndTransformerModelWithLoader as TransformerChain).LastTransformer - as ISingleFeaturePredictionTransformer).Model - as CalibratedModelParametersBase).SubModel - as GamBinaryModelParameters; - Assert.NotNull(gam); + private int FindIndex(ReadOnlySpan> values, string slotName) + { + int index = 0; + foreach (var value in values) + { + if (value.Span.SequenceEqual(slotName.AsSpan())) + return index; + index++; + } + return -1; } } -} \ No newline at end of file +} diff --git a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs b/test/Microsoft.ML.Functional.Tests/ModelLoading.cs deleted file mode 100644 index 4810ad2a09..0000000000 --- a/test/Microsoft.ML.Functional.Tests/ModelLoading.cs +++ /dev/null @@ -1,354 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System; -using System.IO; -using System.Linq; -using Microsoft.ML.Calibrators; -using Microsoft.ML.Data; -using Microsoft.ML.RunTests; -using Microsoft.ML.Trainers.FastTree; -using Microsoft.ML.Transforms; -using Xunit; -using Xunit.Abstractions; - -namespace Microsoft.ML.Functional.Tests -{ - public partial class ModelLoadingTests : TestDataPipeBase - { - public ModelLoadingTests(ITestOutputHelper output) : base(output) - { - } - - private class InputData - { - [LoadColumn(0)] - public bool Label { get; set; } - [LoadColumn(9, 14)] - [VectorType(6)] - public float[] Features { get; set; } - } - - [Fact] - public void LoadModelAndExtractPredictor() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var data = loader.Load(file); - - // Pipeline. - var pipeline = ML.BinaryClassification.Trainers.Gam(); - // Define the same pipeline starting with the loader. - var pipeline1 = loader.Append(ML.BinaryClassification.Trainers.Gam()); - - // Train. - var transformerModel = pipeline.Fit(data); - var compositeLoaderModel = pipeline1.Fit(file); - - // Save and reload the "same" model with some differences in structure. - - // In this case we are saving the transformer model, but *not* the loader, just the schema from that loader. - string modelAndSchemaPath = GetOutputPath(FullTestName + "-model-schema.zip"); - ML.Model.Save(transformerModel, data.Schema, modelAndSchemaPath); - - // In this case we have combined the loader with the transformer model to form a "composite" loader, and are just - // saving that one loader to this file. - string compositeLoaderModelPath = GetOutputPath(FullTestName + "-composite-model.zip"); - ML.Model.Save(null, compositeLoaderModel, compositeLoaderModelPath); - - // In this case we are saving the transformer model, as well as the associated data loader. - string loaderAndTransformerModelPath = GetOutputPath(FullTestName + "-loader-transformer.zip"); - ML.Model.Save(transformerModel, loader, loaderAndTransformerModelPath); - - ITransformer loadedTransformerModel; - IDataLoader loadedCompositeLoader; - ITransformer loadedTransformerModel1; - using (var fs = File.OpenRead(modelAndSchemaPath)) - loadedTransformerModel = ML.Model.Load(fs, out var loadedSchema); - using (var fs = File.OpenRead(compositeLoaderModelPath)) - { - // This model can be loaded either as a composite data loader, - // a transformer model + an input schema, or a transformer model + a data loader. - var t = ML.Model.LoadWithDataLoader(fs, out loadedCompositeLoader); - // This is a bit strange, as it seems to test that it can reload from the same - // stream twice opened only once, which as far as I know is not really a requirement - // of the design or API, but we are nonetheless testing it. If this winds up failing, - // I'm not sure we should really insist on this as a design requirement. - var t1 = ML.Model.Load(fs, out var s); - - CheckSameSchemas(loadedCompositeLoader.GetOutputSchema(), s); - // We combined the GAM with the loader, so the remaining chain should just be empty. - Assert.Empty(Assert.IsType>(t)); - Assert.Empty(Assert.IsType>(t1)); - } - using (var fs = File.OpenRead(loaderAndTransformerModelPath)) - { - // This model can be loaded either as a composite data loader, - // a transformer model + an input schema, or a transformer model + a data loader. 
- var t = ML.Model.Load(fs, out var s); - CheckSameSchemas(loader.GetOutputSchema(), s); - - loadedTransformerModel1 = ML.Model.LoadWithDataLoader(fs, out var l); - } - - void AssertIsGam(ITransformer trans) - { - Assert.IsType( - Assert.IsAssignableFrom( - Assert.IsAssignableFrom>(trans).Model).SubModel); - } - - // In the case of the directly used transformer model, the thing we loaded should be itself the result from fitting GAM. - AssertIsGam(loadedTransformerModel); - - // This is quite similar, the fact that we omitted saving the loader and saved the input schema to the model itself. - AssertIsGam(loadedTransformerModel1); - - // If we had combined the transformer with the loader, and then saved *that*, then the resulting loaded "model" - // will be empty (as tested above), but the loader itself with a composite loader containing the result from - // fitting GAM as the sole item in its transformer chain. - var fromComposite = Assert.Single(Assert.IsType>( - Assert.IsType>(loadedCompositeLoader).Transformer)); - AssertIsGam(fromComposite); - - Done(); - } - - [Fact] - public void SaveAndLoadModelWithLoader() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var data = loader.Load(file); - - // Pipeline. - var pipeline = ML.BinaryClassification.Trainers.Gam(); - - // Train. - var model = pipeline.Fit(data); - - // Save and reload. - string modelPath = GetOutputPath(FullTestName + "-model.zip"); - ML.Model.Save(model, loader, modelPath); - - IDataLoader loadedLoader; - ITransformer loadedModelWithoutLoader; - ITransformer loadedModelWithLoader; - DataViewSchema loadedSchema; - using (var fs = File.OpenRead(modelPath)) - { - loadedModelWithLoader = ML.Model.LoadWithDataLoader(fs, out loadedLoader); - Assert.IsAssignableFrom>(loadedModelWithLoader); - loadedModelWithoutLoader = ML.Model.Load(fs, out loadedSchema); - Assert.IsAssignableFrom>(loadedModelWithoutLoader); - - CheckSameSchemas(loadedLoader.GetOutputSchema(), loadedSchema); - } - - // When using a novel data source other than one derived from the loader, we will not have - // the slot names. - data = ML.Data.LoadFromEnumerable(new[] { new InputData() }); - data = loadedModelWithoutLoader.Transform(data); - Assert.False(data.Schema["Features"].HasSlotNames()); - // When we plumb the loaded schema through the transformer though, we should have slot names. 
- var noLoaderTransformedSchema = loadedModelWithoutLoader.GetOutputSchema(loadedSchema); - Assert.True(noLoaderTransformedSchema["Features"].HasSlotNames()); - - data = loadedLoader.Load(file); - Assert.True(data.Schema["Features"].HasSlotNames()); - VBuffer> slotNames = default; - data.Schema["Features"].GetSlotNames(ref slotNames); - var ageIndex = FindIndex(slotNames.GetValues(), "age"); - var singleFeaturePredictionTransformer = loadedModelWithLoader as ISingleFeaturePredictionTransformer; - Assert.NotNull(singleFeaturePredictionTransformer); - var calibratedModelParameters = singleFeaturePredictionTransformer.Model as CalibratedModelParametersBase; - Assert.NotNull(calibratedModelParameters); - var gamModel = calibratedModelParameters.SubModel as GamBinaryModelParameters; - Assert.NotNull(gamModel); - var ageBinUpperBounds = gamModel.GetBinUpperBounds(ageIndex); - var ageBinEffects = gamModel.GetBinEffects(ageIndex); - } - - [Fact] - public void LoadSchemaAndCreateNewData() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var data = loader.Load(file); - - // Pipeline. - var pipeline = ML.Transforms.Normalize("Features"); - - // Train. - var model = pipeline.Fit(data); - - // Save and reload. - string modelPath = GetOutputPath(FullTestName + "-model.zip"); - ML.Model.Save(model, loader, modelPath); - - ITransformer loadedModel; - DataViewSchema loadedSchema; - using (var fs = File.OpenRead(modelPath)) - loadedModel = ML.Model.Load(fs, out loadedSchema); - - // Without using the schema from the model we lose the slot names. - data = ML.Data.LoadFromEnumerable(new[] { new InputData() }); - data = loadedModel.Transform(data); - Assert.True(!data.Schema["Features"].HasSlotNames()); - - data = ML.Data.LoadFromEnumerable(new[] { new InputData() }, loadedSchema); - Assert.True(data.Schema["Features"].HasSlotNames()); - } - - [Fact] - public void SaveTextLoaderAndLoad() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - - string modelPath = GetOutputPath(FullTestName + "-model.zip"); - ML.Model.Save(null, loader, modelPath); - - Load(modelPath, out var loadedWithSchema, out var loadedSchema, - out var loadedWithLoader, out var loadedLoaderWithTransformer); - Assert.True(loadedWithSchema is TransformerChain); - Assert.False((loadedWithSchema as TransformerChain).Any()); - Assert.True(loadedSchema.Count == 2 && - loadedSchema.GetColumnOrNull("Label") != null - && loadedSchema.GetColumnOrNull("Features") != null - && loadedSchema["Features"].HasSlotNames()); - Assert.True(loadedWithLoader is TransformerChain); - Assert.False((loadedWithLoader as TransformerChain).Any()); - Assert.True(loadedLoaderWithTransformer is TextLoader); - var schema = loadedLoaderWithTransformer.GetOutputSchema(); - Assert.True(schema.Count == 2 && - schema.GetColumnOrNull("Label") != null - && schema.GetColumnOrNull("Features") != null - && schema["Features"].HasSlotNames()); - } - - [Fact] - public void SaveCompositeLoaderAndLoad() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var composite = loader.Append(ML.Transforms.Normalize("Features")); - var loaderWithEmbeddedModel = composite.Fit(file); - - string modelPath = GetOutputPath(FullTestName + "-model.zip"); - ML.Model.Save(null, 
loaderWithEmbeddedModel, modelPath); - - Load(modelPath, out var loadedWithSchema, out var loadedSchema, - out var loadedWithLoader, out var loadedLoaderWithTransformer); - // Because we saved the transform model as part of the composite loader, with no transforms, - // the transform that should be loaded should be an empty transformer chain, since the "model," - // such as it is, has been combined with the loader. - Assert.Empty(Assert.IsType>(loadedWithSchema)); - Assert.Empty(Assert.IsType>(loadedWithLoader)); - - var expectedSchema = loaderWithEmbeddedModel.GetOutputSchema(); - Assert.True(expectedSchema.Count == 3); - Assert.NotNull(expectedSchema.GetColumnOrNull("Label")); - Assert.NotNull(expectedSchema.GetColumnOrNull("Features")); - Assert.True(expectedSchema["Features"].HasSlotNames()); - - CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), loadedSchema); - var schemaFromLoadedLoader = loadedLoaderWithTransformer.GetOutputSchema(); - CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), schemaFromLoadedLoader); - - // The type of the loader itself should be a composite data loader, and its single transformer - // should be the normalizing transformer. - var compositeLoader = Assert.IsType>(loadedLoaderWithTransformer); - var chainFromLoader = compositeLoader.Transformer; - Assert.IsType(Assert.Single(compositeLoader.Transformer)); - - Done(); - } - - [Fact] - public void SaveLoaderAndTransformerAndLoad() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = ML.Transforms.Normalize("Features"); - var data = loader.Load(file); - var model = estimator.Fit(data); - - // First get the input schema. - var expectedInputSchema = loader.GetOutputSchema(); - Assert.Equal(2, expectedInputSchema.Count); - Assert.NotNull(expectedInputSchema.GetColumnOrNull("Label")); - Assert.NotNull(expectedInputSchema.GetColumnOrNull("Features")); - Assert.True(expectedInputSchema["Features"].HasSlotNames()); - - string modelPath = GetOutputPath(FullTestName + "-model.zip"); - ML.Model.Save(model, loader, modelPath); - - // Reload the loader and schema. 
- Load(modelPath, out var loadedWithSchema, out var loadedInputSchema, - out var loadedWithLoader, out var loadedLoaderWithTransformer); - Assert.IsType(loadedWithSchema); - Assert.IsType(loadedWithLoader); - Assert.IsType(loadedLoaderWithTransformer); - - CheckSameSchemas(expectedInputSchema, loadedInputSchema); - var reloadedLoaderInputSchema = loadedLoaderWithTransformer.GetOutputSchema(); - CheckSameSchemas(expectedInputSchema, reloadedLoaderInputSchema); - - Done(); - } - - [Fact] - public void SaveTransformerAndSchemaAndLoad() - { - var file = new MultiFileSource(GetDataPath(TestDatasets.adult.trainFilename)); - var loader = ML.Data.CreateTextLoader(hasHeader: true, dataSample: file); - var estimator = ML.Transforms.Normalize("Features"); - var model = estimator.Fit(loader.Load(file)); - - string modelPath = GetOutputPath(FullTestName + "-model.zip"); - ML.Model.Save(model, loader.GetOutputSchema(), modelPath); - - Load(modelPath, out var loadedWithSchema, out var loadedSchema, - out var loadedWithLoader, out var loadedLoaderWithTransformer); - Assert.True(loadedWithSchema is NormalizingTransformer); - Assert.True(loadedSchema.Count == 2 && - loadedSchema.GetColumnOrNull("Label") != null - && loadedSchema.GetColumnOrNull("Features") != null - && loadedSchema["Features"].HasSlotNames()); - Assert.Null(loadedWithLoader); - Assert.Null(loadedLoaderWithTransformer); - } - - private void Load(string filename, out ITransformer loadedWithSchema, out DataViewSchema loadedSchema, - out ITransformer loadedWithLoader, out IDataLoader loadedLoaderWithTransformer) - { - using (var fs = File.OpenRead(filename)) - { - loadedWithSchema = ML.Model.Load(fs, out loadedSchema); - try - { - loadedWithLoader = ML.Model.LoadWithDataLoader(fs, out loadedLoaderWithTransformer); - } - catch (Exception) - { - loadedWithLoader = null; - loadedLoaderWithTransformer = null; - } - } - } - - private int FindIndex(ReadOnlySpan> values, string slotName) - { - int index = 0; - foreach (var value in values) - { - if (value.Span.SequenceEqual(slotName.AsSpan())) - return index; - index++; - } - return -1; - } - } -} From 5000dbfae6c8f557e6f4d35820c36fe7b78345e6 Mon Sep 17 00:00:00 2001 From: Rogan Carr Date: Fri, 22 Mar 2019 16:01:56 -0700 Subject: [PATCH 08/10] Addressing PR comments. --- .../Datasets/CommonColumns.cs | 4 ++-- test/Microsoft.ML.Functional.Tests/ModelFiles.cs | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs index c5e896184d..8e4f45a71b 100644 --- a/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs +++ b/test/Microsoft.ML.Functional.Tests/Datasets/CommonColumns.cs @@ -21,7 +21,7 @@ internal sealed class FeatureContributionOutput } /// - /// A class to hold a feature column. + /// A class to hold a score column. /// internal sealed class ScoreColumn { @@ -29,7 +29,7 @@ internal sealed class ScoreColumn } /// - /// A class to hold a feature column. + /// A class to hold a vector score column. 
 ///
 internal sealed class VectorScoreColumn
 {
diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
index 0dd87ab67a..30f7f3aa5d 100644
--- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
+++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
@@ -83,14 +83,16 @@ public void FitPipelineSaveModelAndPredict()
 var modelPath = DeleteOutputPath("fitPipelineSaveModelAndPredict.zip");
 // Save model to a file.
- using (var file = File.Create(modelPath))
- mlContext.Model.Save(model, data.Schema, file);
+ mlContext.Model.Save(model, data.Schema, modelPath);

 // Load model from a file.
 ITransformer serializedModel;
 using (var file = File.OpenRead(modelPath))
+ {
 serializedModel = mlContext.Model.Load(file, out var serializedSchema);
-
+ CheckSameSchemas(data.Schema, serializedSchema);
+ }
+
 // Create prediction engine and test predictions.
 var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
 var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(serializedModel);
@@ -104,6 +106,8 @@ public void FitPipelineSaveModelAndPredict()
 // Check that the predictions are identical.
 Assert.Equal(originalPrediction.Score, serializedPrediction.Score);
 }
+
+ Done();
 }

 [Fact]
From 44ed95e84d793bd1203f1cd237ddd13f04518ae0 Mon Sep 17 00:00:00 2001
From: Rogan Carr
Date: Mon, 25 Mar 2019 10:16:02 -0700
Subject: [PATCH 09/10] Changing model version checker to use a model created on the fly.
---
 .../ModelFiles.cs | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
index 30f7f3aa5d..c53ae56c4a 100644
--- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
+++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
@@ -38,9 +38,26 @@ private class InputData
 [Fact]
 public void DetermineNugetVersionFromModel()
 {
- var modelFile = GetDataPath($"backcompat{Path.DirectorySeparatorChar}keep-model.zip");
+ var mlContext = new MLContext(seed: 1);
+
+ // Get the dataset.
+ var data = mlContext.Data.LoadFromTextFile<HousingRegression>(GetDataPath(TestDatasets.housing.trainFilename), hasHeader: true);
+
+ // Create a pipeline to train on the housing data.
+ var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
+ .Append(mlContext.Regression.Trainers.FastTree(
+ new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));
+
+ // Fit the pipeline.
+ var model = pipeline.Fit(data);
+
+ // Save model to a file.
+ var modelPath = DeleteOutputPath("determineNugetVersionFromModel.zip");
+ mlContext.Model.Save(model, data.Schema, modelPath);
+
+ // Check that the version can be extracted from the model.
 var versionFileName = @"TrainingInfo\Version.txt"; // Must use '\' for cross-platform testing.
- using (ZipArchive archive = ZipFile.OpenRead(modelFile))
+ using (ZipArchive archive = ZipFile.OpenRead(modelPath))
 {
 // The version of the entire model is kept in the version file.
 var versionPath = archive.Entries.First(x => x.FullName == versionFileName);
From 47518c551cedce75933cbb3eae164889627376a2 Mon Sep 17 00:00:00 2001
From: Rogan Carr
Date: Mon, 25 Mar 2019 10:58:27 -0700
Subject: [PATCH 10/10] Fixing cross-plat tests.
---
 test/Microsoft.ML.Functional.Tests/ModelFiles.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
index c53ae56c4a..e1fbe98749 100644
--- a/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
+++ b/test/Microsoft.ML.Functional.Tests/ModelFiles.cs
@@ -56,7 +56,7 @@ public void DetermineNugetVersionFromModel()
 mlContext.Model.Save(model, data.Schema, modelPath);

 // Check that the version can be extracted from the model.
- var versionFileName = @"TrainingInfo\Version.txt"; // Must use '\' for cross-platform testing.
+ var versionFileName = @"TrainingInfo" + Path.DirectorySeparatorChar + "Version.txt";
 using (ZipArchive archive = ZipFile.OpenRead(modelPath))
 {
 // The version of the entire model is kept in the version file.
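
Note on the last two patches: they both turn on how the version entry is addressed inside the saved model ZIP. The test first matched the literal entry name TrainingInfo\Version.txt and then switched to Path.DirectorySeparatorChar, which suggests the entry name follows the platform's separator. Purely as an illustration (not part of the patches; the ModelVersionReader and ReadVersion names are invented here), a separator-agnostic lookup could be sketched with standard System.IO.Compression calls:

using System.IO;
using System.IO.Compression;
using System.Linq;

internal static class ModelVersionReader
{
    // Hypothetical helper: reads the single line stored in the model's
    // TrainingInfo/Version.txt entry, matching the entry name regardless of
    // which directory separator it was written with.
    public static string ReadVersion(string modelPath)
    {
        using (ZipArchive archive = ZipFile.OpenRead(modelPath))
        {
            // Normalize '\' to '/' before comparing so the lookup works for
            // entries stored as either "TrainingInfo\Version.txt" or "TrainingInfo/Version.txt".
            var versionEntry = archive.Entries.First(
                e => e.FullName.Replace('\\', '/') == "TrainingInfo/Version.txt");

            using (var reader = new StreamReader(versionEntry.Open()))
                return reader.ReadLine();
        }
    }
}

With a helper like this, a test could assert directly on the returned string instead of hard-coding the separator into the entry name.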