From 925bdf02af88388be261b9652dc210db46e29e9c Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Mon, 8 Apr 2019 17:49:11 -0700 Subject: [PATCH 1/6] The test pipeline for consuming an ONNX model would fail due to the Score column being named "Score0". The ONNX model will rename the output columns by design, therefore a different class with the ColumnName of "Score0" is needed. This fixes the test pipeline to address this issue. Fixes #2981 --- test/Microsoft.ML.Functional.Tests/ONNX.cs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index 88438305bd..2776a1299e 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.IO; +using Microsoft.ML.Data; using Microsoft.ML.Functional.Tests.Datasets; using Microsoft.ML.RunTests; using Microsoft.ML.TestFramework; @@ -14,6 +15,12 @@ namespace Microsoft.ML.Functional.Tests { + internal sealed class OnnxScoreColumn + { + [ColumnName("Score0")] + public float[] Score { get; set; } + } + public class ONNX : BaseTestClass { public ONNX(ITestOutputHelper output) : base(output) @@ -51,15 +58,9 @@ public void SaveOnnxModelLoadAndScoreFastTree() var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath); var onnxModel = onnxEstimator.Fit(data); - // TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now. - // TODO #2981: ONNX models cannot be fit as part of a pipeline, so we must use a workaround like this. - var onnxWorkaroundPipeline = onnxModel.Append( - mlContext.Transforms.CopyColumns("Score", "Score0").Fit(onnxModel.Transform(data))); - // Create prediction engine and test predictions. 
var originalPredictionEngine = mlContext.Model.CreatePredictionEngine(model); - // TODO #2982: ONNX produces vector types and not the original output type. - var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine(onnxWorkaroundPipeline); + var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine(onnxModel); // Take a handful of examples out of the dataset and compute predictions. var dataEnumerator = mlContext.Data.CreateEnumerable(mlContext.Data.TakeRows(data, 5), false); From f278dd19f227c049c39ba05c9b1f72c48b2ad392 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Tue, 9 Apr 2019 22:18:11 -0700 Subject: [PATCH 2/6] When adding an ONNX transform to an ML.NET pipeline, an exception would occur if the input type was not a variable vector or vector type. This is not needed as we do support converting basic types to equivalent ONNX tensor type. Therefore removing the check in GetOutputSchema fixes this problem. --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 2 -- test/Microsoft.ML.Functional.Tests/ONNX.cs | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 85ec23cfe3..d32a002626 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -564,8 +564,6 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var input = Transformer.Inputs[i]; if (!inputSchema.TryFindColumn(input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input); - if (!(col.Kind == SchemaShape.Column.VectorKind.VariableVector || col.Kind == SchemaShape.Column.VectorKind.Vector)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, "vector", col.GetTypeString()); var inputsInfo = Transformer.Model.ModelInfo.InputsInfo; var idx = Transformer.Model.InputNames.IndexOf(input); diff --git 
a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index 2776a1299e..e239c1d888 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -17,7 +17,6 @@ namespace Microsoft.ML.Functional.Tests { internal sealed class OnnxScoreColumn { - [ColumnName("Score0")] public float[] Score { get; set; } } @@ -55,7 +54,8 @@ public void SaveOnnxModelLoadAndScoreFastTree() mlContext.Model.ConvertToOnnx(model, data, file); // Load the model as a transform. - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath); + var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath) + .Append(mlContext.Transforms.CopyColumns("Score", "Score0")); var onnxModel = onnxEstimator.Fit(data); // Create prediction engine and test predictions. From 9c19726bcf6e3c9c188f70f4a4204741036527f8 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Tue, 9 Apr 2019 22:28:33 -0700 Subject: [PATCH 3/6] Reverted back to using VectorScoreColumn --- test/Microsoft.ML.Functional.Tests/ONNX.cs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index e239c1d888..78ba1bab70 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -15,11 +15,6 @@ namespace Microsoft.ML.Functional.Tests { - internal sealed class OnnxScoreColumn - { - public float[] Score { get; set; } - } - public class ONNX : BaseTestClass { public ONNX(ITestOutputHelper output) : base(output) @@ -60,7 +55,7 @@ public void SaveOnnxModelLoadAndScoreFastTree() // Create prediction engine and test predictions. 
var originalPredictionEngine = mlContext.Model.CreatePredictionEngine(model); - var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine(onnxModel); + var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine(onnxModel); // Take a handful of examples out of the dataset and compute predictions. var dataEnumerator = mlContext.Data.CreateEnumerable(mlContext.Data.TakeRows(data, 5), false); From 962e8b497ce9cfe92b0f8db2fb6c46c6842bd7f5 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Tue, 9 Apr 2019 22:47:09 -0700 Subject: [PATCH 4/6] Updating comment to reflect the current onnx behavior --- test/Microsoft.ML.Functional.Tests/ONNX.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/Microsoft.ML.Functional.Tests/ONNX.cs b/test/Microsoft.ML.Functional.Tests/ONNX.cs index 78ba1bab70..c11b145148 100644 --- a/test/Microsoft.ML.Functional.Tests/ONNX.cs +++ b/test/Microsoft.ML.Functional.Tests/ONNX.cs @@ -49,6 +49,14 @@ public void SaveOnnxModelLoadAndScoreFastTree() mlContext.Model.ConvertToOnnx(model, data, file); // Load the model as a transform. + // Note that when saving an ML.NET model as an ONNX model, the column types and column names will + // change. The name changes as ONNX does not allow the same name for an input and output within the ONNX model. + // Therefore names are maintained but have a number appended to the end of the name. In this case, Score0 is the output + // of the ONNX model. We are renaming Score0 to Score using CopyColumns. + // ONNX also uses tensors and will return an output of a tensor with the dimension of [1,1] for a single float. + // Therefore the VectorScoreColumn class (which contains a float [] field called Score) is used for the return + // type on the Prediction engine. + // See #2980 and #2981 for more information.
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath) .Append(mlContext.Transforms.CopyColumns("Score", "Score0")); var onnxModel = onnxEstimator.Fit(data); From 155212bf04efe68285edc5a7f488d16cd31f6619 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 17 Apr 2019 10:11:28 -0700 Subject: [PATCH 5/6] Adding in a check for variable vector --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index d32a002626..7ff375c9cb 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -329,6 +329,7 @@ public Mapper(OnnxTransformer parent, DataViewSchema inputSchema) : var col = inputSchema.GetColumnOrNull(_parent.Inputs[i]); if (!col.HasValue) throw Host.ExceptSchemaMismatch( nameof(inputSchema),"input", _parent.Inputs[i]); + _inputColIndices[i] = col.Value.Index; var type = inputSchema[_inputColIndices[i]].Type; @@ -574,6 +575,8 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var expectedType = OnnxUtils.OnnxToMlNetType(inputNodeInfo.Type); if (col.ItemType != expectedType) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString()); + if (col.Kind == SchemaShape.Column.VectorKind.VariableVector) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, "vector", col.GetTypeString()); } for (var i = 0; i < Transformer.Outputs.Length; i++) From e215b333dea41f5246cb7f1f15b04ffd55988590 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 17 Apr 2019 10:16:49 -0700 Subject: [PATCH 6/6] Moved the check for variable vector --- src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs index 
7ff375c9cb..9e8660a074 100644 --- a/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs +++ b/src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs @@ -565,6 +565,8 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var input = Transformer.Inputs[i]; if (!inputSchema.TryFindColumn(input, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input); + if (col.Kind == SchemaShape.Column.VectorKind.VariableVector) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, "vector", col.GetTypeString()); var inputsInfo = Transformer.Model.ModelInfo.InputsInfo; var idx = Transformer.Model.InputNames.IndexOf(input); @@ -575,8 +577,6 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) var expectedType = OnnxUtils.OnnxToMlNetType(inputNodeInfo.Type); if (col.ItemType != expectedType) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, expectedType.ToString(), col.ItemType.ToString()); - if (col.Kind == SchemaShape.Column.VectorKind.VariableVector) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", input, "vector", col.GetTypeString()); } for (var i = 0; i < Transformer.Outputs.Length; i++)