Skip to content

Commit 15eeef7

Browse files
authored
Expose the Onnx runtime option for setting the number of threads (#5962)
1 parent 1dfccca commit 15eeef7

File tree

5 files changed

+142
-28
lines changed

5 files changed

+142
-28
lines changed

src/Microsoft.ML.OnnxTransformer/OnnxCatalog.cs

+13-2
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,19 @@ public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog
9696
string modelFile,
9797
int? gpuDeviceId = null,
9898
bool fallbackToCpu = false)
99-
=> new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), new[] { outputColumnName }, new[] { inputColumnName },
100-
modelFile, gpuDeviceId, fallbackToCpu);
99+
=> new OnnxScoringEstimator(CatalogUtils.GetEnvironment(catalog), new[] { outputColumnName }, new[] { inputColumnName },
100+
modelFile, gpuDeviceId, fallbackToCpu);
101+
102+
/// <summary>
/// Create a <see cref="OnnxScoringEstimator"/> using the specified <see cref="OnnxOptions"/>.
/// Please refer to <see cref="OnnxScoringEstimator"/> to learn more about the necessary dependencies,
/// and how to run it on a GPU.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="options">Options for the <see cref="OnnxScoringEstimator"/>.</param>
public static OnnxScoringEstimator ApplyOnnxModel(this TransformsCatalog catalog, OnnxOptions options)
{
    // Resolve the host environment once, then forward every option to the estimator.
    var env = CatalogUtils.GetEnvironment(catalog);
    return new OnnxScoringEstimator(env, options.OutputColumns, options.InputColumns, options.ModelFile,
        options.GpuDeviceId, options.FallbackToCpu, options.ShapeDictionary, options.RecursionLimit,
        options.InterOpNumThreads, options.IntraOpNumThreads);
}
101112

102113
/// <summary>
103114
/// Create a <see cref="OnnxScoringEstimator"/>, which applies a pre-trained Onnx model to the <paramref name="inputColumnName"/> column.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Collections.Generic;
6+
7+
namespace Microsoft.ML.Transforms.Onnx
8+
{
9+
/// <summary>
/// The options for an <see cref="OnnxScoringEstimator"/>.
/// </summary>
public sealed class OnnxOptions
{
    /// <summary>
    /// Path to the onnx model file.
    /// </summary>
    public string ModelFile;

    /// <summary>
    /// Names of the input columns.
    /// </summary>
    public string[] InputColumns;

    /// <summary>
    /// Names of the output columns.
    /// </summary>
    public string[] OutputColumns;

    /// <summary>
    /// GPU device id to run on (e.g. 0,1,..). Null for CPU. Requires CUDA 9.1.
    /// </summary>
    public int? GpuDeviceId = null;

    /// <summary>
    /// If true, resumes execution on CPU upon GPU error. If false, will raise the GPU exception.
    /// </summary>
    public bool FallbackToCpu = false;

    /// <summary>
    /// ONNX shapes to be used over those loaded from <see cref="ModelFile"/>.
    /// </summary>
    public IDictionary<string, int[]> ShapeDictionary;

    /// <summary>
    /// Protobuf CodedInputStream recursion limit. Default value is 100.
    /// </summary>
    public int RecursionLimit = 100;

    /// <summary>
    /// Controls the number of threads used to parallelize the execution of the graph (across nodes).
    /// Null leaves the choice to the ONNX runtime.
    /// </summary>
    public int? InterOpNumThreads = null;

    /// <summary>
    /// Controls the number of threads to use to run the model.
    /// Null leaves the choice to the ONNX runtime.
    /// </summary>
    public int? IntraOpNumThreads = null;
}
59+
}

src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs

+20-5
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,12 @@ internal sealed class Options : TransformInputBase
9090

9191
[Argument(ArgumentType.AtMostOnce, HelpText = "Protobuf CodedInputStream recursion limit.", SortOrder = 6)]
9292
public int RecursionLimit = 100;
93+
94+
[Argument(ArgumentType.AtMostOnce, HelpText = "Controls the number of threads used to parallelize the execution of the graph (across nodes).", SortOrder = 7)]
95+
public int? InterOpNumThreads = null;
96+
97+
[Argument(ArgumentType.AtMostOnce, HelpText = "Controls the number of threads to use to run the model.", SortOrder = 8)]
98+
public int? IntraOpNumThreads = null;
9399
}
94100

95101
/// <summary>
@@ -244,7 +250,8 @@ private OnnxTransformer(IHostEnvironment env, Options options, byte[] modelBytes
244250
Host.CheckNonWhiteSpace(options.ModelFile, nameof(options.ModelFile));
245251
Host.CheckIO(File.Exists(options.ModelFile), "Model file {0} does not exists.", options.ModelFile);
246252
// Because we cannot delete the user file, ownModelFile should be false.
247-
Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary, options.RecursionLimit);
253+
Model = new OnnxModel(options.ModelFile, options.GpuDeviceId, options.FallbackToCpu, ownModelFile: false, shapeDictionary: shapeDictionary, options.RecursionLimit,
254+
options.InterOpNumThreads, options.IntraOpNumThreads);
248255
}
249256
else
250257
{
@@ -309,8 +316,11 @@ internal OnnxTransformer(IHostEnvironment env, string modelFile, int? gpuDeviceI
309316
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
310317
/// <param name="shapeDictionary"></param>
311318
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
319+
/// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
320+
/// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
312321
internal OnnxTransformer(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
313-
IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
322+
IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100,
323+
int? interOpNumThreads = null, int? intraOpNumThreads = null)
314324
: this(env, new Options()
315325
{
316326
ModelFile = modelFile,
@@ -319,7 +329,9 @@ internal OnnxTransformer(IHostEnvironment env, string[] outputColumnNames, strin
319329
GpuDeviceId = gpuDeviceId,
320330
FallbackToCpu = fallbackToCpu,
321331
CustomShapeInfos = shapeDictionary?.Select(pair => new CustomShapeInfo(pair.Key, pair.Value)).ToArray(),
322-
RecursionLimit = recursionLimit
332+
RecursionLimit = recursionLimit,
333+
InterOpNumThreads = interOpNumThreads,
334+
IntraOpNumThreads = intraOpNumThreads
323335
})
324336
{
325337
}
@@ -856,9 +868,12 @@ internal OnnxScoringEstimator(IHostEnvironment env, string modelFile, int? gpuDe
856868
/// <param name="fallbackToCpu">If GPU error, raise exception or fallback to CPU.</param>
857869
/// <param name="shapeDictionary"></param>
858870
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
871+
/// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
872+
/// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
859873
internal OnnxScoringEstimator(IHostEnvironment env, string[] outputColumnNames, string[] inputColumnNames, string modelFile,
860-
int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
861-
: this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit))
874+
int? gpuDeviceId = null, bool fallbackToCpu = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100,
875+
int? interOpNumThreads = null, int? intraOpNumThreads = null)
876+
: this(env, new OnnxTransformer(env, outputColumnNames, inputColumnNames, modelFile, gpuDeviceId, fallbackToCpu, shapeDictionary, recursionLimit, interOpNumThreads, intraOpNumThreads))
862877
{
863878
}
864879

src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs

+18-3
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,11 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da
165165
/// no longer needed.</param>
166166
/// <param name="shapeDictionary"></param>
167167
/// <param name="recursionLimit">Optional, specifies the Protobuf CodedInputStream recursion limit. Default value is 100.</param>
168+
/// <param name="interOpNumThreads">Controls the number of threads used to parallelize the execution of the graph (across nodes).</param>
169+
/// <param name="intraOpNumThreads">Controls the number of threads to use to run the model.</param>
168170
public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu = false,
169-
bool ownModelFile = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100)
171+
bool ownModelFile = false, IDictionary<string, int[]> shapeDictionary = null, int recursionLimit = 100,
172+
int? interOpNumThreads = null, int? intraOpNumThreads = null)
170173
{
171174
// If we don't own the model file, _disposed should be false to prevent deleting user's file.
172175
_disposed = false;
@@ -181,15 +184,27 @@ public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu =
181184
catch (OnnxRuntimeException)
182185
{
183186
if (fallbackToCpu)
184-
_session = new InferenceSession(modelFile);
187+
{
188+
var sessionOptions = new SessionOptions()
189+
{
190+
InterOpNumThreads = interOpNumThreads.GetValueOrDefault(),
191+
IntraOpNumThreads = intraOpNumThreads.GetValueOrDefault()
192+
};
193+
_session = new InferenceSession(modelFile, sessionOptions);
194+
}
185195
else
186196
// If called from OnnxTransform, is caught and rethrown
187197
throw;
188198
}
189199
}
190200
else
191201
{
192-
_session = new InferenceSession(modelFile);
202+
var sessionOptions = new SessionOptions()
203+
{
204+
InterOpNumThreads = interOpNumThreads.GetValueOrDefault(),
205+
IntraOpNumThreads = intraOpNumThreads.GetValueOrDefault()
206+
};
207+
_session = new InferenceSession(modelFile, sessionOptions);
193208
}
194209

195210
try

test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs

+32-18
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Licensed to the .NET Foundation under one or more agreements.
1+
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

@@ -119,8 +119,10 @@ public OnnxTransformTests(ITestOutputHelper output) : base(output)
119119
{
120120
}
121121

122-
[OnnxFact]
123-
public void TestSimpleCase()
122+
[OnnxTheory]
123+
[InlineData(false)]
124+
[InlineData(true)]
125+
public void TestSimpleCase(bool useOptionsCtor)
124126
{
125127
var modelFile = "squeezenet/00000001/model.onnx";
126128
var samplevector = GetSampleArrayData();
@@ -139,7 +141,19 @@ public void TestSimpleCase()
139141
var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[InputSize] } };
140142
var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[InputSize] } };
141143
var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
142-
var pipe = ML.Transforms.ApplyOnnxModel(new[] { "softmaxout_1" }, new[] { "data_0" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
144+
var options = new OnnxOptions()
145+
{
146+
OutputColumns = new[] { "softmaxout_1" },
147+
InputColumns = new[] {"data_0" },
148+
ModelFile = modelFile,
149+
GpuDeviceId = _gpuDeviceId,
150+
FallbackToCpu = _fallbackToCpu,
151+
InterOpNumThreads = 1,
152+
IntraOpNumThreads = 1
153+
};
154+
var pipe = useOptionsCtor ?
155+
ML.Transforms.ApplyOnnxModel(options) :
156+
ML.Transforms.ApplyOnnxModel(options.OutputColumns, options.InputColumns, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
143157

144158
var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData);
145159
var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData);
@@ -713,14 +727,14 @@ public void TestOnnxModelNotDisposal()
713727

714728
private class OnnxMapInput
715729
{
716-
[OnnxMapType(typeof(int),typeof(float))]
717-
public IDictionary<int,float> Input { get; set; }
730+
[OnnxMapType(typeof(int), typeof(float))]
731+
public IDictionary<int, float> Input { get; set; }
718732
}
719733

720734
private class OnnxMapOutput
721735
{
722-
[OnnxMapType(typeof(int),typeof(float))]
723-
public IDictionary<int,float> Output { get; set; }
736+
[OnnxMapType(typeof(int), typeof(float))]
737+
public IDictionary<int, float> Output { get; set; }
724738
}
725739

726740
/// <summary>
@@ -753,10 +767,10 @@ public void SmokeInMemoryOnnxMapTypeTest()
753767
var transformedDataView = model.Transform(dataView);
754768
var transformedDataPoints = ML.Data.CreateEnumerable<OnnxMapOutput>(transformedDataView, false).ToList();
755769

756-
for(int i = 0; i < dataPoints.Count(); ++i)
770+
for (int i = 0; i < dataPoints.Count(); ++i)
757771
{
758772
Assert.Equal(dataPoints[i].Input.Count(), transformedDataPoints[i].Output.Count());
759-
foreach(var pair in dataPoints[i].Input)
773+
foreach (var pair in dataPoints[i].Input)
760774
Assert.Equal(pair.Value, transformedDataPoints[i].Output[pair.Key + 1]);
761775
}
762776
}
@@ -815,7 +829,7 @@ public void TestOnnxTransformWithCustomShapes()
815829
transformedDataViews[2] = onnxTransformer[2].Transform(dataView);
816830

817831
// Conduct the same check for all the 3 called public APIs.
818-
foreach(var transformedDataView in transformedDataViews)
832+
foreach (var transformedDataView in transformedDataViews)
819833
{
820834
var transformedDataPoints = ML.Data.CreateEnumerable<PredictionWithCustomShape>(transformedDataView, false).ToList();
821835

@@ -901,32 +915,32 @@ public void SpecifyOnnxShapes()
901915
Assert.False(somethingWrong);
902916

903917
// Case 3: this shape conflicts with output shape [1, 1, 1, 5] loaded from the model.
904-
shapeDictionary= new Dictionary<string, int[]>() {
918+
shapeDictionary = new Dictionary<string, int[]>() {
905919
{ "outb", new int[] { 5, 6 } },
906920
};
907-
somethingWrong= false;
921+
somethingWrong = false;
908922
try
909923
{
910924
TryModelWithCustomShapesHelper(shapeDictionary);
911925
}
912926
catch
913927
{
914-
somethingWrong= true;
928+
somethingWrong = true;
915929
}
916930
Assert.True(somethingWrong);
917931

918932
// Case 4: this shape works with output shape [1, 1, 1, 5] loaded from the model.
919-
shapeDictionary= new Dictionary<string, int[]>() {
933+
shapeDictionary = new Dictionary<string, int[]>() {
920934
{ "outb", new int[] { -1, -1, -1, -1 } },
921935
};
922-
somethingWrong= false;
936+
somethingWrong = false;
923937
try
924938
{
925939
TryModelWithCustomShapesHelper(shapeDictionary);
926940
}
927941
catch
928942
{
929-
somethingWrong= true;
943+
somethingWrong = true;
930944
}
931945
Assert.False(somethingWrong);
932946
}
@@ -1024,7 +1038,7 @@ public void TestOnnxTransformSaveAndLoadWithRecursionLimit()
10241038
var pipe = ML.Transforms.LoadImages("data_0", imageFolder, "imagePath")
10251039
.Append(ML.Transforms.ResizeImages("data_0", imageHeight, imageWidth))
10261040
.Append(ML.Transforms.ExtractPixels("data_0", interleavePixelColors: true))
1027-
.Append(ML.Transforms.ApplyOnnxModel(new []{ "softmaxout_1" }, new []{ "data_0" }, modelFile,
1041+
.Append(ML.Transforms.ApplyOnnxModel(new[] { "softmaxout_1" }, new[] { "data_0" }, modelFile,
10281042
gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu, shapeDictionary: null, recursionLimit: 50));
10291043

10301044
TestEstimatorCore(pipe, data);

0 commit comments

Comments
 (0)