Skip to content

Commit 62cdfa0

Browse files
frank-dong-ms-zz, antoniovs1029, and eerhardt
authored
merge master to arcade branch (#5424)
* Update to Onnxruntime 1.5.1 (#5406) * Added variables to tests to control Gpu settings * Added dependency to prerelease * Updated to 1.5.1 * Remove prerelease feed * Nit on GPU variables * Change the _maxCalibrationExamples default on CalibratorUtils (#5415) * Change the _maxCalibrationExamples default * Improving comments * Fix perf regression in ShuffleRows (#5417) RowShufflingTransformer is using ChannelReader incorrectly. It needs to block waiting for items to read and was Thread.Sleeping in order to wait, but not spin the current core. This caused a major perf regression. The fix is to block synchronously correctly - by calling AsTask() on the ValueTask that is returned from the ChannelReader and block on the Task. Fix #5416 Co-authored-by: Antonio Velázquez <[email protected]> Co-authored-by: Eric Erhardt <[email protected]>
1 parent 91cf1a8 commit 62cdfa0

File tree

8 files changed

+128
-49
lines changed

8 files changed

+128
-49
lines changed

build/Dependencies.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
<GoogleProtobufPackageVersion>3.10.1</GoogleProtobufPackageVersion>
1717
<LightGBMPackageVersion>2.2.3</LightGBMPackageVersion>
1818
<MicrosoftExtensionsPackageVersion>2.1.0</MicrosoftExtensionsPackageVersion>
19-
<MicrosoftMLOnnxRuntimePackageVersion>1.3.0</MicrosoftMLOnnxRuntimePackageVersion>
19+
<MicrosoftMLOnnxRuntimePackageVersion>1.5.1</MicrosoftMLOnnxRuntimePackageVersion>
2020
<MlNetMklDepsPackageVersion>0.0.0.9</MlNetMklDepsPackageVersion>
2121
<ParquetDotNetPackageVersion>2.1.3</ParquetDotNetPackageVersion>
2222
<SystemDrawingCommonPackageVersion>4.5.0</SystemDrawingCommonPackageVersion>

src/Microsoft.ML.Data/Prediction/Calibrator.cs

+7-2
Original file line numberDiff line numberDiff line change
@@ -837,8 +837,9 @@ internal static object Create(IHostEnvironment env, ModelLoadContext ctx, object
837837
[BestFriend]
838838
internal static class CalibratorUtils
839839
{
840-
// maximum number of rows passed to the calibrator.
841-
private const int _maxCalibrationExamples = 1000000;
840+
// Maximum number of rows to process when training the Calibrator.
841+
// If 0, we'll actually process the whole dataset.
842+
private const int _maxCalibrationExamples = 0;
842843

843844
private static bool NeedCalibration(IHostEnvironment env, IChannel ch, ICalibratorTrainer calibrator,
844845
ITrainer trainer, IPredictor predictor, RoleMappedSchema schema)
@@ -988,6 +989,10 @@ public static ICalibrator TrainCalibrator(IHostEnvironment env, IChannel ch, ICa
988989
caliTrainer.ProcessTrainingExample(score, label > 0, weight);
989990

990991
if (maxRows > 0 && ++num >= maxRows)
992+
// If maxRows was 0, we'll process all of the rows in the dataset
993+
// Notice that depending on the calibrator, "processing" might mean
994+
// randomly choosing some of the "processed" rows
995+
// to actually train the calibrator.
991996
break;
992997
}
993998
}

src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs

+17-18
Original file line numberDiff line numberDiff line change
@@ -634,26 +634,25 @@ protected override bool MoveNextCore()
634634
while (_liveCount < _poolRows && !_doneConsuming)
635635
{
636636
// We are under capacity. Try to get some more.
637-
var hasReadItem = _toConsumeChannel.Reader.TryRead(out int got);
638-
if (hasReadItem)
637+
ValueTask<int> readTask = _toConsumeChannel.Reader.ReadAsync();
638+
639+
// Note you can't wait synchronously on a ValueTask. So if it
640+
// hasn't been completed yet, need to call AsTask() to get a Task
641+
// which can be waited on synchronously.
642+
int got = readTask.IsCompletedSuccessfully ?
643+
readTask.Result :
644+
readTask.AsTask().GetAwaiter().GetResult();
645+
if (got == 0)
639646
{
640-
if (got == 0)
641-
{
642-
// We've reached the end of the Channel. There's no reason
643-
// to attempt further communication with the producer.
644-
// Check whether something horrible happened.
645-
if (_producerTaskException != null)
646-
throw Ch.Except(_producerTaskException, "Shuffle input cursor reader failed with an exception");
647-
_doneConsuming = true;
648-
break;
649-
}
650-
_liveCount += got;
651-
}
652-
else
653-
{
654-
// Sleeping for one millisecond to stop the thread from spinning while waiting for the producer.
655-
Thread.Sleep(1);
647+
// We've reached the end of the Channel. There's no reason
648+
// to attempt further communication with the producer.
649+
// Check whether something horrible happened.
650+
if (_producerTaskException != null)
651+
throw Ch.Except(_producerTaskException, "Shuffle input cursor reader failed with an exception");
652+
_doneConsuming = true;
653+
break;
656654
}
655+
_liveCount += got;
657656
}
658657
if (_liveCount == 0)
659658
return false;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using BenchmarkDotNet.Attributes;
6+
using Microsoft.ML.Benchmarks.Harness;
7+
using Microsoft.ML.Data;
8+
9+
namespace Microsoft.ML.Benchmarks
10+
{
11+
[CIBenchmark]
12+
public class ShuffleRowsBench : BenchmarkBase
13+
{
14+
private TrainRow[] _rows;
15+
private MLContext _context;
16+
17+
[GlobalSetup]
18+
public void Setup()
19+
{
20+
_rows = new TrainRow[10_000];
21+
for (var i = 0; i < _rows.Length; i++)
22+
{
23+
_rows[i] = new TrainRow() { Sample = i.ToString(), Week = i, Label = i / 2 };
24+
}
25+
26+
_context = new MLContext();
27+
}
28+
29+
[Benchmark]
30+
public void ShuffleRows()
31+
{
32+
IDataView data = _context.Data.LoadFromEnumerable(_rows);
33+
34+
IDataView shuffledData = _context.Data.ShuffleRows(data, seed: 0);
35+
36+
foreach (string sample in shuffledData.GetColumn<string>("Sample"))
37+
{
38+
}
39+
}
40+
41+
private class TrainRow
42+
{
43+
[ColumnName("Sample")]
44+
public string Sample;
45+
46+
[ColumnName("Week")]
47+
public float Week;
48+
49+
[ColumnName("Label")]
50+
public float Label;
51+
}
52+
}
53+
}

test/Microsoft.ML.Functional.Tests/ONNX.cs

+8-3
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ namespace Microsoft.ML.Functional.Tests
1616
{
1717
public class ONNX : FunctionalTestBaseClass
1818
{
19+
// These two members are meant to be changed
20+
// only when manually testing the Onnx GPU NuGet packages.
21+
private const bool _fallbackToCpu = true;
22+
private static int? _gpuDeviceId = null;
23+
1924
public ONNX(ITestOutputHelper output) : base(output)
2025
{
2126
}
@@ -52,7 +57,7 @@ public void SaveOnnxModelLoadAndScoreFastTree()
5257
// Therefore the VectorScoreColumn class (which contains a float [] field called Score) is used for the return
5358
// type on the Prediction engine.
5459
// See #2980 and #2981 for more information.
55-
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
60+
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
5661
var onnxModel = onnxEstimator.Fit(data);
5762

5863
// Create prediction engine and test predictions.
@@ -98,7 +103,7 @@ public void SaveOnnxModelLoadAndScoreKMeans()
98103
mlContext.Model.ConvertToOnnx(model, data, file);
99104

100105
// Load the model as a transform.
101-
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
106+
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
102107
var onnxModel = onnxEstimator.Fit(data);
103108

104109
// TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now.
@@ -150,7 +155,7 @@ public void SaveOnnxModelLoadAndScoreSDCA()
150155
mlContext.Model.ConvertToOnnx(model, data, file);
151156

152157
// Load the model as a transform.
153-
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
158+
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
154159
var onnxModel = onnxEstimator.Fit(data);
155160

156161
// Create prediction engine and test predictions.

test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs

+21-16
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ namespace Microsoft.ML.Tests
2323
{
2424
public class OnnxTransformTests : TestDataPipeBase
2525
{
26+
// These two members are meant to be changed
27+
// only when manually testing the Onnx GPU NuGet packages.
28+
private const bool _fallbackToCpu = true;
29+
private static int? _gpuDeviceId = null;
30+
2631
private const int InputSize = 150528;
2732

2833
private class TestData
@@ -134,7 +139,7 @@ public void TestSimpleCase()
134139
var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[InputSize] } };
135140
var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[InputSize] } };
136141
var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
137-
var pipe = ML.Transforms.ApplyOnnxModel(new[] { "softmaxout_1" }, new[] { "data_0" }, modelFile);
142+
var pipe = ML.Transforms.ApplyOnnxModel(new[] { "softmaxout_1" }, new[] { "data_0" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
138143

139144
var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData);
140145
var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData);
@@ -231,7 +236,7 @@ public void OnnxWorkout()
231236
var pipe = ML.Transforms.LoadImages("data_0", imageFolder, "imagePath")
232237
.Append(ML.Transforms.ResizeImages("data_0", imageHeight, imageWidth))
233238
.Append(ML.Transforms.ExtractPixels("data_0", interleavePixelColors: true))
234-
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile));
239+
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu));
235240

236241
TestEstimatorCore(pipe, data);
237242

@@ -292,7 +297,7 @@ public void OnnxModelScenario()
292297
}
293298
});
294299

295-
var pipeline = ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile);
300+
var pipeline = ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
296301
var onnxTransformer = pipeline.Fit(dataView);
297302
var onnx = onnxTransformer.Transform(dataView);
298303
var scoreCol = onnx.Schema["softmaxout_1"];
@@ -325,7 +330,7 @@ public void OnnxModelMultiInput()
325330
inb = new float[] {1,2,3,4,5}
326331
}
327332
});
328-
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outa", "outb" }, new[] { "ina", "inb" }, modelFile);
333+
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outa", "outb" }, new[] { "ina", "inb" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
329334
var onnxTransformer = pipeline.Fit(dataView);
330335
var onnx = onnxTransformer.Transform(dataView);
331336

@@ -365,7 +370,7 @@ public void OnnxModelOutputDifferentOrder()
365370
}
366371
});
367372
// The model returns the output columns in the order outa, outb. We are doing the opposite here, making sure the name mapping is correct.
368-
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb", "outa" }, new[] { "ina", "inb" }, modelFile);
373+
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb", "outa" }, new[] { "ina", "inb" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
369374
var onnxTransformer = pipeline.Fit(dataView);
370375
var onnx = onnxTransformer.Transform(dataView);
371376

@@ -391,7 +396,7 @@ public void OnnxModelOutputDifferentOrder()
391396
(onnxTransformer as IDisposable)?.Dispose();
392397

393398
// The model returns the output columns in the order outa, outb. We are doing only a subset, outb, to make sure the mapping works.
394-
pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb" }, new[] { "ina", "inb" }, modelFile);
399+
pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb" }, new[] { "ina", "inb" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
395400
onnxTransformer = pipeline.Fit(dataView);
396401
onnx = onnxTransformer.Transform(dataView);
397402

@@ -425,7 +430,7 @@ public void TestUnknownDimensions()
425430
new TestDataUnknownDimensions(){input = new float[] {-1.1f, -1.3f, 1.2f }},
426431
};
427432
var idv = mlContext.Data.LoadFromEnumerable(data);
428-
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile);
433+
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
429434
var onnxTransformer = pipeline.Fit(idv);
430435
var transformedValues = onnxTransformer.Transform(idv);
431436
var predictions = mlContext.Data.CreateEnumerable<PredictionUnknownDimensions>(transformedValues, reuseRowObject: false).ToArray();
@@ -451,7 +456,7 @@ public void TestOnnxNoneDimValue()
451456
new TestDataNoneDimension(){features = new float[] { 6.3f, 3.3f, 6.0f, 2.5f }},
452457
};
453458
var idv = mlContext.Data.LoadFromEnumerable(data);
454-
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile);
459+
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
455460
var onnxTransformer = pipeline.Fit(idv);
456461
var transformedValues = onnxTransformer.Transform(idv);
457462
var predictions = mlContext.Data.CreateEnumerable<PredictionNoneDimension>(transformedValues, reuseRowObject: false).ToArray();
@@ -526,7 +531,7 @@ public void OnnxModelInMemoryImage()
526531
// "softmaxout_1" are model input and output names stored in the used ONNX model file. Users may need to inspect their own models to
527532
// get the right input and output column names.
528533
var pipeline = ML.Transforms.ExtractPixels("data_0", "Image") // Map column "Image" to column "data_0"
529-
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile)); // Map column "data_0" to column "softmaxout_1"
534+
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu)); // Map column "data_0" to column "softmaxout_1"
530535
var model = pipeline.Fit(dataView);
531536
var onnx = model.Transform(dataView);
532537

@@ -576,7 +581,7 @@ public void TestOnnxZipMapWithInt64Keys()
576581
};
577582

578583
var dataView = ML.Data.LoadFromEnumerable(dataPoints);
579-
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile);
584+
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
580585
var onnxTransformer = pipeline.Fit(dataView);
581586
var transformedDataView = onnxTransformer.Transform(dataView);
582587

@@ -629,7 +634,7 @@ public void TestOnnxZipMapWithStringKeys()
629634
};
630635

631636
var dataView = ML.Data.LoadFromEnumerable(dataPoints);
632-
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile);
637+
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
633638
var onnxTransformer = pipeline.Fit(dataView);
634639
var transformedDataView = onnxTransformer.Transform(dataView);
635640

@@ -794,19 +799,19 @@ public void TestOnnxTransformWithCustomShapes()
794799
// Test 1.
795800
pipeline[0] = ML.Transforms.ApplyOnnxModel(
796801
new[] { nameof(PredictionWithCustomShape.argmax) }, new[] { nameof(InputWithCustomShape.input) },
797-
modelFile, shapeDictionary);
802+
modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
798803
onnxTransformer[0] = pipeline[0].Fit(dataView);
799804
transformedDataViews[0] = onnxTransformer[0].Transform(dataView);
800805

801806
// Test 2.
802807
pipeline[1] = ML.Transforms.ApplyOnnxModel(
803808
nameof(PredictionWithCustomShape.argmax), nameof(InputWithCustomShape.input),
804-
modelFile, shapeDictionary);
809+
modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
805810
onnxTransformer[1] = pipeline[1].Fit(dataView);
806811
transformedDataViews[1] = onnxTransformer[1].Transform(dataView);
807812

808813
// Test 3.
809-
pipeline[2] = ML.Transforms.ApplyOnnxModel(modelFile, shapeDictionary);
814+
pipeline[2] = ML.Transforms.ApplyOnnxModel(modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
810815
onnxTransformer[2] = pipeline[2].Fit(dataView);
811816
transformedDataViews[2] = onnxTransformer[2].Transform(dataView);
812817

@@ -856,7 +861,7 @@ private void TryModelWithCustomShapesHelper(IDictionary<string, int[]> shapeDict
856861

857862
// Define a ONNX transform, trains it, and apply it to the input data.
858863
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outa", "outb" }, new[] { "ina", "inb" },
859-
modelFile, shapeDictionary);
864+
modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
860865
}
861866

862867
/// <summary>
@@ -956,7 +961,7 @@ public void TestOnnxTransformSaveAndLoadWithCustomShapes()
956961
var dataView = ML.Data.LoadFromEnumerable(dataPoints);
957962

958963
var pipeline = ML.Transforms.ApplyOnnxModel(nameof(PredictionWithCustomShape.argmax),
959-
nameof(InputWithCustomShape.input), modelFile, shapeDictionary);
964+
nameof(InputWithCustomShape.input), modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
960965

961966
var model = pipeline.Fit(dataView);
962967

0 commit comments

Comments (0)