Skip to content

merge master to arcade branch #5424

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<GoogleProtobufPackageVersion>3.10.1</GoogleProtobufPackageVersion>
<LightGBMPackageVersion>2.2.3</LightGBMPackageVersion>
<MicrosoftExtensionsPackageVersion>2.1.0</MicrosoftExtensionsPackageVersion>
<MicrosoftMLOnnxRuntimePackageVersion>1.3.0</MicrosoftMLOnnxRuntimePackageVersion>
<MicrosoftMLOnnxRuntimePackageVersion>1.5.1</MicrosoftMLOnnxRuntimePackageVersion>
<MlNetMklDepsPackageVersion>0.0.0.9</MlNetMklDepsPackageVersion>
<ParquetDotNetPackageVersion>2.1.3</ParquetDotNetPackageVersion>
<SystemDrawingCommonPackageVersion>4.5.0</SystemDrawingCommonPackageVersion>
Expand Down
9 changes: 7 additions & 2 deletions src/Microsoft.ML.Data/Prediction/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -837,8 +837,9 @@ internal static object Create(IHostEnvironment env, ModelLoadContext ctx, object
[BestFriend]
internal static class CalibratorUtils
{
// maximum number of rows passed to the calibrator.
private const int _maxCalibrationExamples = 1000000;
// Maximum number of rows to process when training the Calibrator.
// If 0, we'll actually process the whole dataset.
private const int _maxCalibrationExamples = 0;

private static bool NeedCalibration(IHostEnvironment env, IChannel ch, ICalibratorTrainer calibrator,
ITrainer trainer, IPredictor predictor, RoleMappedSchema schema)
Expand Down Expand Up @@ -988,6 +989,10 @@ public static ICalibrator TrainCalibrator(IHostEnvironment env, IChannel ch, ICa
caliTrainer.ProcessTrainingExample(score, label > 0, weight);

if (maxRows > 0 && ++num >= maxRows)
// If maxRows was 0, we'll process all of the rows in the dataset
// Notice that depending on the calibrator, "processing" might mean
// randomly choosing some of the "processed" rows
// to actually train the calibrator.
break;
}
}
Expand Down
35 changes: 17 additions & 18 deletions src/Microsoft.ML.Data/Transforms/RowShufflingTransformer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -634,26 +634,25 @@ protected override bool MoveNextCore()
while (_liveCount < _poolRows && !_doneConsuming)
{
// We are under capacity. Try to get some more.
var hasReadItem = _toConsumeChannel.Reader.TryRead(out int got);
if (hasReadItem)
ValueTask<int> readTask = _toConsumeChannel.Reader.ReadAsync();

// Note you can't wait synchronously on a ValueTask. So if it
// hasn't been completed yet, need to call AsTask() to get a Task
// which can be waited on synchronously.
int got = readTask.IsCompletedSuccessfully ?
readTask.Result :
readTask.AsTask().GetAwaiter().GetResult();
if (got == 0)
{
if (got == 0)
{
// We've reached the end of the Channel. There's no reason
// to attempt further communication with the producer.
// Check whether something horrible happened.
if (_producerTaskException != null)
throw Ch.Except(_producerTaskException, "Shuffle input cursor reader failed with an exception");
_doneConsuming = true;
break;
}
_liveCount += got;
}
else
{
// Sleeping for one millisecond to stop the thread from spinning while waiting for the producer.
Thread.Sleep(1);
// We've reached the end of the Channel. There's no reason
// to attempt further communication with the producer.
// Check whether something horrible happened.
if (_producerTaskException != null)
throw Ch.Except(_producerTaskException, "Shuffle input cursor reader failed with an exception");
_doneConsuming = true;
break;
}
_liveCount += got;
}
if (_liveCount == 0)
return false;
Expand Down
53 changes: 53 additions & 0 deletions test/Microsoft.ML.Benchmarks/ShuffleRowsBench.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using BenchmarkDotNet.Attributes;
using Microsoft.ML.Benchmarks.Harness;
using Microsoft.ML.Data;

namespace Microsoft.ML.Benchmarks
{
    /// <summary>
    /// Benchmark that measures the cost of shuffling the rows of an in-memory
    /// dataset and then enumerating one column of the shuffled result.
    /// </summary>
    [CIBenchmark]
    public class ShuffleRowsBench : BenchmarkBase
    {
        private TrainRow[] _rows;
        private MLContext _context;

        [GlobalSetup]
        public void Setup()
        {
            // Build 10,000 synthetic rows once, before any benchmark iteration runs.
            _rows = new TrainRow[10_000];
            var index = 0;
            while (index < _rows.Length)
            {
                _rows[index] = new TrainRow() { Sample = index.ToString(), Week = index, Label = index / 2 };
                index++;
            }

            _context = new MLContext();
        }

        [Benchmark]
        public void ShuffleRows()
        {
            IDataView data = _context.Data.LoadFromEnumerable(_rows);

            IDataView shuffledData = _context.Data.ShuffleRows(data, seed: 0);

            // Force the (lazy) shuffle to actually execute by draining a single column.
            foreach (string sample in shuffledData.GetColumn<string>("Sample"))
            {
            }
        }

        // Simple schema for the synthetic input rows.
        private class TrainRow
        {
            [ColumnName("Sample")]
            public string Sample;

            [ColumnName("Week")]
            public float Week;

            [ColumnName("Label")]
            public float Label;
        }
    }
}
11 changes: 8 additions & 3 deletions test/Microsoft.ML.Functional.Tests/ONNX.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ namespace Microsoft.ML.Functional.Tests
{
public class ONNX : FunctionalTestBaseClass
{
// These two members are meant to be changed
// only when manually testing the Onnx GPU NuGet packages.
private const bool _fallbackToCpu = true;
private static int? _gpuDeviceId = null;

public ONNX(ITestOutputHelper output) : base(output)
{
}
Expand Down Expand Up @@ -52,7 +57,7 @@ public void SaveOnnxModelLoadAndScoreFastTree()
// Therefore the VectorScoreColumn class (which contains a float [] field called Score) is used for the return
// type on the Prediction engine.
// See #2980 and #2981 for more information.
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxModel = onnxEstimator.Fit(data);

// Create prediction engine and test predictions.
Expand Down Expand Up @@ -98,7 +103,7 @@ public void SaveOnnxModelLoadAndScoreKMeans()
mlContext.Model.ConvertToOnnx(model, data, file);

// Load the model as a transform.
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxModel = onnxEstimator.Fit(data);

// TODO #2980: ONNX outputs don't match the outputs of the model, so we must hand-correct this for now.
Expand Down Expand Up @@ -150,7 +155,7 @@ public void SaveOnnxModelLoadAndScoreSDCA()
mlContext.Model.ConvertToOnnx(model, data, file);

// Load the model as a transform.
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxModel = onnxEstimator.Fit(data);

// Create prediction engine and test predictions.
Expand Down
37 changes: 21 additions & 16 deletions test/Microsoft.ML.OnnxTransformerTest/OnnxTransformTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ namespace Microsoft.ML.Tests
{
public class OnnxTransformTests : TestDataPipeBase
{
// These two members are meant to be changed
// only when manually testing the Onnx GPU NuGet packages.
private const bool _fallbackToCpu = true;
private static int? _gpuDeviceId = null;

private const int InputSize = 150528;

private class TestData
Expand Down Expand Up @@ -134,7 +139,7 @@ public void TestSimpleCase()
var xyData = new List<TestDataXY> { new TestDataXY() { A = new float[InputSize] } };
var stringData = new List<TestDataDifferntType> { new TestDataDifferntType() { data_0 = new string[InputSize] } };
var sizeData = new List<TestDataSize> { new TestDataSize() { data_0 = new float[2] } };
var pipe = ML.Transforms.ApplyOnnxModel(new[] { "softmaxout_1" }, new[] { "data_0" }, modelFile);
var pipe = ML.Transforms.ApplyOnnxModel(new[] { "softmaxout_1" }, new[] { "data_0" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);

var invalidDataWrongNames = ML.Data.LoadFromEnumerable(xyData);
var invalidDataWrongTypes = ML.Data.LoadFromEnumerable(stringData);
Expand Down Expand Up @@ -231,7 +236,7 @@ public void OnnxWorkout()
var pipe = ML.Transforms.LoadImages("data_0", imageFolder, "imagePath")
.Append(ML.Transforms.ResizeImages("data_0", imageHeight, imageWidth))
.Append(ML.Transforms.ExtractPixels("data_0", interleavePixelColors: true))
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile));
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu));

TestEstimatorCore(pipe, data);

Expand Down Expand Up @@ -292,7 +297,7 @@ public void OnnxModelScenario()
}
});

var pipeline = ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(dataView);
var onnx = onnxTransformer.Transform(dataView);
var scoreCol = onnx.Schema["softmaxout_1"];
Expand Down Expand Up @@ -325,7 +330,7 @@ public void OnnxModelMultiInput()
inb = new float[] {1,2,3,4,5}
}
});
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outa", "outb" }, new[] { "ina", "inb" }, modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outa", "outb" }, new[] { "ina", "inb" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(dataView);
var onnx = onnxTransformer.Transform(dataView);

Expand Down Expand Up @@ -365,7 +370,7 @@ public void OnnxModelOutputDifferentOrder()
}
});
// The model returns the output columns in the order outa, outb. We are doing the opposite here, making sure the name mapping is correct.
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb", "outa" }, new[] { "ina", "inb" }, modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb", "outa" }, new[] { "ina", "inb" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(dataView);
var onnx = onnxTransformer.Transform(dataView);

Expand All @@ -391,7 +396,7 @@ public void OnnxModelOutputDifferentOrder()
(onnxTransformer as IDisposable)?.Dispose();

// The model returns the output columns in the order outa, outb. We are doing only a subset, outb, to make sure the mapping works.
pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb" }, new[] { "ina", "inb" }, modelFile);
pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outb" }, new[] { "ina", "inb" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
onnxTransformer = pipeline.Fit(dataView);
onnx = onnxTransformer.Transform(dataView);

Expand Down Expand Up @@ -425,7 +430,7 @@ public void TestUnknownDimensions()
new TestDataUnknownDimensions(){input = new float[] {-1.1f, -1.3f, 1.2f }},
};
var idv = mlContext.Data.LoadFromEnumerable(data);
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(idv);
var transformedValues = onnxTransformer.Transform(idv);
var predictions = mlContext.Data.CreateEnumerable<PredictionUnknownDimensions>(transformedValues, reuseRowObject: false).ToArray();
Expand All @@ -451,7 +456,7 @@ public void TestOnnxNoneDimValue()
new TestDataNoneDimension(){features = new float[] { 6.3f, 3.3f, 6.0f, 2.5f }},
};
var idv = mlContext.Data.LoadFromEnumerable(data);
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel(modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(idv);
var transformedValues = onnxTransformer.Transform(idv);
var predictions = mlContext.Data.CreateEnumerable<PredictionNoneDimension>(transformedValues, reuseRowObject: false).ToArray();
Expand Down Expand Up @@ -526,7 +531,7 @@ public void OnnxModelInMemoryImage()
// "softmaxout_1" are model input and output names stored in the used ONNX model file. Users may need to inspect their own models to
// get the right input and output column names.
var pipeline = ML.Transforms.ExtractPixels("data_0", "Image") // Map column "Image" to column "data_0"
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile)); // Map column "data_0" to column "softmaxout_1"
.Append(ML.Transforms.ApplyOnnxModel("softmaxout_1", "data_0", modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu)); // Map column "data_0" to column "softmaxout_1"
var model = pipeline.Fit(dataView);
var onnx = model.Transform(dataView);

Expand Down Expand Up @@ -576,7 +581,7 @@ public void TestOnnxZipMapWithInt64Keys()
};

var dataView = ML.Data.LoadFromEnumerable(dataPoints);
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(dataView);
var transformedDataView = onnxTransformer.Transform(dataView);

Expand Down Expand Up @@ -629,7 +634,7 @@ public void TestOnnxZipMapWithStringKeys()
};

var dataView = ML.Data.LoadFromEnumerable(dataPoints);
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile);
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "output" }, new[] { "input" }, modelFile, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
var onnxTransformer = pipeline.Fit(dataView);
var transformedDataView = onnxTransformer.Transform(dataView);

Expand Down Expand Up @@ -794,19 +799,19 @@ public void TestOnnxTransformWithCustomShapes()
// Test 1.
pipeline[0] = ML.Transforms.ApplyOnnxModel(
new[] { nameof(PredictionWithCustomShape.argmax) }, new[] { nameof(InputWithCustomShape.input) },
modelFile, shapeDictionary);
modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
onnxTransformer[0] = pipeline[0].Fit(dataView);
transformedDataViews[0] = onnxTransformer[0].Transform(dataView);

// Test 2.
pipeline[1] = ML.Transforms.ApplyOnnxModel(
nameof(PredictionWithCustomShape.argmax), nameof(InputWithCustomShape.input),
modelFile, shapeDictionary);
modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
onnxTransformer[1] = pipeline[1].Fit(dataView);
transformedDataViews[1] = onnxTransformer[1].Transform(dataView);

// Test 3.
pipeline[2] = ML.Transforms.ApplyOnnxModel(modelFile, shapeDictionary);
pipeline[2] = ML.Transforms.ApplyOnnxModel(modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
onnxTransformer[2] = pipeline[2].Fit(dataView);
transformedDataViews[2] = onnxTransformer[2].Transform(dataView);

Expand Down Expand Up @@ -856,7 +861,7 @@ private void TryModelWithCustomShapesHelper(IDictionary<string, int[]> shapeDict

// Define a ONNX transform, trains it, and apply it to the input data.
var pipeline = ML.Transforms.ApplyOnnxModel(new[] { "outa", "outb" }, new[] { "ina", "inb" },
modelFile, shapeDictionary);
modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);
}

/// <summary>
Expand Down Expand Up @@ -956,7 +961,7 @@ public void TestOnnxTransformSaveAndLoadWithCustomShapes()
var dataView = ML.Data.LoadFromEnumerable(dataPoints);

var pipeline = ML.Transforms.ApplyOnnxModel(nameof(PredictionWithCustomShape.argmax),
nameof(InputWithCustomShape.input), modelFile, shapeDictionary);
nameof(InputWithCustomShape.input), modelFile, shapeDictionary, gpuDeviceId: _gpuDeviceId, fallbackToCpu: _fallbackToCpu);

var model = pipeline.Fit(dataView);

Expand Down
Loading