|
21 | 21 | using Microsoft.ML.Trainers;
|
22 | 22 | using Microsoft.ML.Transforms;
|
23 | 23 | using Microsoft.ML.Transforms.Onnx;
|
24 |
| -using Microsoft.ML.Transforms.Text; |
25 | 24 | using Newtonsoft.Json;
|
26 | 25 | using Xunit;
|
27 | 26 | using Xunit.Abstractions;
|
28 | 27 | using static Microsoft.ML.Model.OnnxConverter.OnnxCSharpToProtoWrapper;
|
29 | 28 |
|
30 | 29 | namespace Microsoft.ML.Tests
|
31 | 30 | {
|
32 |
| - |
33 |
| -public class OnnxConversionTest : BaseTestBaseline |
| 31 | + public class OnnxConversionTest : BaseTestBaseline |
34 | 32 | {
|
35 |
| - |
36 |
| - private static IEnumerable<DataPoint2> GenerateRandomDataPoints(int count, |
37 |
| - int seed = 0) |
38 |
| - { |
39 |
| - var random = new Random(seed); |
40 |
| - for (int i = 0; i < count; i++) |
41 |
| - { |
42 |
| - float label = (float)random.NextDouble(); |
43 |
| - yield return new DataPoint2 |
44 |
| - { |
45 |
| - Label = label, |
46 |
| - // Create random features that are correlated with the label. |
47 |
| - Features = Enumerable.Repeat(label, 50).Select( |
48 |
| - x => x + (float)random.NextDouble()).ToArray() |
49 |
| - }; |
50 |
| - } |
51 |
| - } |
52 |
| - |
53 |
| - // Example with label and 50 feature values. A data set is a collection of |
54 |
| - // such examples. |
55 |
| - private class DataPoint2 |
56 |
| - { |
57 |
| - public float Label { get; set; } |
58 |
| - [VectorType(50)] |
59 |
| - public float[] Features { get; set; } |
60 |
| - } |
61 |
| - |
62 |
| - // Class used to capture predictions. |
63 |
| - private class Prediction |
64 |
| - { |
65 |
| - // Original label. |
66 |
| - public float Label { get; set; } |
67 |
| - // Predicted score from the trainer. |
68 |
| - public float Score { get; set; } |
69 |
| - } |
70 | 33 | private class AdultData
|
71 | 34 | {
|
72 | 35 | [LoadColumn(0, 10), ColumnName("FeatureVector")]
|
@@ -145,7 +108,8 @@ public void SimpleEndToEndOnnxConversionTest()
|
145 | 108 | private class BreastCancerFeatureVector
|
146 | 109 | {
|
147 | 110 | [LoadColumn(1, 9), VectorType(9)]
|
148 |
| - public float[] Features; } |
| 111 | + public float[] Features; |
| 112 | + } |
149 | 113 |
|
150 | 114 | private class BreastCancerCatFeatureExample
|
151 | 115 | {
|
@@ -223,160 +187,7 @@ public void KmeansOnnxConversionTest()
|
223 | 187 | Done();
|
224 | 188 | }
|
225 | 189 |
|
226 |
| - [Fact] |
227 |
| - public void WordEmbeddingEstimatorOnnxConversionTest() //can't find the class - maybe |
228 |
| - { |
229 |
| - // Step 1: Create and train a ML.NET pipeline. |
230 |
| - var mlContext = new MLContext(seed: 1); |
231 |
| - string dataPath = GetDataPath(TestDatasets.Sentiment.trainFilename); |
232 |
| - var data = new TextLoader(ML, |
233 |
| - new TextLoader.Options() |
234 |
| - { |
235 |
| - Separator = "\t", |
236 |
| - HasHeader = true, |
237 |
| - Columns = new[] |
238 |
| - { |
239 |
| - new TextLoader.Column("Label", DataKind.Boolean, 0), |
240 |
| - new TextLoader.Column("SentimentText", DataKind.String, 1) |
241 |
| - } |
242 |
| - }).Load(GetDataPath(dataPath)); |
243 |
| - |
244 |
| - IEstimator<ITransformer>[] estimators = { }; |
245 |
| - var textPipeline = mlContext.Transforms.Text.NormalizeText("SentimentText") |
246 |
| - .Append(mlContext.Transforms.Text.TokenizeIntoWords("Tokens", |
247 |
| - "SentimentText")) |
248 |
| - .Append(mlContext.Transforms.Text.ApplyWordEmbedding("Features", |
249 |
| - "Tokens", WordEmbeddingEstimator.PretrainedModelKind |
250 |
| - .SentimentSpecificWordEmbedding)); |
251 |
| - var model = textPipeline.Fit(data); |
252 |
| - var transformedData = model.Transform(data); |
253 |
| - |
254 |
| - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, data); |
255 |
| - // Compare results produced by ML.NET and ONNX's runtime. |
256 |
| - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) |
257 |
| - { |
258 |
| - var onnxFileName = "WordEmbeddingEstimator.onnx"; |
259 |
| - var onnxModelPath = GetOutputPath(onnxFileName); |
260 |
| - SaveOnnxModel(onnxModel, onnxModelPath, null); |
261 |
| - |
262 |
| - // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. |
263 |
| - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
264 |
| - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
265 |
| - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); |
266 |
| - var onnxTransformer = onnxEstimator.Fit(data); |
267 |
| - var onnxResult = onnxTransformer.Transform(data); |
268 |
| - CompareSelectedR4VectorColumns("Score", "Score0", transformedData, onnxResult, 3); |
269 |
| - } |
270 |
| - Done(); |
271 |
| - } |
272 |
| - |
273 |
| - [Fact] |
274 |
| - // Conversion tests for regression |
275 |
| - public void regressionOnnxConversionTest() |
276 |
| - { |
277 |
| - /* |
278 |
| - var mlContext = new MLContext(seed: 1); |
279 |
| - string dataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename); |
280 |
| -
|
281 |
| - // Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed). |
282 |
| - var dataView = mlContext.Data.LoadFromTextFile<AdultData>(dataPath, |
283 |
| - separatorChar: ';', |
284 |
| - hasHeader: true); |
285 |
| - IEstimator<ITransformer>[] estimators = { |
286 |
| - //mlContext.Regression.Trainers.Ols(new OlsTrainer.Options() { |
287 |
| - // LabelColumnName = "Target", |
288 |
| - // FeatureColumnName = "FeatureVector", |
289 |
| - //}), |
290 |
| - //mlContext.Regression.Trainers.OnlineGradientDescent(new OnlineGradientDescentTrainer.Options(){ |
291 |
| - // LabelColumnName = "Target", |
292 |
| - // FeatureColumnName = "FeatureVector", |
293 |
| - //}), |
294 |
| - //mlContext.Transforms.DetectAnomalyBySrCnn("Target","FeatureVector"), // needs separate data |
295 |
| - mlContext.Regression.Trainers.FastForest("Target", "FeatureVector"), |
296 |
| - //mlContext.Regression.Trainers.FastTree("Target", "FeatureVector"), |
297 |
| - //mlContext.Regression.Trainers.FastTreeTweedie("Target", "FeatureVector"), |
298 |
| - //mlContext.Regression.Trainers.LightGbm("Target","FeatureVector"), |
299 |
| - //mlContext.Regression.Trainers.LbfgsPoissonRegression("Target", "FeatureVector"), |
300 |
| - }; |
301 |
| - */ |
302 |
| - // Create a new context for ML.NET operations. It can be used for |
303 |
| - // exception tracking and logging, as a catalog of available operations |
304 |
| - // and as the source of randomness. Setting the seed to a fixed number |
305 |
| - // in this example to make outputs deterministic. |
306 |
| - var mlContext = new MLContext(seed: 0); |
307 |
| - |
308 |
| - // Create a list of training data points. |
309 |
| - var dataPoints = GenerateRandomDataPoints(1000); |
310 |
| - |
311 |
| - // Convert the list of data points to an IDataView object, which is |
312 |
| - // consumable by ML.NET API. |
313 |
| - var trainingData = mlContext.Data.LoadFromEnumerable(dataPoints); |
314 |
| - |
315 |
| - // Define the trainer. |
316 |
| - var pipeline = mlContext.Regression.Trainers.FastTreeTweedie( |
317 |
| - labelColumnName: nameof(DataPoint2.Label), |
318 |
| - featureColumnName: nameof(DataPoint2.Features)); |
319 |
| - |
320 |
| - // Train the model. |
321 |
| - var model = pipeline.Fit(trainingData); |
322 |
| - |
323 |
| - // Create testing data. Use different random seed to make it different |
324 |
| - // from training data. |
325 |
| - var data = mlContext.Data.LoadFromEnumerable( |
326 |
| - GenerateRandomDataPoints(5, seed: 123)); |
327 |
| - |
328 |
| - // Run the model on test data set. |
329 |
| - var transformedTestData = model.Transform(data); |
330 |
| - // Convert IDataView object to a list. |
331 |
| - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, data); |
332 |
| - // Convert IDataView object to a list. |
333 |
| - var predictions = mlContext.Data.CreateEnumerable<Prediction>( |
334 |
| - transformedTestData, reuseRowObject: false).ToList(); |
335 |
| - foreach (var p in predictions) |
336 |
| - System.Diagnostics.Debug.WriteLine($"Label: {p.Label:F3}, Prediction: {p.Score:F3}"); |
337 |
| - // Compare results produced by ML.NET and ONNX's runtime. |
338 |
| - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) |
339 |
| - { |
340 |
| - var onnxFileName = "test.onnx"; |
341 |
| - var onnxModelPath = GetOutputPath(onnxFileName); |
342 |
| - SaveOnnxModel(onnxModel, onnxModelPath, null); |
343 |
| - |
344 |
| - // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. |
345 |
| - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
346 |
| - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
347 |
| - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); |
348 |
| - var onnxTransformer = onnxEstimator.Fit(data); |
349 |
| - var onnxResult = onnxTransformer.Transform(data); |
350 |
| - CompareSelectedR4ScalarColumns("Label", "Score0", data, onnxResult, 3); |
351 |
| - } |
352 |
| - Done(); |
353 |
| - /*var initialPipeline = mlContext.Transforms.NormalizeMinMax("FeatureVector"); |
354 |
| - foreach (var estimator in estimators) |
355 |
| - { |
356 |
| - //var pipeline = initialPipeline.Append(estimator); |
357 |
| - var pipeline = estimator; |
358 |
| -
|
359 |
| - var model = pipeline.Fit(dataView); |
360 |
| - var transformedData = model.Transform(dataView); |
361 |
| - var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, dataView); |
362 |
| - var onnxFileName = $"{estimator.ToString()}.onnx"; |
363 |
| - var onnxModelPath = GetOutputPath(onnxFileName); |
364 |
| - SaveOnnxModel(onnxModel, onnxModelPath, null); |
365 |
| - // Compare model scores produced by ML.NET and ONNX's runtime. |
366 |
| - if (IsOnnxRuntimeSupported()) |
367 |
| - { |
368 |
| - // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. |
369 |
| - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
370 |
| - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
371 |
| - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); |
372 |
| - var onnxTransformer = onnxEstimator.Fit(dataView); |
373 |
| - var onnxResult = onnxTransformer.Transform(dataView); //switched to 2 vause |
374 |
| - CompareSelectedR4ScalarColumns(transformedData.Schema[2].Name, outputNames[2], transformedData, onnxResult, 0); // compare score results |
375 |
| - } |
376 |
| - } */ |
377 |
| - //Done(); |
378 |
| - } |
379 |
| - private class DataPoint |
| 190 | + private class DataPoint |
380 | 191 | {
|
381 | 192 | [VectorType(3)]
|
382 | 193 | public float[] Features { get; set; }
|
@@ -569,7 +380,8 @@ public void LogisticRegressionOnnxConversionTest()
|
569 | 380 | var trainDataPath = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
|
570 | 381 | var mlContext = new MLContext(seed: 1);
|
571 | 382 | var data = mlContext.Data.LoadFromTextFile<AdultData>(trainDataPath,
|
572 |
| - separatorChar: ';', |
| 383 | + separatorChar: ';' |
| 384 | +, |
573 | 385 | hasHeader: true);
|
574 | 386 | var cachedTrainData = mlContext.Data.Cache(data);
|
575 | 387 | var dynamicPipeline =
|
@@ -846,21 +658,15 @@ public void WordEmbeddingsTest()
|
846 | 658 | var model = pipeline.Fit(data);
|
847 | 659 | var transformedData = model.Transform(data);
|
848 | 660 |
|
| 661 | + var subDir = Path.Combine("..", "..", "BaselineOutput", "Common", "Onnx", "Transforms", "Sentiment"); |
| 662 | + var onnxTextName = "SmallWordEmbed.txt"; |
| 663 | + var onnxFileName = "SmallWordEmbed.onnx"; |
| 664 | + var onnxTextPath = GetOutputPath(subDir, onnxTextName); |
| 665 | + var onnxFilePath = GetOutputPath(subDir, onnxFileName); |
849 | 666 | var onnxModel = mlContext.Model.ConvertToOnnxProtobuf(model, data);
|
850 |
| - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows) && Environment.Is64BitProcess) |
851 |
| - { |
852 |
| - var onnxFileName = "WordEmbeddingEstimator.onnx"; |
853 |
| - var onnxModelPath = GetOutputPath(onnxFileName); |
854 |
| - SaveOnnxModel(onnxModel, onnxModelPath, null); |
| 667 | + SaveOnnxModel(onnxModel, onnxFilePath, onnxTextPath); |
855 | 668 |
|
856 |
| - // Evaluate the saved ONNX model using the data used to train the ML.NET pipeline. |
857 |
| - string[] inputNames = onnxModel.Graph.Input.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
858 |
| - string[] outputNames = onnxModel.Graph.Output.Select(valueInfoProto => valueInfoProto.Name).ToArray(); |
859 |
| - var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(outputNames, inputNames, onnxModelPath); |
860 |
| - var onnxTransformer = onnxEstimator.Fit(data); |
861 |
| - var onnxResult = onnxTransformer.Transform(data); |
862 |
| - CompareSelectedR4VectorColumns("Embed", "Embed0", transformedData, onnxResult); |
863 |
| - } |
| 669 | + CheckEquality(subDir, onnxTextName, parseOption: NumberParseOption.UseSingle); |
864 | 670 | Done();
|
865 | 671 | }
|
866 | 672 |
|
@@ -1178,44 +984,11 @@ private void CompareSelectedR4ScalarColumns(string leftColumnName, string rightC
|
1178 | 984 |
|
1179 | 985 | // Scalar such as R4 (float) is converted to [1, 1]-tensor in ONNX format for consitency of making batch prediction.
|
1180 | 986 | Assert.Equal(1, actual.Length);
|
1181 |
| - //Assert.Equal(expected, actual.GetItemOrDefault(0), precision); |
1182 |
| - //Output.WriteLine(actual.GetItemOrDefault(0)); |
1183 |
| - System.Diagnostics.Debug.WriteLine("Actual: " + actual.GetItemOrDefault(0)); |
1184 |
| - System.Diagnostics.Debug.WriteLine("Expected: " + expected); |
| 987 | + Assert.Equal(expected, actual.GetItemOrDefault(0), precision); |
1185 | 988 | }
|
1186 | 989 | }
|
1187 | 990 | }
|
1188 | 991 |
|
1189 |
| - private void CompareSelectedScalarColumns<T>(string leftColumnName, string rightColumnName, IDataView left, IDataView right) |
1190 |
| - { |
1191 |
| - var leftColumn = left.Schema[leftColumnName]; |
1192 |
| - var rightColumn = right.Schema[rightColumnName]; |
1193 |
| - |
1194 |
| - using (var expectedCursor = left.GetRowCursor(leftColumn)) |
1195 |
| - using (var actualCursor = right.GetRowCursor(rightColumn)) |
1196 |
| - { |
1197 |
| - T expected = default; |
1198 |
| - VBuffer<T> actual = default; |
1199 |
| - var expectedGetter = expectedCursor.GetGetter<T>(leftColumn); |
1200 |
| - var actualGetter = actualCursor.GetGetter<VBuffer<T>>(rightColumn); |
1201 |
| - while (expectedCursor.MoveNext() && actualCursor.MoveNext()) |
1202 |
| - { |
1203 |
| - expectedGetter(ref expected); |
1204 |
| - actualGetter(ref actual); |
1205 |
| - var actualVal = actual.GetItemOrDefault(0); |
1206 |
| - |
1207 |
| - Assert.Equal(1, actual.Length); |
1208 |
| - |
1209 |
| - if (typeof(T) == typeof(ReadOnlyMemory<Char>)) |
1210 |
| - Assert.Equal(expected.ToString(), actualVal.ToString()); |
1211 |
| - else |
1212 |
| - Assert.Equal(expected, actualVal); |
1213 |
| - } |
1214 |
| - } |
1215 |
| - } |
1216 |
| - |
1217 |
| - |
1218 |
| - |
1219 | 992 | private void SaveOnnxModel(ModelProto model, string binaryFormatPath, string textFormatPath)
|
1220 | 993 | {
|
1221 | 994 | DeleteOutputPath(binaryFormatPath); // Clean if such a file exists.
|
|
0 commit comments