From edba472bcaf082740da73f77c687be53ba0e98c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 18 May 2018 19:31:00 +0200 Subject: [PATCH 1/6] Compile CpuMathNative and FastTreeNative with charset=utf-8, fix for issue #78 --- src/Native/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt index 3a0ed9b795..947a664ab6 100644 --- a/src/Native/CMakeLists.txt +++ b/src/Native/CMakeLists.txt @@ -10,6 +10,7 @@ include_directories("${CMAKE_BINARY_DIR}/../../") if(WIN32) add_definitions(-DWIN32) add_definitions(-D_WIN32=1) + add_definitions(-DUNICODE -D_UNICODE) if(IS_64BIT_BUILD) add_definitions(-D_WIN64=1) endif() From 95e3646b84fd8b1461da209db9415af28cb1776b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Sat, 30 Jun 2018 01:57:21 +0200 Subject: [PATCH 2/6] add method AddSerialize to declare entrypoints outside ML.net --- src/Microsoft.ML/Runtime/Experiment/Experiment.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Microsoft.ML/Runtime/Experiment/Experiment.cs b/src/Microsoft.ML/Runtime/Experiment/Experiment.cs index 108befb74b..2caa8a7176 100644 --- a/src/Microsoft.ML/Runtime/Experiment/Experiment.cs +++ b/src/Microsoft.ML/Runtime/Experiment/Experiment.cs @@ -170,6 +170,11 @@ private string Serialize(string name, object input, object output) } } + public void AddSerialize(string name, object input, object output) + { + _jsonNodes.Add(Serialize(name, input, output)); + } + private string GetEntryPointName(Type inputType) { if (inputType.FullName != null) From d5a419efaa1521430ee563b82d82136d39835404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Thu, 26 Jul 2018 19:31:55 +0200 Subject: [PATCH 3/6] add a unit test to check the creation of a cursor on sparse vectors --- .../TestSparseDataView.cs | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 test/Microsoft.ML.TestFramework/TestSparseDataView.cs 
diff --git a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs new file mode 100644 index 0000000000..063a31c413 --- /dev/null +++ b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs @@ -0,0 +1,94 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.ML.Runtime.Command; +using Microsoft.ML.Runtime.Api; +using Microsoft.ML.Runtime.Data; +using Microsoft.ML.Runtime.Internal.Utilities; +using Microsoft.ML.Runtime.Model; +using Microsoft.ML.Runtime.Tools; +using Xunit; +using Xunit.Abstractions; + +namespace Microsoft.ML.Runtime.RunTests +{ + public sealed partial class TestSparseDataView : TestDataViewBase + { + const string Cat = "DataView"; + + public TestSparseDataView(ITestOutputHelper obj): base(obj) + { + } + + class ExampleA + { + [VectorType(2)] + public float[] X; + } + + class ExampleASparse + { + [VectorType(5)] + public VBuffer<float> X; + } + + [Fact] + [TestCategory(Cat)] + public void SparseDataView() + { + var inputs = new[] { + new ExampleASparse() { X = new VBuffer<float> (5, 3, new float[] { 1, 10, 100 }, new int[] { 0, 2, 4 }) }, + new ExampleASparse() { X = new VBuffer<float> (5, 3, new float[] { 2, 3, 5 }, new int[] { 0, 1, 3 }) } + }; + var host = new TlcEnvironment(); + var data = host.CreateStreamingDataView(inputs); + VBuffer<float> value = new VBuffer<float>(); + int n = 0; + using (var cur = data.GetRowCursor(i => true)) + { + var getter = cur.GetGetter<VBuffer<float>>(0); + while (cur.MoveNext()) + { + getter(ref value); + Assert.True(value.Count == 3); + ++n; + } + } + Assert.True(n == 2); + Done(); + } + + [Fact] + [TestCategory(Cat)] + public void DenseDataView() + { + var inputs = new[] { + new ExampleA() { X 
= new float[] { 1, 10, 100 } }, + new ExampleA() { X = new float[] { 2, 3, 5 } } + }; + var host = new TlcEnvironment(); + var data = host.CreateStreamingDataView(inputs); + VBuffer<float> value = new VBuffer<float>(); + int n = 0; + using (var cur = data.GetRowCursor(i => true)) + { + var getter = cur.GetGetter<VBuffer<float>>(0); + while (cur.MoveNext()) + { + getter(ref value); + Assert.True(value.Count == 3); + ++n; + } + } + Assert.True(n == 2); + Done(); + } + } +} From 3409b9f0fb5949f14a20b155c148212be9636dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 27 Jul 2018 10:28:56 +0200 Subject: [PATCH 4/6] fix #586 and cursor creation on dataview with sparse vectors --- .../DataViewConstructionUtils.cs | 1 + src/Microsoft.ML.Api/TypedCursor.cs | 1 + .../TestSparseDataView.cs | 50 +++++++++++++------ 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs index 341e3a72af..e940ea9d4d 100644 --- a/src/Microsoft.ML.Api/DataViewConstructionUtils.cs +++ b/src/Microsoft.ML.Api/DataViewConstructionUtils.cs @@ -198,6 +198,7 @@ private Delegate CreateGetter(int index) Ch.Assert(outputType.GetGenericTypeDefinition() == typeof(VBuffer<>)); Ch.Assert(outputType.GetGenericArguments()[0] == colType.ItemType.RawType); del = CreateDirectVBufferGetterDelegate; + genericType = colType.ItemType.RawType; } else if (colType.IsPrimitive) { diff --git a/src/Microsoft.ML.Api/TypedCursor.cs b/src/Microsoft.ML.Api/TypedCursor.cs index f6ebaf687f..cd8198e14d 100644 --- a/src/Microsoft.ML.Api/TypedCursor.cs +++ b/src/Microsoft.ML.Api/TypedCursor.cs @@ -349,6 +349,7 @@ private Action GenerateSetter(IRow input, int index, InternalSchemaDefinit Ch.Assert(fieldType.GetGenericTypeDefinition() == typeof(VBuffer<>)); Ch.Assert(fieldType.GetGenericArguments()[0] == colType.ItemType.RawType); del = CreateVBufferToVBufferSetter; + genericType = colType.ItemType.RawType; } else if 
(colType.IsPrimitive) { diff --git a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs index 063a31c413..a596671609 100644 --- a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs +++ b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs @@ -23,37 +23,48 @@ public sealed partial class TestSparseDataView : TestDataViewBase { const string Cat = "DataView"; - public TestSparseDataView(ITestOutputHelper obj): base(obj) + public TestSparseDataView(ITestOutputHelper obj) : base(obj) { } - class ExampleA + class DenseExample<T> { [VectorType(2)] - public float[] X; + public T[] X; } - class ExampleASparse + class SparseExample<T> { [VectorType(5)] - public VBuffer<float> X; + public VBuffer<T> X; } [Fact] [TestCategory(Cat)] public void SparseDataView() + { + GenericSparseDataView(new[] { 1f, 2f, 3f }, new[] { 1f, 10f, 100f }); + GenericSparseDataView(new DvInt4[] { 1, 2, 3 }, new DvInt4[] { 1, 10, 100 }); + GenericSparseDataView(new DvBool[] { true, true, true }, new DvBool[] { false, false, false }); + GenericSparseDataView(new double[] { 1, 2, 3 }, new double[] { 1, 10, 100 }); + GenericSparseDataView(new DvText[] { new DvText("a"), new DvText("b"), new DvText("c") }, + new DvText[] { new DvText("aa"), new DvText("bb"), new DvText("cc") }); + Done(); + } + + void GenericSparseDataView<T>(T[] v1, T[] v2) { var inputs = new[] { - new ExampleASparse() { X = new VBuffer<float> (5, 3, new float[] { 1, 10, 100 }, new int[] { 0, 2, 4 }) }, - new ExampleASparse() { X = new VBuffer<float> (5, 3, new float[] { 2, 3, 5 }, new int[] { 0, 1, 3 }) } + new SparseExample<T>() { X = new VBuffer<T> (5, 3, v1, new int[] { 0, 2, 4 }) }, + new SparseExample<T>() { X = new VBuffer<T> (5, 3, v2, new int[] { 0, 1, 3 }) } }; var host = new TlcEnvironment(); var data = host.CreateStreamingDataView(inputs); - VBuffer<float> value = new VBuffer<float>(); + var value = new VBuffer<T>(); int n = 0; using (var cur = data.GetRowCursor(i => true)) { - var getter = cur.GetGetter<VBuffer<float>>(0); + var getter = 
cur.GetGetter<VBuffer<T>>(0); while (cur.MoveNext()) { getter(ref value); @@ -62,24 +73,34 @@ public void SparseDataView() } } Assert.True(n == 2); - Done(); } [Fact] [TestCategory(Cat)] public void DenseDataView() + { + GenericDenseDataView(new[] { 1f, 2f, 3f }, new[] { 1f, 10f, 100f }); + GenericDenseDataView(new DvInt4[] { 1, 2, 3 }, new DvInt4[] { 1, 10, 100 }); + GenericDenseDataView(new DvBool[] { true, true, true }, new DvBool[] { false, false, false }); + GenericDenseDataView(new double[] { 1, 2, 3 }, new double[] { 1, 10, 100 }); + GenericDenseDataView(new DvText[] { new DvText("a"), new DvText("b"), new DvText("c") }, + new DvText[] { new DvText("aa"), new DvText("bb"), new DvText("cc") }); + Done(); + } + + void GenericDenseDataView<T>(T[] v1, T[] v2) { var inputs = new[] { - new ExampleA() { X = new float[] { 1, 10, 100 } }, - new ExampleA() { X = new float[] { 2, 3, 5 } } + new DenseExample<T>() { X = v1 }, + new DenseExample<T>() { X = v2 } }; var host = new TlcEnvironment(); var data = host.CreateStreamingDataView(inputs); - VBuffer<float> value = new VBuffer<float>(); + var value = new VBuffer<T>(); int n = 0; using (var cur = data.GetRowCursor(i => true)) { - var getter = cur.GetGetter<VBuffer<float>>(0); + var getter = cur.GetGetter<VBuffer<T>>(0); while (cur.MoveNext()) { getter(ref value); @@ -88,7 +109,6 @@ public void DenseDataView() } } Assert.True(n == 2); - Done(); } } } From 63fa647ce5ded127635d3986c6753b9d32bd2ed5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 27 Jul 2018 10:33:23 +0200 Subject: [PATCH 5/6] revert changes not related to #586 --- src/Microsoft.ML/Runtime/Experiment/Experiment.cs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Microsoft.ML/Runtime/Experiment/Experiment.cs b/src/Microsoft.ML/Runtime/Experiment/Experiment.cs index 2caa8a7176..108befb74b 100644 --- a/src/Microsoft.ML/Runtime/Experiment/Experiment.cs +++ b/src/Microsoft.ML/Runtime/Experiment/Experiment.cs @@ -170,11 +170,6 @@ private string Serialize(string name, object input, object 
output) } } - public void AddSerialize(string name, object input, object output) - { - _jsonNodes.Add(Serialize(name, input, output)); - } - private string GetEntryPointName(Type inputType) { if (inputType.FullName != null) From bebf262f9f8814a2554898e0d7c90b084a703214 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?xavier=20dupr=C3=A9?= Date: Fri, 27 Jul 2018 22:19:17 +0200 Subject: [PATCH 6/6] extend coverage of unit tests, addresses PR's comment --- .../TestSparseDataView.cs | 82 ++++++++++--------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs index a596671609..08c9e17a28 100644 --- a/test/Microsoft.ML.TestFramework/TestSparseDataView.cs +++ b/test/Microsoft.ML.TestFramework/TestSparseDataView.cs @@ -2,38 +2,28 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Threading; -using System.Threading.Tasks; -using Microsoft.ML.Runtime.Command; using Microsoft.ML.Runtime.Api; using Microsoft.ML.Runtime.Data; -using Microsoft.ML.Runtime.Internal.Utilities; -using Microsoft.ML.Runtime.Model; -using Microsoft.ML.Runtime.Tools; using Xunit; using Xunit.Abstractions; namespace Microsoft.ML.Runtime.RunTests { - public sealed partial class TestSparseDataView : TestDataViewBase + public sealed class TestSparseDataView : TestDataViewBase { - const string Cat = "DataView"; + private const string Cat = "DataView"; public TestSparseDataView(ITestOutputHelper obj) : base(obj) { } - class DenseExample<T> + private class DenseExample<T> { [VectorType(2)] public T[] X; } - class SparseExample<T> + private class SparseExample<T> { [VectorType(5)] public VBuffer<T> X; @@ -49,30 +39,36 @@ public void SparseDataView() GenericSparseDataView(new double[] { 1, 2, 3 }, new double[] { 1, 10, 100 }); 
GenericSparseDataView(new DvText[] { new DvText("a"), new DvText("b"), new DvText("c") }, new DvText[] { new DvText("aa"), new DvText("bb"), new DvText("cc") }); - Done(); } - void GenericSparseDataView<T>(T[] v1, T[] v2) + private void GenericSparseDataView<T>(T[] v1, T[] v2) { var inputs = new[] { new SparseExample<T>() { X = new VBuffer<T> (5, 3, v1, new int[] { 0, 2, 4 }) }, new SparseExample<T>() { X = new VBuffer<T> (5, 3, v2, new int[] { 0, 1, 3 }) } }; - var host = new TlcEnvironment(); - var data = host.CreateStreamingDataView(inputs); - var value = new VBuffer<T>(); - int n = 0; - using (var cur = data.GetRowCursor(i => true)) + using (var host = new TlcEnvironment()) { - var getter = cur.GetGetter<VBuffer<T>>(0); - while (cur.MoveNext()) + var data = host.CreateStreamingDataView(inputs); + var value = new VBuffer<T>(); + int n = 0; + using (var cur = data.GetRowCursor(i => true)) { - getter(ref value); - Assert.True(value.Count == 3); - ++n; + var getter = cur.GetGetter<VBuffer<T>>(0); + while (cur.MoveNext()) + { + getter(ref value); + Assert.True(value.Count == 3); + ++n; + } } + Assert.True(n == 2); + var iter = data.AsEnumerable<SparseExample<T>>(host, false).GetEnumerator(); + n = 0; + while (iter.MoveNext()) + ++n; + Assert.True(n == 2); } - Assert.True(n == 2); } [Fact] @@ -85,30 +81,36 @@ public void DenseDataView() GenericDenseDataView(new double[] { 1, 2, 3 }, new double[] { 1, 10, 100 }); GenericDenseDataView(new DvText[] { new DvText("a"), new DvText("b"), new DvText("c") }, new DvText[] { new DvText("aa"), new DvText("bb"), new DvText("cc") }); - Done(); } - void GenericDenseDataView<T>(T[] v1, T[] v2) + private void GenericDenseDataView<T>(T[] v1, T[] v2) { var inputs = new[] { new DenseExample<T>() { X = v1 }, new DenseExample<T>() { X = v2 } }; - var host = new TlcEnvironment(); - var data = host.CreateStreamingDataView(inputs); - var value = new VBuffer<T>(); - int n = 0; - using (var cur = data.GetRowCursor(i => true)) + using (var host = new TlcEnvironment()) { - var getter = cur.GetGetter<VBuffer<T>>(0); - while 
(cur.MoveNext()) + var data = host.CreateStreamingDataView(inputs); + var value = new VBuffer<T>(); + int n = 0; + using (var cur = data.GetRowCursor(i => true)) { - getter(ref value); - Assert.True(value.Count == 3); - ++n; + var getter = cur.GetGetter<VBuffer<T>>(0); + while (cur.MoveNext()) + { + getter(ref value); + Assert.True(value.Count == 3); + ++n; + } } + Assert.True(n == 2); + var iter = data.AsEnumerable<DenseExample<T>>(host, false).GetEnumerator(); + n = 0; + while (iter.MoveNext()) + ++n; + Assert.True(n == 2); } - Assert.True(n == 2); } } }