|
| 1 | +// Licensed to the .NET Foundation under one or more agreements. |
| 2 | +// The .NET Foundation licenses this file to you under the MIT license. |
| 3 | +// See the LICENSE file in the project root for more information. |
| 4 | + |
| 5 | +using Microsoft.ML.Runtime.Api; |
| 6 | +using Microsoft.ML.Runtime.Data; |
| 7 | +using Microsoft.ML.Runtime.Model; |
| 8 | +using Microsoft.ML.Runtime.RunTests; |
| 9 | +using Microsoft.ML.Runtime.Tools; |
| 10 | +using Microsoft.ML.Transforms; |
| 11 | +using System; |
| 12 | +using System.IO; |
| 13 | +using System.Linq; |
| 14 | +using Xunit; |
| 15 | +using Xunit.Abstractions; |
| 16 | + |
| 17 | +namespace Microsoft.ML.Tests.Transformers |
| 18 | +{ |
/// <summary>
/// Tests for <see cref="HashEstimator"/> and the <see cref="HashTransformer"/> it produces:
/// the generic estimator workout, the key-value metadata attached to hashed columns,
/// the command-line entry point, and saving/loading through the legacy model format.
/// </summary>
public class HashTests : TestDataPipeBase
{
    // Number of hash bits used by every column in TestMetadata; the key-value
    // metadata vector is expected to have one slot per possible hash value.
    private const int MetadataHashBits = 10;
    private const int MetadataKeyCount = 1 << MetadataHashBits;

    public HashTests(ITestOutputHelper output)
        : base(output)
    {
    }

    /// <summary>
    /// Input row with three scalar float fields; used by the workout and the save/load test.
    /// </summary>
    private class TestClass
    {
        public float A;
        public float B;
        public float C;
    }

    /// <summary>
    /// Input row mixing fixed-size vectors and scalars. Only <c>A</c> is hashed by
    /// <see cref="TestMetadata"/>.
    /// </summary>
    private class TestMeta
    {
        [VectorType(2)]
        public float[] A;
        public float B;
        [VectorType(2)]
        public double[] C;
        public double D;
    }

    /// <summary>
    /// Runs a four-column hash estimator (different bit counts, ordering, seed and
    /// invert-hash settings) through the shared <c>TestEstimatorCore</c> checks.
    /// </summary>
    [Fact]
    public void HashWorkout()
    {
        var data = new[]
        {
            new TestClass() { A = 1, B = 2, C = 3 },
            new TestClass() { A = 4, B = 5, C = 6 },
        };
        var dataView = ComponentCreation.CreateDataView(Env, data);

        var columns = new[]
        {
            new HashTransformer.ColumnInfo("A", "HashA", hashBits: 4, invertHash: -1),
            new HashTransformer.ColumnInfo("B", "HashB", hashBits: 3, ordered: true),
            new HashTransformer.ColumnInfo("C", "HashC", seed: 42),
            new HashTransformer.ColumnInfo("A", "HashD"),
        };
        var pipe = new HashEstimator(Env, columns);

        TestEstimatorCore(pipe, dataView);
        Done();
    }

    /// <summary>
    /// Hashes the vector column <c>A</c> three ways (invertHash 1, invertHash -1, and
    /// invertHash -1 with ordered hashing) and validates the resulting key-value metadata.
    /// </summary>
    [Fact]
    public void TestMetadata()
    {
        var data = new[]
        {
            new TestMeta() { A = new float[2] { 3.5f, 2.5f }, B = 1, C = new double[2] { 5.1f, 6.1f }, D = 7 },
            new TestMeta() { A = new float[2] { 3.5f, 2.5f }, B = 1, C = new double[2] { 5.1f, 6.1f }, D = 7 },
            new TestMeta() { A = new float[2] { 3.5f, 2.5f }, B = 1, C = new double[2] { 5.1f, 6.1f }, D = 7 },
        };
        var dataView = ComponentCreation.CreateDataView(Env, data);

        var pipe = new HashEstimator(Env, new[]
        {
            new HashTransformer.ColumnInfo("A", "HashA", invertHash: 1, hashBits: MetadataHashBits),
            new HashTransformer.ColumnInfo("A", "HashAUnlim", invertHash: -1, hashBits: MetadataHashBits),
            new HashTransformer.ColumnInfo("A", "HashAUnlimOrdered", invertHash: -1, hashBits: MetadataHashBits, ordered: true),
        });

        var result = pipe.Fit(dataView).Transform(dataView);
        ValidateMetadata(result);
        Done();
    }

    /// <summary>
    /// Checks that each hashed column exposes exactly one metadata kind (key values),
    /// that the key-value vector has one slot per hash value, and that the explicit
    /// entries are the inverse-hash strings of the input values.
    /// </summary>
    /// <param name="result">Transformed data view produced by <see cref="TestMetadata"/>.</param>
    private void ValidateMetadata(IDataView result)
    {
        // REVIEW: This is weird. invertHash is set to 1 for HashA, so only one value
        // would be expected in the key values, but two are reported.
        Assert.Equal(new[] { "2.5", "3.5" }, GetKeyValueStrings(result, "HashA"));

        // Same source column, bit count and (default) seed as HashA, so the same
        // slots and strings are expected.
        Assert.Equal(new[] { "2.5", "3.5" }, GetKeyValueStrings(result, "HashAUnlim"));

        // NOTE(review): the expected strings assume the ordered inverse hash renders
        // each entry as "<position in vector>:<value>" — confirm on first run.
        // The comparison is done on sorted values so it does not depend on which
        // hash slot each (position, value) pair lands in.
        var orderedValues = GetKeyValueStrings(result, "HashAUnlimOrdered");
        Assert.Equal(new[] { "0:3.5", "1:2.5" }, orderedValues.OrderBy(x => x, StringComparer.Ordinal));
    }

    /// <summary>
    /// Asserts the metadata shape of a hashed column and returns its explicit
    /// key-value entries as strings, in slot order.
    /// </summary>
    /// <param name="view">Data view whose schema is inspected.</param>
    /// <param name="columnName">Name of the hashed output column.</param>
    /// <returns>String form of every explicit key value of the column.</returns>
    private static string[] GetKeyValueStrings(IDataView view, string columnName)
    {
        Assert.True(view.Schema.TryGetColumnIndex(columnName, out int columnIndex));

        // Key values must be the only metadata kind on the column.
        var metadataKinds = view.Schema.GetMetadataTypes(columnIndex).Select(x => x.Key);
        Assert.Equal(new string[1] { MetadataUtils.Kinds.KeyValues }, metadataKinds);

        VBuffer<ReadOnlyMemory<char>> keys = default;
        view.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, columnIndex, ref keys);
        Assert.Equal(MetadataKeyCount, keys.Length);

        return keys.Items().Select(x => x.Value.ToString()).ToArray();
    }

    /// <summary>
    /// Verifies that the Hash transform can be constructed from a command line and
    /// that the command exits with code 0.
    /// </summary>
    [Fact]
    public void TestCommandLine()
    {
        // NOTE(review): the input path looks like a placeholder; presumably
        // showschema does not need the file to exist — confirm.
        var exitCode = Maml.Main(new[] { @"showschema loader=Text{col=A:R4:0} xf=Hash{col=B:A} in=f:\2.txt" });
        Assert.Equal(0, exitCode);
    }

    /// <summary>
    /// Saves the fitted hash pipeline with <c>TrainUtils.SaveModel</c> and reloads it
    /// with <c>ModelFileUtils.LoadTransforms</c> from the same in-memory stream.
    /// </summary>
    [Fact]
    public void TestOldSavingAndLoading()
    {
        var data = new[]
        {
            new TestClass() { A = 1, B = 2, C = 3 },
            new TestClass() { A = 4, B = 5, C = 6 },
        };
        var dataView = ComponentCreation.CreateDataView(Env, data);

        var pipe = new HashEstimator(Env, new[]
        {
            new HashTransformer.ColumnInfo("A", "HashA", hashBits: 4, invertHash: -1),
            new HashTransformer.ColumnInfo("B", "HashB", hashBits: 3, ordered: true),
            new HashTransformer.ColumnInfo("C", "HashC", seed: 42),
            new HashTransformer.ColumnInfo("A", "HashD"),
        });

        var result = pipe.Fit(dataView).Transform(dataView);
        var resultRoles = new RoleMappedData(result);

        using (var ms = new MemoryStream())
        {
            TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);

            // Rewind so the loader reads the model that was just written.
            ms.Position = 0;
            Assert.NotNull(ModelFileUtils.LoadTransforms(Env, dataView, ms));
        }
    }
}
| 134 | +} |
0 commit comments