|
6 | 6 | using System.Collections.Generic;
|
7 | 7 | using System.IO;
|
8 | 8 | using System.Linq;
|
| 9 | +using Microsoft.Data.DataView; |
9 | 10 | using Microsoft.ML.Core.Data;
|
10 | 11 | using Microsoft.ML.Data;
|
11 | 12 | using Microsoft.ML.Model;
|
@@ -159,6 +160,13 @@ public void ValueMapVectorValueTest()
|
159 | 160 | new int[] {400, 500, 600, 700 }};
|
160 | 161 |
|
161 | 162 | var estimator = new ValueMappingEstimator<string, int>(Env, keys, values, new[] { ("D", "A"), ("E", "B"), ("F", "C") });
|
| 163 | + var schema = estimator.GetOutputSchema(SchemaShape.Create(dataView.Schema)); |
| 164 | + foreach (var name in new[] { "D", "E", "F" }) |
| 165 | + { |
| 166 | + Assert.True(schema.TryFindColumn(name, out var originalColumn)); |
| 167 | + Assert.Equal(SchemaShape.Column.VectorKind.VariableVector, originalColumn.Kind); |
| 168 | + } |
| 169 | + |
162 | 170 | var t = estimator.Fit(dataView);
|
163 | 171 |
|
164 | 172 | var result = t.Transform(dataView);
|
@@ -509,6 +517,42 @@ public void ValueMappingWorkout()
|
509 | 517 | TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView);
|
510 | 518 | }
|
511 | 519 |
|
/// <summary>
/// Passes a value-mapping estimator whose mapped values are <c>int[]</c> arrays
/// to <c>TestEstimatorCore</c>, together with one valid and one invalid input view.
/// </summary>
[Fact]
public void ValueMappingValueTypeIsVectorWorkout()
{
    // Lookup table: each string key is paired with an int array; the arrays differ in length.
    var keys = new List<string> { "foo", "bar", "test" };
    var values = new List<int[]>
    {
        new[] { 2, 3, 4 },
        new[] { 100, 200 },
        new[] { 400, 500, 600, 700 },
    };

    // Input handed over as the valid fit input: columns A, B and C all hold strings.
    TestClass[] goodRows = { new TestClass { A = "bar", B = "test", C = "foo" } };
    var dataView = ML.Data.ReadFromEnumerable(goodRows);

    // Input handed over as the invalid input: column B holds a float instead of a string.
    TestWrong[] badRows = { new TestWrong { A = "bar", B = 1.2f } };
    var badDataView = ML.Data.ReadFromEnumerable(badRows);

    // Workout on value mapping
    var columnPairs = new[] { ("D", "A"), ("E", "B"), ("F", "C") };
    var est = ML.Transforms.Conversion.ValueMap(keys, values, columnPairs);
    TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView);
}
| 538 | + |
/// <summary>
/// Chains <c>TokenizeWords</c> (column "B" into "TokenizeB") with a value mapping
/// ("TokenizeB" into "VecB") and passes the resulting estimator chain to
/// <c>TestEstimatorCore</c>, together with one valid and one invalid input view.
/// </summary>
[Fact]
public void ValueMappingInputIsVectorWorkout()
{
    // Input handed over as the valid fit input: B is a single string containing three words.
    TestClass[] goodRows = { new TestClass { B = "bar test foo" } };
    var dataView = ML.Data.ReadFromEnumerable(goodRows);

    // Input handed over as the invalid input: B holds a float instead of a string.
    TestWrong[] badRows = { new TestWrong { B = 1.2f } };
    var badDataView = ML.Data.ReadFromEnumerable(badRows);

    // Lookup table keyed by text memory; "wahoo" does not appear in the valid input above.
    var keys = new[] { "foo", "bar", "test", "wahoo" }
        .Select(word => word.AsMemory())
        .ToList();
    var values = new List<int> { 1, 2, 3, 4 };

    // Build the two stages separately, then chain them in the same order as before.
    var tokenizer = ML.Transforms.Text.TokenizeWords("TokenizeB", "B");
    var mapper = ML.Transforms.Conversion.ValueMap(keys, values, new[] { ("VecB", "TokenizeB") });
    var est = tokenizer.Append(mapper);

    TestEstimatorCore(est, validFitInput: dataView, invalidInput: badDataView);
}
| 555 | + |
512 | 556 | [Fact]
|
513 | 557 | void TestCommandLine()
|
514 | 558 | {
|
|
0 commit comments