Skip to content

Commit ff9b14a

Browse files
authored
infer purpose of hidden columns as 'ignore' (dotnet#142)
1 parent 38595ab commit ff9b14a

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

src/Microsoft.ML.Auto/ColumnInference/PurposeInference.cs

+4
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,10 @@ public static PurposeInference.Column[] InferPurposes(MLContext context, IDataVi
280280
{
281281
intermediateCol = new IntermediateColumn(data, i, ColumnPurpose.Label);
282282
}
283+
else if (column.IsHidden)
284+
{
285+
intermediateCol = new IntermediateColumn(data, i, ColumnPurpose.Ignore);
286+
}
283287
else if(columnOverrides != null && columnOverrides.TryGetValue(column.Name, out var columnPurpose))
284288
{
285289
intermediateCol = new IntermediateColumn(data, i, columnPurpose);

src/Test/PurposeInferenceTests.cs

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
using System.Linq;
2+
using Microsoft.Data.DataView;
3+
using Microsoft.ML.Data;
4+
using Microsoft.VisualStudio.TestTools.UnitTesting;
5+
6+
namespace Microsoft.ML.Auto.Test
7+
{
8+
/// <summary>
/// Tests for <c>PurposeInference.InferPurposes</c>.
/// </summary>
[TestClass]
public class PurposeInferenceTests
{
    /// <summary>
    /// Checks that a hidden column is inferred as <c>ColumnPurpose.Ignore</c>, while the
    /// label column and the visible 'Features' column keep their expected purposes.
    /// </summary>
    [TestMethod]
    public void PurposeInferenceHiddenColumnsTest()
    {
        var context = new MLContext();

        // Build a basic, empty data view with a boolean label and a single numeric feature.
        var schemaBuilder = new SchemaBuilder();
        schemaBuilder.AddColumn(DefaultColumnNames.Label, BoolType.Instance);
        schemaBuilder.AddColumn(DefaultColumnNames.Features, NumberType.R4);
        var schema = schemaBuilder.GetSchema();
        IDataView data = new EmptyDataView(context, schema);

        // Normalize the 'Features' column. This has the effect of creating 2 columns named
        // 'Features' in the data view, the first of which gets marked as 'Hidden'.
        var normalizer = context.Transforms.Normalize(DefaultColumnNames.Features);
        data = normalizer.Fit(data).Transform(data);

        // Infer purposes.
        var purposes = PurposeInference.InferPurposes(context, data, DefaultColumnNames.Label);

        // InferPurposes returns an array, so read Length directly rather than going through LINQ.
        Assert.AreEqual(3, purposes.Length, "Expected one purpose per column: label, hidden features, features.");
        Assert.AreEqual(ColumnPurpose.Label, purposes[0].Purpose, "The label column should be inferred as Label.");
        // The first 'Features' column is the hidden one and must be ignored.
        Assert.AreEqual(ColumnPurpose.Ignore, purposes[1].Purpose, "The hidden 'Features' column should be inferred as Ignore.");
        // The second 'Features' column is the visible one and stays a numeric feature.
        Assert.AreEqual(ColumnPurpose.NumericFeature, purposes[2].Purpose, "The visible 'Features' column should be inferred as NumericFeature.");
    }
}
39+
}

0 commit comments

Comments
 (0)