Skip to content

Commit b509add

Browse files
authored
First round of Schema final polish (#1680)
* Changed Schema.Column to be a struct, and added Index and IsHidden to it. Added Schema.DetachedColumn for columns that are not bound to a schema. Moved Schema to the Data namespace.
1 parent 4b9beed commit b509add

File tree

187 files changed

+863
-545
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

187 files changed

+863
-545
lines changed

src/Microsoft.ML.Api/CustomMappingTransformer.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using Microsoft.ML.Core.Data;
6+
using Microsoft.ML.Data;
67
using Microsoft.ML.Runtime;
78
using Microsoft.ML.Runtime.Api;
89
using Microsoft.ML.Runtime.Data;
@@ -168,11 +169,11 @@ public Func<int, bool> GetDependencies(Func<int, bool> activeOutput)
168169
return col => false;
169170
}
170171

171-
public Schema.Column[] GetOutputColumns()
172+
public Schema.DetachedColumn[] GetOutputColumns()
172173
{
173174
var dstRow = new DataViewConstructionUtils.InputRow<TDst>(_host, _parent.AddedSchema);
174175
// All the output columns of dstRow are our outputs.
175-
return Enumerable.Range(0, dstRow.Schema.ColumnCount).Select(x => dstRow.Schema[x]).ToArray();
176+
return Enumerable.Range(0, dstRow.Schema.ColumnCount).Select(x => new Schema.DetachedColumn(dstRow.Schema[x])).ToArray();
176177
}
177178

178179
public void Save(ModelSaveContext ctx)
@@ -202,7 +203,7 @@ public CustomMappingEstimator(IHostEnvironment env, Action<TSrc, TDst> mapAction
202203
public override SchemaShape GetOutputSchema(SchemaShape inputSchema)
203204
{
204205
var addedCols = DataViewConstructionUtils.GetSchemaColumns(Transformer.AddedSchema);
205-
var addedSchemaShape = SchemaShape.Create(new Schema(addedCols));
206+
var addedSchemaShape = SchemaShape.Create(SchemaBuilder.MakeSchema(addedCols));
206207

207208
var result = inputSchema.Columns.ToDictionary(x => x.Name);
208209
var inputDef = InternalSchemaDefinition.Create(typeof(TSrc), Transformer.InputSchemaDefinition);

src/Microsoft.ML.Api/DataViewConstructionUtils.cs

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using Microsoft.ML.Runtime.Data;
67
using Microsoft.ML.Runtime.Internal.Utilities;
78
using Microsoft.ML.Runtime.Model;
@@ -83,7 +84,7 @@ public sealed class InputRow<TRow> : InputRowBase<TRow>, IRowBackedBy<TRow>
8384
public override long Position => _position;
8485

8586
public InputRow(IHostEnvironment env, InternalSchemaDefinition schemaDef)
86-
: base(env, new Schema(GetSchemaColumns(schemaDef)), schemaDef, MakePeeks(schemaDef), c => true)
87+
: base(env, SchemaBuilder.MakeSchema(GetSchemaColumns(schemaDef)), schemaDef, MakePeeks(schemaDef), c => true)
8788
{
8889
_position = -1;
8990
}
@@ -384,7 +385,7 @@ protected DataViewBase(IHostEnvironment env, string name, InternalSchemaDefiniti
384385
Host.AssertValue(schemaDefn);
385386

386387
_schemaDefn = schemaDefn;
387-
_schema = new Schema(GetSchemaColumns(schemaDefn));
388+
_schema = SchemaBuilder.MakeSchema(GetSchemaColumns(schemaDefn));
388389
int n = schemaDefn.Columns.Length;
389390
_peeks = new Delegate[n];
390391
for (var i = 0; i < n; i++)
@@ -790,17 +791,17 @@ protected override bool MoveManyCore(long count)
790791
}
791792
}
792793

793-
internal static Schema.Column[] GetSchemaColumns(InternalSchemaDefinition schemaDefn)
794+
internal static Schema.DetachedColumn[] GetSchemaColumns(InternalSchemaDefinition schemaDefn)
794795
{
795796
Contracts.AssertValue(schemaDefn);
796-
var columns = new Schema.Column[schemaDefn.Columns.Length];
797+
var columns = new Schema.DetachedColumn[schemaDefn.Columns.Length];
797798
for (int i = 0; i < columns.Length; i++)
798799
{
799800
var col = schemaDefn.Columns[i];
800-
var meta = new Schema.Metadata.Builder();
801+
var meta = new MetadataBuilder();
801802
foreach (var kvp in col.Metadata)
802-
meta.Add(new Schema.Column(kvp.Value.Kind, kvp.Value.MetadataType, null), kvp.Value.GetGetterDelegate());
803-
columns[i] = new Schema.Column(col.ColumnName, col.ColumnType, meta.GetMetadata());
803+
meta.Add(kvp.Value.Kind, kvp.Value.MetadataType, kvp.Value.GetGetterDelegate());
804+
columns[i] = new Schema.DetachedColumn(col.ColumnName, col.ColumnType, meta.GetMetadata());
804805
}
805806

806807
return columns;

src/Microsoft.ML.Api/PredictionEngine.cs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5-
using Float = System.Single;
6-
7-
using System.Collections.Generic;
8-
using System.IO;
9-
using Microsoft.ML.Runtime.Data;
10-
using Microsoft.ML.Runtime.Model;
115
using Microsoft.ML.Core.Data;
6+
using Microsoft.ML.Data;
7+
using Microsoft.ML.Runtime.Data;
128
using System;
9+
using System.Collections.Generic;
10+
using System.IO;
1311

1412
namespace Microsoft.ML.Runtime.Api
1513
{

src/Microsoft.ML.Api/StatefulFilterTransform.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using Microsoft.ML.Runtime.Data;
67
using Microsoft.ML.Transforms;
78
using System;
@@ -76,7 +77,7 @@ public StatefulFilterTransform(IHostEnvironment env, IDataView source, Func<TSrc
7677

7778
var outSchema = InternalSchemaDefinition.Create(typeof(TDst), outputSchemaDefinition);
7879
_addedSchema = outSchema;
79-
_bindings = new ColumnBindings(Data.Schema.Create(Source.Schema), DataViewConstructionUtils.GetSchemaColumns(outSchema));
80+
_bindings = new ColumnBindings(Schema.Create(Source.Schema), DataViewConstructionUtils.GetSchemaColumns(outSchema));
8081
}
8182

8283
/// <summary>
@@ -92,7 +93,7 @@ private StatefulFilterTransform(IHostEnvironment env, StatefulFilterTransform<TS
9293
_typedSource = TypedCursorable<TSrc>.Create(Host, newSource, false, transform._inputSchemaDefinition);
9394

9495
_addedSchema = transform._addedSchema;
95-
_bindings = new ColumnBindings(Data.Schema.Create(newSource.Schema), DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
96+
_bindings = new ColumnBindings(Schema.Create(newSource.Schema), DataViewConstructionUtils.GetSchemaColumns(_addedSchema));
9697
}
9798

9899
public bool CanShuffle { get { return false; } }

src/Microsoft.ML.Api/TypedCursor.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using Microsoft.ML.Runtime.Data;
67
using Microsoft.ML.Runtime.Internal.Utilities;
78
using System;

src/Microsoft.ML.Core/Data/IDataView.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using System;
67
using System.Collections.Generic;
78

src/Microsoft.ML.Core/Data/IEstimator.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using Microsoft.ML.Runtime;
67
using Microsoft.ML.Runtime.Data;
78
using System;
@@ -155,7 +156,7 @@ public static SchemaShape Create(Schema schema)
155156
Contracts.CheckValue(schema, nameof(schema));
156157
var cols = new List<Column>();
157158

158-
for (int iCol = 0; iCol < schema.ColumnCount; iCol++)
159+
for (int iCol = 0; iCol < schema.Count; iCol++)
159160
{
160161
if (!schema.IsHidden(iCol))
161162
{

src/Microsoft.ML.Core/Data/ISchemaBindableMapper.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
56
using System;
67
using System.Collections.Generic;
78

src/Microsoft.ML.Core/Data/ITransformModel.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.IO;
7+
using Microsoft.ML.Data;
78
using Microsoft.ML.Runtime.Data;
89

910
namespace Microsoft.ML.Runtime.EntryPoints

src/Microsoft.ML.Core/Data/LinkedRowFilterCursorBase.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
6+
57
namespace Microsoft.ML.Runtime.Data
68
{
79
/// <summary>

src/Microsoft.ML.Core/Data/LinkedRowRootCursorBase.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using Microsoft.ML.Data;
6+
57
namespace Microsoft.ML.Runtime.Data
68
{
79
/// <summary>
@@ -22,14 +24,14 @@ protected LinkedRowRootCursorBase(IChannelProvider provider, IRowCursor input, S
2224
: base(provider, input)
2325
{
2426
Ch.CheckValue(schema, nameof(schema));
25-
Ch.Check(active == null || active.Length == schema.ColumnCount);
27+
Ch.Check(active == null || active.Length == schema.Count);
2628
_active = active;
2729
Schema = schema;
2830
}
2931

3032
public bool IsColumnActive(int col)
3133
{
32-
Ch.Check(0 <= col && col < Schema.ColumnCount);
34+
Ch.Check(0 <= col && col < Schema.Count);
3335
return _active == null || _active[col];
3436
}
3537

src/Microsoft.ML.Core/Data/MetadataBuilder.cs

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using Microsoft.ML.Runtime;
6+
using Microsoft.ML.Runtime.Data;
7+
using Microsoft.ML.Runtime.Internal.Utilities;
8+
using System;
9+
using System.Collections.Generic;
10+
using System.Linq;
11+
12+
namespace Microsoft.ML.Data
13+
{
14+
/// <summary>
/// The class that incrementally builds a <see cref="Schema.Metadata"/>.
/// Entries are accumulated as (name, type, getter) triples, in the order they are added, and are
/// turned into a metadata row by <see cref="GetMetadata"/>.
/// </summary>
public sealed class MetadataBuilder
{
    // One entry per metadata column, kept in insertion order.
    private readonly List<(string Name, ColumnType Type, Delegate Getter)> _items;

    /// <summary>
    /// Create a builder with no metadata entries.
    /// </summary>
    public MetadataBuilder()
    {
        _items = new List<(string Name, ColumnType Type, Delegate Getter)>();
    }

    /// <summary>
    /// Add the columns of <paramref name="metadata"/> whose names satisfy <paramref name="selector"/>
    /// into our new metadata.
    /// </summary>
    /// <param name="metadata">The metadata row to take values from. May be null, in which case nothing is added.</param>
    /// <param name="selector">The predicate describing which metadata columns to keep.</param>
    public void Add(Schema.Metadata metadata, Func<string, bool> selector)
    {
        Contracts.CheckValueOrNull(metadata);
        Contracts.CheckValue(selector, nameof(selector));

        if (metadata == null)
            return;

        foreach (var column in metadata.Schema)
        {
            // The getter is looked up by the column's index within the source metadata schema.
            if (selector(column.Name))
                _items.Add((column.Name, column.Type, metadata.Getters[column.Index]));
        }
    }

    /// <summary>
    /// Add one metadata column, strongly-typed version.
    /// </summary>
    /// <typeparam name="TValue">The type of the value.</typeparam>
    /// <param name="name">The metadata name.</param>
    /// <param name="type">The metadata type. Its raw type must be <typeparamref name="TValue"/>.</param>
    /// <param name="getter">The getter delegate.</param>
    public void Add<TValue>(string name, ColumnType type, ValueGetter<TValue> getter)
    {
        Contracts.CheckNonEmpty(name, nameof(name));
        Contracts.CheckValue(type, nameof(type));
        Contracts.CheckValue(getter, nameof(getter));
        Contracts.CheckParam(type.RawType == typeof(TValue), nameof(getter));
        _items.Add((name, type, getter));
    }

    /// <summary>
    /// Add one metadata column, weakly-typed version.
    /// </summary>
    /// <param name="name">The metadata name.</param>
    /// <param name="type">The metadata type.</param>
    /// <param name="getter">The getter delegate that provides the value. Note that the type of the getter is still checked
    /// inside this method.</param>
    public void Add(string name, ColumnType type, Delegate getter)
    {
        Contracts.CheckNonEmpty(name, nameof(name));
        Contracts.CheckValue(type, nameof(type));
        // Validate the getter here, as the strongly-typed overload does, so that a null coming from a
        // public caller is reported as a bad argument instead of reaching the assertion in AddDelegate.
        Contracts.CheckValue(getter, nameof(getter));
        // NOTE(review): the <int> type argument looks like a placeholder; presumably MarshalActionInvoke
        // re-instantiates AddDelegate with type.RawType as TValue -- confirm against Utils.
        Utils.MarshalActionInvoke(AddDelegate<int>, type.RawType, name, type, getter);
    }

    /// <summary>
    /// Add slot names metadata.
    /// </summary>
    /// <param name="size">The size of the slot names vector.</param>
    /// <param name="getter">The getter delegate for the slot names.</param>
    public void AddSlotNames(int size, ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter)
        => Add(MetadataUtils.Kinds.SlotNames, new VectorType(TextType.Instance, size), getter);

    /// <summary>
    /// Add key values metadata.
    /// </summary>
    /// <typeparam name="TValue">The value type of key values.</typeparam>
    /// <param name="size">The size of key values vector.</param>
    /// <param name="valueType">The value type of key values. Its raw type must match <typeparamref name="TValue"/>.</param>
    /// <param name="getter">The getter delegate for the key values.</param>
    public void AddKeyValues<TValue>(int size, PrimitiveType valueType, ValueGetter<VBuffer<TValue>> getter)
        => Add(MetadataUtils.Kinds.KeyValues, new VectorType(valueType, size), getter);

    /// <summary>
    /// Produce the metadata row that the builder has so far.
    /// Can be called multiple times.
    /// </summary>
    /// <returns>A metadata row with one column per added entry, paired with the entries' getters in the same order.</returns>
    public Schema.Metadata GetMetadata()
    {
        var builder = new SchemaBuilder();
        // Metadata columns are registered without metadata of their own (null).
        foreach (var item in _items)
            builder.AddColumn(item.Name, item.Type, null);
        return new Schema.Metadata(builder.GetSchema(), _items.Select(x => x.Getter).ToArray());
    }

    /// <summary>
    /// Typed helper behind the weakly-typed <c>Add</c>: verifies that <paramref name="getter"/> is a
    /// <see cref="ValueGetter{TValue}"/> and records the entry.
    /// </summary>
    private void AddDelegate<TValue>(string name, ColumnType type, Delegate getter)
    {
        Contracts.AssertNonEmpty(name);
        Contracts.AssertValue(type);
        Contracts.AssertValue(getter);

        // A getter of the wrong delegate type casts to null and is rejected as a bad parameter.
        var typedGetter = getter as ValueGetter<TValue>;
        Contracts.CheckParam(typedGetter != null, nameof(getter));
        _items.Add((name, type, typedGetter));
    }
}
118+
}

src/Microsoft.ML.Core/Data/MetadataUtils.cs

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
using System.Linq;
1010
using System.Threading;
1111
using Microsoft.ML.Core.Data;
12+
using Microsoft.ML.Data;
1213
using Microsoft.ML.Runtime.Internal.Utilities;
1314

1415
namespace Microsoft.ML.Runtime.Data
@@ -231,7 +232,7 @@ public static uint GetMaxMetadataKind(this Schema schema, out int colMax, string
231232
{
232233
uint max = 0;
233234
colMax = -1;
234-
for (int col = 0; col < schema.ColumnCount; col++)
235+
for (int col = 0; col < schema.Count; col++)
235236
{
236237
var columnType = schema.GetMetadataTypeOrNull(metadataKind, col);
237238
if (columnType == null || !columnType.IsKey || columnType.RawKind != DataKind.U4)
@@ -255,7 +256,7 @@ public static uint GetMaxMetadataKind(this Schema schema, out int colMax, string
255256
/// </summary>
256257
public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, uint value)
257258
{
258-
for (int col = 0; col < schema.ColumnCount; col++)
259+
for (int col = 0; col < schema.Count; col++)
259260
{
260261
var columnType = schema.GetMetadataTypeOrNull(metadataKind, col);
261262
if (columnType != null && columnType.IsKey && columnType.RawKind == DataKind.U4)
@@ -274,7 +275,7 @@ public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataK
274275
/// </summary>
275276
public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, string value)
276277
{
277-
for (int col = 0; col < schema.ColumnCount; col++)
278+
for (int col = 0; col < schema.Count; col++)
278279
{
279280
var columnType = schema.GetMetadataTypeOrNull(metadataKind, col);
280281
if (columnType != null && columnType.IsText)
@@ -413,15 +414,7 @@ public static bool TryGetMetadata<T>(this Schema schema, PrimitiveType type, str
413414
/// <summary>
414415
/// Return whether the given column index is hidden in the given schema.
415416
/// </summary>
416-
public static bool IsHidden(this Schema schema, int col)
417-
{
418-
Contracts.CheckValue(schema, nameof(schema));
419-
string name = schema.GetColumnName(col);
420-
int top;
421-
bool tmp = schema.TryGetColumnIndex(name, out top);
422-
Contracts.Assert(tmp); // This would only be false if the implementation of schema were buggy.
423-
return !tmp || top != col;
424-
}
417+
public static bool IsHidden(this Schema schema, int col) => schema[col].IsHidden;
425418

426419
/// <summary>
427420
/// The categoricalFeatures is a vector of the indices of categorical features slots.

src/Microsoft.ML.Core/Data/RoleMappedSchema.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// See the LICENSE file in the project root for more information.
44

55
using System.Collections.Generic;
6+
using Microsoft.ML.Data;
67
using Microsoft.ML.Runtime.Internal.Utilities;
78

89
namespace Microsoft.ML.Runtime.Data

0 commit comments

Comments
 (0)