@@ -31,14 +31,18 @@ public enum VectorKind
31
31
public readonly VectorKind Kind ;
32
32
public readonly DataKind ItemKind ;
33
33
public readonly bool IsKey ;
34
+ public readonly string [ ] MetadataKinds ;
34
35
35
- public Column ( string name , VectorKind vecKind , DataKind itemKind , bool isKey )
36
/// <summary>
/// Creates a column description from its name, vector kind, item kind, key flag and metadata kinds.
/// </summary>
/// <param name="name">Column name; checked with <c>Contracts.CheckNonEmpty</c> before being stored in <c>Name</c>.</param>
/// <param name="vecKind">Vector kind of the column; stored in <see cref="Kind"/>.</param>
/// <param name="itemKind">Item data kind of the column; stored in <see cref="ItemKind"/>.</param>
/// <param name="isKey">Whether the item type is a key; stored in <see cref="IsKey"/>.</param>
/// <param name="metadataKinds">Metadata kind names; checked with <c>Contracts.CheckValue</c> and stored in <see cref="MetadataKinds"/>.</param>
+ public Column ( string name , VectorKind vecKind , DataKind itemKind , bool isKey , string [ ] metadataKinds )
36
37
{
37
38
Contracts . CheckNonEmpty ( name , nameof ( name ) ) ;
39
+ Contracts . CheckValue ( metadataKinds , nameof ( metadataKinds ) ) ;
40
+
38
41
Name = name ;
39
42
Kind = vecKind ;
40
43
ItemKind = itemKind ;
41
44
IsKey = isKey ;
45
// NOTE(review): the caller's array is stored by reference (no copy), so later mutation of that array
// by the caller would be visible through this public field - confirm this is acceptable.
+ MetadataKinds = metadataKinds ;
42
46
}
43
47
}
44
48
@@ -68,9 +72,14 @@ public static SchemaShape Create(ISchema schema)
68
72
vecKind = Column . VectorKind . VariableVector ;
69
73
else
70
74
vecKind = Column . VectorKind . Scalar ;
75
+
71
76
var kind = type . ItemType . RawKind ;
72
77
var isKey = type . ItemType . IsKey ;
73
- cols . Add ( new Column ( schema . GetColumnName ( iCol ) , vecKind , kind , isKey ) ) ;
78
+
79
+ var metadataNames = schema . GetMetadataTypes ( iCol )
80
+ . Select ( kvp => kvp . Key )
81
+ . ToArray ( ) ;
82
+ cols . Add ( new Column ( schema . GetColumnName ( iCol ) , vecKind , kind , isKey , metadataNames ) ) ;
74
83
}
75
84
}
76
85
return new SchemaShape ( cols . ToArray ( ) ) ;
@@ -93,14 +102,14 @@ public Column FindColumn(string name)
93
102
/// <summary>
94
103
/// The 'data reader' takes a certain kind of input and turns it into an <see cref="IDataView"/>.
95
104
/// </summary>
96
- /// <typeparam name="TIn ">The type of input the reader takes.</typeparam>
97
- public interface IDataReader < TIn >
105
+ /// <typeparam name="TSource">The type of input the reader takes.</typeparam>
106
+ public interface IDataReader < in TSource >
98
107
{
99
108
/// <summary>
100
109
/// Take the data in, make transformations, output the data.
101
110
/// Note that <see cref="IDataView"/>'s are lazy, so no actual transformations happen here, just schema validation.
102
111
/// </summary>
103
- IDataView Read ( TIn input ) ;
112
+ IDataView Read ( TSource input ) ;
104
113
105
114
/// <summary>
106
115
/// The output schema of the reader.
@@ -111,16 +120,16 @@ public interface IDataReader<TIn>
111
120
/// <summary>
112
121
/// Sometimes we need to 'fit' an <see cref="IDataReader{TSource}"/>. This interface represents the 'unfitted' version.
113
122
/// </summary>
114
- /// <typeparam name="TIn ">The type of input the estimator (and eventually transformer) takes.</typeparam>
115
- public interface IDataReaderEstimator < TIn >
123
+ /// <typeparam name="TSource">The type of input the estimator (and eventually transformer) takes.</typeparam>
124
+ public interface IDataReaderEstimator < in TSource >
116
125
{
117
126
/// <summary>
118
127
/// Train and return a transformer.
119
128
///
120
- /// REVIEW: you could consider the transformer to take a different <typeparamref name="TIn "/>, but we don't have such components
129
+ /// REVIEW: you could consider the transformer to take a different <typeparamref name="TSource"/>, but we don't have such components
121
130
/// yet, so why complicate matters?
122
131
/// </summary>
123
- IDataReader < TIn > Fit ( TIn input ) ;
132
+ IDataReader < TSource > Fit ( TSource input ) ;
124
133
125
134
/// <summary>
126
135
/// The 'promise' of the output schema.
@@ -130,12 +139,12 @@ public interface IDataReaderEstimator<TIn>
130
139
}
131
140
132
141
/// <summary>
133
- /// A DataReader estimator that provides more details about the produced reader, in the form of <typeparamref name="TTransformer "/>.
142
+ /// A DataReader estimator that provides more details about the produced reader, in the form of <typeparamref name="TReader"/>.
134
143
/// </summary>
/// <typeparam name="TSource">The type of input passed to <c>Fit</c>.</typeparam>
/// <typeparam name="TReader">The reader type returned by <c>Fit</c>; constrained to <see cref="IDataReader{TSource}"/>.</typeparam>
135
- public interface IDataReaderEstimator < TIn , out TTransformer > : IDataReaderEstimator < TIn >
136
- where TTransformer : IDataReader < TIn >
144
+ public interface IDataReaderEstimator < in TSource , out TReader > : IDataReaderEstimator < TSource >
145
+ where TReader : IDataReader < TSource >
137
146
{
138
- new TTransformer Fit ( TIn input ) ;
147
/// <summary>
/// Declared with <c>new</c>, so it hides the <c>Fit</c> inherited from the base estimator interface and
/// returns the result typed as <typeparamref name="TReader"/> instead of <see cref="IDataReader{TSource}"/>.
/// </summary>
+ new TReader Fit ( TSource input ) ;
139
148
}
140
149
141
150
/// <summary>
@@ -157,18 +166,19 @@ public interface ITransformer
157
166
/// </summary>
158
167
IDataView Transform ( IDataView input ) ;
159
168
}
160
-
169
+
161
170
/// <summary>
162
171
/// The estimator (in Spark terminology) is an 'untrained transformer'. It needs to 'fit' on the data to manufacture
163
172
/// a transformer.
164
173
/// It also provides the 'schema propagation' like transformers do, but over <see cref="SchemaShape"/> instead of <see cref="ISchema"/>.
165
174
/// </summary>
166
- public interface IEstimator
175
+ public interface IEstimator < out TTransformer >
176
+ where TTransformer : ITransformer
167
177
{
168
178
/// <summary>
169
179
/// Train and return a transformer.
170
180
/// </summary>
171
- ITransformer Fit ( IDataView input ) ;
181
+ TTransformer Fit ( IDataView input ) ;
172
182
173
183
/// <summary>
174
184
/// Schema propagation for estimators.
@@ -177,13 +187,4 @@ public interface IEstimator
177
187
/// </summary>
178
188
SchemaShape GetOutputSchema ( SchemaShape inputSchema ) ;
179
189
}
180
-
181
- /// <summary>
182
- /// An estimator that provides more details about the produced transformer, in the form of <typeparamref name="TTransformer"/>.
183
- /// </summary>
184
- public interface IDataEstimator < out TTransformer > : IEstimator
185
- where TTransformer : ITransformer
186
- {
187
- new TTransformer Fit ( IDataView input ) ;
188
- }
189
190
}
0 commit comments