Skip to content

Commit f84e67a

Browse files
author
Pete Luferenko
committed
Some renaming to interfaces
Removed non-typed estimator. Fixed collections in ad-hoc tests.
1 parent 49730cb commit f84e67a

File tree

2 files changed

+172
-110
lines changed

2 files changed

+172
-110
lines changed

src/Microsoft.ML.Core/Data/IEstimator.cs

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,18 @@ public enum VectorKind
3131
public readonly VectorKind Kind;
3232
public readonly DataKind ItemKind;
3333
public readonly bool IsKey;
34+
public readonly string[] MetadataKinds;
3435

35-
public Column(string name, VectorKind vecKind, DataKind itemKind, bool isKey)
36+
public Column(string name, VectorKind vecKind, DataKind itemKind, bool isKey, string[] metadataKinds)
3637
{
3738
Contracts.CheckNonEmpty(name, nameof(name));
39+
Contracts.CheckValue(metadataKinds, nameof(metadataKinds));
40+
3841
Name = name;
3942
Kind = vecKind;
4043
ItemKind = itemKind;
4144
IsKey = isKey;
45+
MetadataKinds = metadataKinds;
4246
}
4347
}
4448

@@ -68,9 +72,14 @@ public static SchemaShape Create(ISchema schema)
6872
vecKind = Column.VectorKind.VariableVector;
6973
else
7074
vecKind = Column.VectorKind.Scalar;
75+
7176
var kind = type.ItemType.RawKind;
7277
var isKey = type.ItemType.IsKey;
73-
cols.Add(new Column(schema.GetColumnName(iCol), vecKind, kind, isKey));
78+
79+
var metadataNames = schema.GetMetadataTypes(iCol)
80+
.Select(kvp => kvp.Key)
81+
.ToArray();
82+
cols.Add(new Column(schema.GetColumnName(iCol), vecKind, kind, isKey, metadataNames));
7483
}
7584
}
7685
return new SchemaShape(cols.ToArray());
@@ -93,14 +102,14 @@ public Column FindColumn(string name)
93102
/// <summary>
94103
/// The 'data reader' takes a certain kind of input and turns it into an <see cref="IDataView"/>.
95104
/// </summary>
96-
/// <typeparam name="TIn">The type of input the reader takes.</typeparam>
97-
public interface IDataReader<TIn>
105+
/// <typeparam name="TSource">The type of input the reader takes.</typeparam>
106+
public interface IDataReader<in TSource>
98107
{
99108
/// <summary>
100109
/// Take the data in, make transformations, output the data.
101110
/// Note that <see cref="IDataView"/>'s are lazy, so no actual transformations happen here, just schema validation.
102111
/// </summary>
103-
IDataView Read(TIn input);
112+
IDataView Read(TSource input);
104113

105114
/// <summary>
106115
/// The output schema of the reader.
@@ -111,16 +120,16 @@ public interface IDataReader<TIn>
111120
/// <summary>
112121
/// Sometimes we need to 'fit' an <see cref="IDataReader{TSource}"/>. This interface represents the 'unfitted' version.
113122
/// </summary>
114-
/// <typeparam name="TIn">The type of input the estimator (and eventually transformer) takes.</typeparam>
115-
public interface IDataReaderEstimator<TIn>
123+
/// <typeparam name="TSource">The type of input the estimator (and eventually the fitted reader) takes.</typeparam>
124+
public interface IDataReaderEstimator<in TSource>
116125
{
117126
/// <summary>
118127
/// Train and return a transformer.
119128
///
120-
/// REVIEW: you could consider the transformer to take a different <typeparamref name="TIn"/>, but we don't have such components
129+
/// REVIEW: you could consider the fitted reader to take a different <typeparamref name="TSource"/>, but we don't have such components
121130
/// yet, so why complicate matters?
122131
/// </summary>
123-
IDataReader<TIn> Fit(TIn input);
132+
IDataReader<TSource> Fit(TSource input);
124133

125134
/// <summary>
126135
/// The 'promise' of the output schema.
@@ -130,12 +139,12 @@ public interface IDataReaderEstimator<TIn>
130139
}
131140

132141
/// <summary>
133-
/// A DataReader estimator that provides more details about the produced reader, in the form of <typeparamref name="TTransformer"/>.
142+
/// A DataReader estimator that provides more details about the produced reader, in the form of <typeparamref name="TReader"/>.
134143
/// </summary>
135-
public interface IDataReaderEstimator<TIn, out TTransformer> : IDataReaderEstimator<TIn>
136-
where TTransformer : IDataReader<TIn>
144+
public interface IDataReaderEstimator<in TSource, out TReader> : IDataReaderEstimator<TSource>
145+
where TReader : IDataReader<TSource>
137146
{
138-
new TTransformer Fit(TIn input);
147+
new TReader Fit(TSource input);
139148
}
140149

141150
/// <summary>
@@ -157,18 +166,19 @@ public interface ITransformer
157166
/// </summary>
158167
IDataView Transform(IDataView input);
159168
}
160-
169+
161170
/// <summary>
162171
/// The estimator (in Spark terminology) is an 'untrained transformer'. It needs to 'fit' on the data to manufacture
163172
/// a transformer.
164173
/// It also provides the 'schema propagation' like transformers do, but over <see cref="SchemaShape"/> instead of <see cref="ISchema"/>.
165174
/// </summary>
166-
public interface IEstimator
175+
public interface IEstimator<out TTransformer>
176+
where TTransformer : ITransformer
167177
{
168178
/// <summary>
169179
/// Train and return a transformer.
170180
/// </summary>
171-
ITransformer Fit(IDataView input);
181+
TTransformer Fit(IDataView input);
172182

173183
/// <summary>
174184
/// Schema propagation for estimators.
@@ -177,13 +187,4 @@ public interface IEstimator
177187
/// </summary>
178188
SchemaShape GetOutputSchema(SchemaShape inputSchema);
179189
}
180-
181-
/// <summary>
182-
/// An estimator that provides more details about the produced transformer, in the form of <typeparamref name="TTransformer"/>.
183-
/// </summary>
184-
public interface IDataEstimator<out TTransformer> : IEstimator
185-
where TTransformer : ITransformer
186-
{
187-
new TTransformer Fit(IDataView input);
188-
}
189190
}

0 commit comments

Comments
 (0)