Skip to content

Commit 7ff9724

Browse files
committed
review comments
1 parent f75b504 commit 7ff9724

File tree

1 file changed

+30
-17
lines changed

1 file changed

+30
-17
lines changed

src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs

+30-17
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,16 @@ public sealed partial class TextLoader : IDataReader<IMultiStreamSource>, ICanSa
3232
/// <summary>
3333
/// Describes how an input column should be mapped to an <see cref="IDataView"/> column.
3434
/// </summary>
35-
/// <example>
36-
/// Scalar column of <seealso cref="DataKind"/> I4 sourced from 2nd column
37-
/// col=ColumnName:I4:1
38-
///
39-
/// Vector column of <seealso cref="DataKind"/> I4 that contains values from columns 1, 3 to 10
40-
/// col=ColumnName:I4:1,3-10
41-
///
42-
/// Key range column of KeyType with underlying storage type U4 that contains values from columns 1, 3 to 10, that can go from 1 to 100 (0 reserved for out of range)
43-
/// col=ColumnName:U4[100]:1,3-10
44-
/// </example>
4535
public sealed class Column
4636
{
37+
// Examples of how a column is defined in command line API:
38+
// Scalar column of <seealso cref="DataKind"/> I4 sourced from 2nd column
39+
// col=ColumnName:I4:1
40+
// Vector column of <seealso cref="DataKind"/> I4 that contains values from columns 1, 3 to 10
41+
// col=ColumnName:I4:1,3-10
42+
// Key range column of KeyType with underlying storage type U4 that contains values from columns 1, 3 to 10, that can go from 1 to 100 (0 reserved for out of range)
43+
// col=ColumnName:U4[100]:1,3-10
44+
4745
/// <summary>
4846
/// Describes how an input column should be mapped to an <see cref="IDataView"/> column.
4947
/// </summary>
@@ -53,7 +51,7 @@ public Column() { }
5351
/// Describes how an input column should be mapped to an <see cref="IDataView"/> column.
5452
/// </summary>
5553
/// <param name="name">Name of the column.</param>
56-
/// <param name="type">Type of the items in the column.</param>
54+
/// <param name="type"><see cref="DataKind"/> of the items in the column. If <see langword="null"/> defaults to a float.</param>
5755
/// <param name="index">Index of the column.</param>
5856
public Column(string name, DataKind? type, int index)
5957
: this(name, type, new[] { new Range(index) }) { }
@@ -62,7 +60,7 @@ public Column(string name, DataKind? type, int index)
6260
/// Describes how an input column should be mapped to an <see cref="IDataView"/> column.
6361
/// </summary>
6462
/// <param name="name">Name of the column.</param>
65-
/// <param name="type">Type of the items in the column.</param>
63+
/// <param name="type"><see cref="DataKind"/> of the items in the column. If <see langword="null"/> defaults to a float.</param>
6664
/// <param name="minIndex">The minimum inclusive index of the column.</param>
6765
/// <param name="maxIndex">The maximum inclusive index of the column.</param>
6866
public Column(string name, DataKind? type, int minIndex, int maxIndex)
@@ -74,7 +72,7 @@ public Column(string name, DataKind? type, int minIndex, int maxIndex)
7472
/// Describes how an input column should be mapped to an <see cref="IDataView"/> column.
7573
/// </summary>
7674
/// <param name="name">Name of the column.</param>
77-
/// <param name="type">Type of the items in the column.</param>
75+
/// <param name="type"><see cref="DataKind"/> of the items in the column. If <see langword="null"/> defaults to a float.</param>
7876
/// <param name="source">Source index range(s) of the column.</param>
7977
/// <param name="keyCount">For a key column, this defines the range of values.</param>
8078
public Column(string name, DataKind? type, Range[] source, KeyCount keyCount = null)
@@ -95,7 +93,7 @@ public Column(string name, DataKind? type, Range[] source, KeyCount keyCount = n
9593
public string Name;
9694

9795
/// <summary>
98-
/// Type of the items in the column.
96+
/// <see cref="DataKind"/> of the items in the column. If <see langword="null"/> defaults to a float.
9997
/// </summary>
10098
[Argument(ArgumentType.AtMostOnce, HelpText = "Type of the items in the column")]
10199
public DataKind? Type;
@@ -276,13 +274,13 @@ public Range(int min, int? max)
276274
}
277275

278276
/// <summary>
279-
/// The minimum inclusive index of the column.
277+
/// The minimum index of the column, inclusive.
280278
/// </summary>
281279
[Argument(ArgumentType.Required, HelpText = "First index in the range")]
282280
public int Min;
283281

284282
/// <summary>
285-
/// The maximum-inclusive index of the column. If <see langword="null"/>
283+
/// The maximum index of the column, inclusive. If <see langword="null"/>
286284
/// indicates that the <see cref="TextLoader"/> should auto-detect the length
287285
/// of the lines, and read until the end.
288286
/// If max is specified, the fields <see cref="AutoEnd"/> and <see cref="VariableEnd"/> are ignored.
@@ -1304,6 +1302,21 @@ private static bool TryParseSchema(IHost host, IMultiStreamSource files,
13041302
}
13051303
}
13061304

1305+
/// <summary>
1306+
/// Checks whether the source contains a valid <see cref="Options"/> description.
1307+
/// </summary>
1308+
internal static bool FileContainsValidSchema(IHostEnvironment env, IMultiStreamSource files, out Options options)
1309+
{
1310+
Contracts.CheckValue(env, nameof(env));
1311+
var h = env.Register(RegistrationName);
1312+
h.CheckValue(files, nameof(files));
1313+
options = new Options();
1314+
Column[] cols;
1315+
bool error;
1316+
bool found = TryParseSchema(h, files, ref options, out cols, out error);
1317+
return found && !error && options.IsValid();
1318+
}
1319+
13071320
private TextLoader(IHost host, ModelLoadContext ctx)
13081321
{
13091322
Contracts.AssertValue(host, "host");
@@ -1398,7 +1411,7 @@ public void Save(ModelSaveContext ctx)
13981411
}
13991412

14001413
/// <summary>
1401-
/// Returns the <see cref="SchemaShape"/> of the schema which will be produced by the transformer.
1414+
/// Returns the <see cref="Schema"/> which will be produced by the <see cref="TextLoader"/>.
14021415
/// Used for schema propagation and verification in a pipeline.
14031416
/// </summary>
14041417
public Schema GetOutputSchema() => _bindings.OutputSchema;

0 commit comments

Comments
 (0)