diff --git a/src/Microsoft.ML.Data/DataLoadSave/DataReaderExtensions.cs b/src/Microsoft.ML.Data/DataLoadSave/DataReaderExtensions.cs index 16b488802a..8fc5a740da 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/DataReaderExtensions.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/DataReaderExtensions.cs @@ -9,14 +9,12 @@ namespace Microsoft.ML { public static class DataReaderExtensions { - public static IDataView Read(this IDataReader reader, string path) - { - return reader.Read(new MultiFileSource(path)); - } - + /// + /// Reads data from one or more file into an . + /// + /// The reader to use. + /// One or more paths from which to load data. public static IDataView Read(this IDataReader reader, params string[] path) - { - return reader.Read(new MultiFileSource(path)); - } + => reader.Read(new MultiFileSource(path)); } } diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs index b1c0a40d88..af4616ef35 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs @@ -24,32 +24,56 @@ namespace Microsoft.ML.Data { /// - /// Loads a text file into an IDataView. Supports basic mapping from input columns to IDataView columns. + /// Loads a text file into an IDataView. Supports basic mapping from input columns to columns. /// public sealed partial class TextLoader : IDataReader, ICanSaveModel { - /// - /// Scalar column of I4 sourced from 2nd column - /// col=ColumnName:I4:1 - /// - /// Vector column of I4 that contains values from columns 1, 3 to 10 - /// col=ColumnName:I4:1,3-10 - /// - /// Key range column of KeyType with underlying storage type U4 that contains values from columns 1, 3 to 10, that can go from 1 to 100 (0 reserved for out of range) - /// col=ColumnName:U4[100]:1,3-10 - /// + /// + /// Describes how an input column should be mapped to an column. 
+ /// public sealed class Column { + // Examples of how a column is defined in command line API: + // Scalar column of I4 sourced from 2nd column + // col=ColumnName:I4:1 + // Vector column of I4 that contains values from columns 1, 3 to 10 + // col=ColumnName:I4:1,3-10 + // Key range column of KeyType with underlying storage type U4 that contains values from columns 1, 3 to 10, that can go from 1 to 100 (0 reserved for out of range) + // col=ColumnName:U4[100]:1,3-10 + + /// + /// Describes how an input column should be mapped to an column. + /// public Column() { } + /// + /// Describes how an input column should be mapped to an column. + /// + /// Name of the column. + /// of the items in the column. If defaults to a float. + /// Index of the column. public Column(string name, DataKind? type, int index) : this(name, type, new[] { new Range(index) }) { } + /// + /// Describes how an input column should be mapped to an column. + /// + /// Name of the column. + /// of the items in the column. If defaults to a float. + /// The minimum inclusive index of the column. + /// The maximum-inclusive index of the column. public Column(string name, DataKind? type, int minIndex, int maxIndex) : this(name, type, new[] { new Range(minIndex, maxIndex) }) { } + /// + /// Describes how an input column should be mapped to an column. + /// + /// Name of the column. + /// of the items in the column. If defaults to a float. + /// Source index range(s) of the column. + /// For a key column, this defines the range of values. public Column(string name, DataKind? type, Range[] source, KeyCount keyCount = null) { Contracts.CheckValue(name, nameof(name)); @@ -61,15 +85,27 @@ public Column(string name, DataKind? type, Range[] source, KeyCount keyCount = n KeyCount = keyCount; } + /// + /// Name of the column. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Name of the column")] public string Name; + /// + /// of the items in the column. If defaults to a float. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Type of the items in the column")] public DataKind? Type; + /// + /// Source index range(s) of the column. + /// [Argument(ArgumentType.Multiple, HelpText = "Source index range(s) of the column", ShortName = "src")] public Range[] Source; + /// + /// For a key column, this defines the range of values. + /// [Argument(ArgumentType.Multiple, HelpText = "For a key column, this defines the range of values", ShortName = "key")] public KeyCount KeyCount; @@ -108,7 +144,7 @@ private bool TryParse(string str) private bool TryParseSource(string str) => TryParseSourceEx(str, out Source); - public static bool TryParseSourceEx(string str, out Range[] ranges) + internal static bool TryParseSourceEx(string str, out Range[] ranges) { ranges = null; var strs = str.Split(','); @@ -198,6 +234,9 @@ internal bool IsValid() } } + /// + /// Specifies the range of indices of input columns that should be mapped to an output column. + /// public sealed class Range { public Range() { } @@ -219,7 +258,7 @@ public Range(int index) /// The minimum inclusive index of the column. /// The maximum-inclusive index of the column. If null /// indicates that the should auto-detect the legnth - /// of the lines, and read till the end. + /// of the lines, and read until the end. public Range(int min, int? max) { Contracts.CheckParam(min >= 0, nameof(min), "Must be non-negative"); @@ -233,27 +272,49 @@ public Range(int min, int? max) AutoEnd = max == null; } + /// + /// The minimum index of the column, inclusive. + /// [Argument(ArgumentType.Required, HelpText = "First index in the range")] public int Min; - // If max is specified, the fields autoEnd and variableEnd are ignored. - // Otherwise, if autoEnd is true, then variableEnd is ignored. + /// + /// The maximum index of the column, inclusive. If + /// indicates that the should auto-detect the length + /// of the lines, and read until the end. 
+ /// If max is specified, the fields and are ignored. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Last index in the range")] public int? Max; + /// + /// Whether this range extends to the end of the line, but should be a fixed number of items. + /// If is specified, the fields and are ignored. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "This range extends to the end of the line, but should be a fixed number of items", ShortName = "auto")] public bool AutoEnd; + /// + /// Whether this range extends to the end of the line, which can vary from line to line. + /// If is specified, the fields and are ignored. + /// If is , then is ignored. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "This range extends to the end of the line, which can vary from line to line", ShortName = "var")] public bool VariableEnd; + /// + /// Whether this range includes only other indices not specified. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "This range includes only other indices not specified", ShortName = "other")] public bool AllOther; + /// + /// Force scalar columns to be treated as vectors of length one. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Force scalar columns to be treated as vectors of length one", ShortName = "vector")] public bool ForceVector; @@ -333,8 +394,16 @@ internal bool TryUnparse(StringBuilder sb) } } - public class ArgumentsCore + /// + /// The settings for + /// + public class Options { + /// + /// Whether the input may include quoted values, which can contain separator characters, colons, + /// and distinguish empty values from missing values. When true, consecutive separators denote a + /// missing value and an empty value is denoted by \"\". When false, consecutive separators denote an empty value. 
+ /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the input may include quoted values, which can contain separator characters, colons," + @@ -342,11 +411,17 @@ public class ArgumentsCore " missing value and an empty value is denoted by \"\". When false, consecutive separators" + " denote an empty value.", ShortName = "quote")] - public bool AllowQuoting = DefaultArguments.AllowQuoting; + public bool AllowQuoting = Defaults.AllowQuoting; + /// + /// Whether the input may include sparse representations. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Whether the input may include sparse representations", ShortName = "sparse")] - public bool AllowSparse = DefaultArguments.AllowSparse; + public bool AllowSparse = Defaults.AllowSparse; + /// + /// Number of source columns in the text data. Default is that sparse rows contain their size information. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Number of source columns in the text data. Default is that sparse rows contain their size information.", ShortName = "size")] @@ -354,45 +429,63 @@ public class ArgumentsCore [Argument(ArgumentType.AtMostOnce, Visibility = ArgumentAttribute.VisibilityType.CmdLineOnly, HelpText = "Source column separator. Options: tab, space, comma, single character", ShortName = "sep")] // this is internal as it only serves the command line interface - internal string Separator = DefaultArguments.Separator.ToString(); + internal string Separator = Defaults.Separator.ToString(); + /// + /// The characters that should be used as column separators. + /// [Argument(ArgumentType.AtMostOnce, Name = nameof(Separator), Visibility = ArgumentAttribute.VisibilityType.EntryPointsOnly, HelpText = "Source column separator.", ShortName = "sep")] - public char[] Separators = new[] { DefaultArguments.Separator }; + public char[] Separators = new[] { Defaults.Separator }; + /// + /// Specifies the input columns that should be mapped to columns. 
+ /// [Argument(ArgumentType.Multiple, HelpText = "Column groups. Each group is specified as name:type:numeric-ranges, eg, col=Features:R4:1-17,26,35-40", Name = "Column", ShortName = "col", SortOrder = 1)] public Column[] Columns; + /// + /// Whether to remove trailing whitespace from lines. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Remove trailing whitespace from lines", ShortName = "trim")] - public bool TrimWhitespace = DefaultArguments.TrimWhitespace; + public bool TrimWhitespace = Defaults.TrimWhitespace; + /// + /// Whether the data file has a header with feature names. + /// [Argument(ArgumentType.AtMostOnce, ShortName = "header", HelpText = "Data file has header with feature names. Header is read only if options 'hs' and 'hf' are not specified.")] public bool HasHeader; /// - /// Checks that all column specifications are valid (that is, ranges are disjoint and have min<=max). + /// Whether to use separate parsing threads. /// - public bool IsValid() - { - return Utils.Size(Columns) == 0 || Columns.All(x => x.IsValid()); - } - } - - public sealed class Options : ArgumentsCore - { [Argument(ArgumentType.AtMostOnce, HelpText = "Use separate parsing threads?", ShortName = "threads", Hide = true)] public bool UseThreads = true; + /// + /// File containing a header with feature names. If specified, the header defined in the data file is ignored regardless of . + /// [Argument(ArgumentType.AtMostOnce, HelpText = "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.", ShortName = "hf", IsInputFileName = true)] public string HeaderFile; + /// + /// Maximum number of rows to produce. + /// [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of rows to produce", ShortName = "rows", Hide = true)] public long? MaxRows; + + /// + /// Checks that all column specifications are valid (that is, ranges are disjoint and have min<=max). 
+ /// + internal bool IsValid() + { + return Utils.Size(Columns) == 0 || Columns.All(x => x.IsValid()); + } } - internal static class DefaultArguments + internal static class Defaults { internal const bool AllowQuoting = true; internal const bool AllowSparse = true; @@ -532,7 +625,7 @@ private sealed class Bindings /// private readonly VBuffer>[] _slotNames; /// - /// Empty if is , no header presents, or upon load + /// Empty if is , no header presents, or upon load /// there was no header stored in the model. /// private readonly ReadOnlyMemory _header; @@ -975,7 +1068,7 @@ private bool HasHeader /// Whether the file has a header. /// The character used as separator between data points in a row. By default the tab character is used as separator. /// Allows to expose items that can be used for reading. - public TextLoader(IHostEnvironment env, Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null) + internal TextLoader(IHostEnvironment env, Column[] columns, bool hasHeader = false, char separatorChar = '\t', IMultiStreamSource dataSample = null) : this(env, MakeArgs(columns, hasHeader, new[] { separatorChar }), dataSample) { } @@ -993,7 +1086,7 @@ private static Options MakeArgs(Column[] columns, bool hasHeader, char[] separat /// The environment to use. /// Defines the settings of the load operation. /// Allows to expose items that can be used for reading. - public TextLoader(IHostEnvironment env, Options options = null, IMultiStreamSource dataSample = null) + internal TextLoader(IHostEnvironment env, Options options = null, IMultiStreamSource dataSample = null) { options = options ?? new Options(); @@ -1049,7 +1142,7 @@ public TextLoader(IHostEnvironment env, Options options = null, IMultiStreamSour _host.CheckNonEmpty(options.Separator, nameof(options.Separator), "Must specify a separator"); - //Default arg.Separator is tab and default args.Separators is also a '\t'. 
+ //Default options.Separator is tab and default options.Separators is also a '\t'. //At a time only one default can be different and whichever is different that will //be used. if (options.Separators.Length > 1 || options.Separators[0] != '\t') @@ -1130,21 +1223,38 @@ private sealed class LoaderHolder #pragma warning restore 649 // never assigned } - // See if we can extract valid arguments from the first data file. - // If so, update args and set cols to the combined set of columns. - // If not, set error to true if there was an error condition. + /// + /// See if we can extract valid arguments from the first data file. If so, update options and set cols to the combined set of columns. + /// If not, set error to true if there was an error condition. + /// + /// + /// Not all arguments are extracted from the data file. There are three arguments that can vary from iteration to iteration and that are set + /// directly by the user in the options class. These three arguments are: + /// , + /// , + /// + /// private static bool TryParseSchema(IHost host, IMultiStreamSource files, ref Options options, out Column[] cols, out bool error) { host.AssertValue(host); host.AssertValue(files); + host.CheckValue(options, nameof(options)); cols = null; error = false; // Verify that the current schema-defining arguments are default. - // Get settings just for core arguments, not everything. - string tmp = CmdParser.GetSettings(host, options, new ArgumentsCore()); + // Get a string representation of the settings for all the fields of the Options class besides the following three + // UseThreads, HeaderFile, MaxRows, which are set by the user directly. + string tmp = CmdParser.GetSettings(host, options, new Options() + { + // It's fine if the user sets the following three arguments, as they are instance specific. + // Setting the defaults to the user provided values will avoid these in the output of the call CmdParser.GetSettings. 
+ UseThreads = options.UseThreads, + HeaderFile = options.HeaderFile, + MaxRows = options.MaxRows + }); // Try to get the schema information from the file. string str = Cursor.GetEmbeddedArgs(files); @@ -1154,7 +1264,7 @@ private static bool TryParseSchema(IHost host, IMultiStreamSource files, // Parse the extracted information. using (var ch = host.Start("Parsing options from file")) { - // If tmp is not empty, this means the user specified some additional arguments in the command line, + // If tmp is not empty, this means the user specified some additional arguments in the options or command line, // such as quote- or sparse-. Warn them about it, since this means that the columns will not be read from the file. if (!string.IsNullOrWhiteSpace(tmp)) { @@ -1179,20 +1289,22 @@ private static bool TryParseSchema(IHost host, IMultiStreamSource files, if (info.Type != typeof(IDataLoader) || info.ArgType != typeof(Options)) goto LDone; - var argsNew = new Options(); - // Copy the non-core arguments to the new args (we already know that all the core arguments are default). - var parsed = CmdParser.ParseArguments(host, CmdParser.GetSettings(host, options, new Options()), argsNew); - ch.Assert(parsed); - // Copy the core arguments to the new args. - if (!CmdParser.ParseArguments(host, loader.GetSettingsString(), argsNew, typeof(ArgumentsCore), msg => ch.Error(msg))) + var optionsNew = new Options(); + // Set the fields of optionsNew to the arguments parsed from the file. + if (!CmdParser.ParseArguments(host, loader.GetSettingsString(), optionsNew, typeof(Options), msg => ch.Error(msg))) goto LDone; - cols = argsNew.Columns; + // Overwrite the three arguments that vary from iteration to iteration with the values specified by the user in the options class. 
+ optionsNew.UseThreads = options.UseThreads; + optionsNew.HeaderFile = options.HeaderFile; + optionsNew.MaxRows = options.MaxRows; + + cols = optionsNew.Columns; if (Utils.Size(cols) == 0) goto LDone; error = false; - options = argsNew; + options = optionsNew; LDone: return !error; @@ -1200,9 +1312,9 @@ private static bool TryParseSchema(IHost host, IMultiStreamSource files, } /// - /// Checks whether the source contains the valid TextLoader.Arguments depiction. + /// Checks whether the source contains the valid TextLoader.Options depiction. /// - public static bool FileContainsValidSchema(IHostEnvironment env, IMultiStreamSource files, out Options options) + internal static bool FileContainsValidSchema(IHostEnvironment env, IMultiStreamSource files, out Options options) { Contracts.CheckValue(env, nameof(env)); var h = env.Register(RegistrationName); @@ -1307,16 +1419,23 @@ void ICanSaveModel.Save(ModelSaveContext ctx) _bindings.Save(ctx); } + /// + /// The output that will be produced by the reader. + /// public DataViewSchema GetOutputSchema() => _bindings.OutputSchema; + /// + /// Reads data from into an . + /// + /// The source from which to load data. 
public IDataView Read(IMultiStreamSource source) => new BoundLoader(this, source); internal static TextLoader CreateTextReader(IHostEnvironment host, - bool hasHeader = DefaultArguments.HasHeader, - char separator = DefaultArguments.Separator, - bool allowQuotedStrings = DefaultArguments.AllowQuoting, - bool supportSparse = DefaultArguments.AllowSparse, - bool trimWhitespace = DefaultArguments.TrimWhitespace) + bool hasHeader = Defaults.HasHeader, + char separator = Defaults.Separator, + bool allowQuotedStrings = Defaults.AllowQuoting, + bool supportSparse = Defaults.AllowSparse, + bool trimWhitespace = Defaults.TrimWhitespace) { var userType = typeof(TInput); diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs index 7b5e684b46..e924523db1 100644 --- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs +++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderSaverCatalog.cs @@ -21,8 +21,8 @@ public static class TextLoaderSaverCatalog /// The optional location of a data sample. The sample can be used to infer column names and number of slots in each column. 
public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, TextLoader.Column[] columns, - bool hasHeader = TextLoader.DefaultArguments.HasHeader, - char separatorChar = TextLoader.DefaultArguments.Separator, + bool hasHeader = TextLoader.Defaults.HasHeader, + char separatorChar = TextLoader.Defaults.Separator, IMultiStreamSource dataSample = null) => new TextLoader(CatalogUtils.GetEnvironment(catalog), columns, hasHeader, separatorChar, dataSample); @@ -53,11 +53,11 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, /// except for 3rd and 5th columns which have values 6 and 3 /// Remove trailing whitespace from lines public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, - bool hasHeader = TextLoader.DefaultArguments.HasHeader, - char separatorChar = TextLoader.DefaultArguments.Separator, - bool allowQuotedStrings = TextLoader.DefaultArguments.AllowQuoting, - bool supportSparse = TextLoader.DefaultArguments.AllowSparse, - bool trimWhitespace = TextLoader.DefaultArguments.TrimWhitespace) + bool hasHeader = TextLoader.Defaults.HasHeader, + char separatorChar = TextLoader.Defaults.Separator, + bool allowQuotedStrings = TextLoader.Defaults.AllowQuoting, + bool supportSparse = TextLoader.Defaults.AllowSparse, + bool trimWhitespace = TextLoader.Defaults.TrimWhitespace) => TextLoader.CreateTextReader(CatalogUtils.GetEnvironment(catalog), hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace); /// @@ -72,8 +72,8 @@ public static TextLoader CreateTextLoader(this DataOperationsCatalog cat public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, string path, TextLoader.Column[] columns, - bool hasHeader = TextLoader.DefaultArguments.HasHeader, - char separatorChar = TextLoader.DefaultArguments.Separator) + bool hasHeader = TextLoader.Defaults.HasHeader, + char separatorChar = TextLoader.Defaults.Separator) { Contracts.CheckNonEmpty(path, nameof(path)); @@ -104,11 
+104,11 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, /// The data view. public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, string path, - bool hasHeader = TextLoader.DefaultArguments.HasHeader, - char separatorChar = TextLoader.DefaultArguments.Separator, - bool allowQuotedStrings = TextLoader.DefaultArguments.AllowQuoting, - bool supportSparse = TextLoader.DefaultArguments.AllowSparse, - bool trimWhitespace = TextLoader.DefaultArguments.TrimWhitespace) + bool hasHeader = TextLoader.Defaults.HasHeader, + char separatorChar = TextLoader.Defaults.Separator, + bool allowQuotedStrings = TextLoader.Defaults.AllowQuoting, + bool supportSparse = TextLoader.Defaults.AllowSparse, + bool trimWhitespace = TextLoader.Defaults.TrimWhitespace) { Contracts.CheckNonEmpty(path, nameof(path)); @@ -147,8 +147,8 @@ public static IDataView ReadFromTextFile(this DataOperationsCatalog catalog, str public static void SaveAsText(this DataOperationsCatalog catalog, IDataView data, Stream stream, - char separatorChar = TextLoader.DefaultArguments.Separator, - bool headerRow = TextLoader.DefaultArguments.HasHeader, + char separatorChar = TextLoader.Defaults.Separator, + bool headerRow = TextLoader.Defaults.HasHeader, bool schema = true, bool keepHidden = false) { diff --git a/src/Microsoft.ML.StaticPipe/LocalPathReader.cs b/src/Microsoft.ML.StaticPipe/LocalPathReader.cs index 619d933b00..d917e51f59 100644 --- a/src/Microsoft.ML.StaticPipe/LocalPathReader.cs +++ b/src/Microsoft.ML.StaticPipe/LocalPathReader.cs @@ -2,20 +2,20 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System.Data; using Microsoft.ML.Data; namespace Microsoft.ML.StaticPipe { public static class LocalPathReader { - public static DataView Read(this DataReader reader, string path) - { - return reader.Read(new MultiFileSource(path)); - } + /// + /// Reads data from one or more file into an . + /// + /// The reader to use. + /// One or more paths from which to load data. public static DataView Read(this DataReader reader, params string[] path) - { - return reader.Read(new MultiFileSource(path)); - } + => reader.Read(new MultiFileSource(path)); } } \ No newline at end of file diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json index 9fb6b0a308..71bd8a06b5 100644 --- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json +++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json @@ -316,42 +316,6 @@ "IsNullable": false, "Default": null }, - { - "Name": "UseThreads", - "Type": "Bool", - "Desc": "Use separate parsing threads?", - "Aliases": [ - "threads" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": true - }, - { - "Name": "HeaderFile", - "Type": "String", - "Desc": "File containing a header with feature names. 
If specified, header defined in the data file (header+) is ignored.", - "Aliases": [ - "hf" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": false, - "Default": null - }, - { - "Name": "MaxRows", - "Type": "Int", - "Desc": "Maximum number of rows to produce", - "Aliases": [ - "rows" - ], - "Required": false, - "SortOrder": 150.0, - "IsNullable": true, - "Default": null - }, { "Name": "AllowQuoting", "Type": "Bool", @@ -428,6 +392,42 @@ "SortOrder": 150.0, "IsNullable": false, "Default": false + }, + { + "Name": "UseThreads", + "Type": "Bool", + "Desc": "Use separate parsing threads?", + "Aliases": [ + "threads" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": true + }, + { + "Name": "HeaderFile", + "Type": "String", + "Desc": "File containing a header with feature names. If specified, header defined in the data file (header+) is ignored.", + "Aliases": [ + "hf" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": false, + "Default": null + }, + { + "Name": "MaxRows", + "Type": "Int", + "Desc": "Maximum number of rows to produce", + "Aliases": [ + "rows" + ], + "Required": false, + "SortOrder": 150.0, + "IsNullable": true, + "Default": null } ] }, diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs index acf3ce6c95..6e55bff1bb 100644 --- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs +++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs @@ -463,7 +463,7 @@ public void EntryPointCreateEnsemble() MetadataUtils.Const.ScoreValueKind.Score) ).Transform(individualScores[i]); - individualScores[i] = ColumnSelectingTransformer.CreateDrop(Env, individualScores[i], MetadataUtils.Const.ScoreValueKind.Score); + individualScores[i] = new ColumnSelectingTransformer(Env, null, new[] { MetadataUtils.Const.ScoreValueKind.Score }).Transform(individualScores[i]); } var avgEnsembleInput = new EnsembleCreator.ClassifierInput { 
Models = predictorModels, ModelCombiner = EnsembleCreator.ClassifierCombiner.Average }; diff --git a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs index cfea0d9ce6..8a7db3ecaf 100644 --- a/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs +++ b/test/Microsoft.ML.Predictor.Tests/TestPredictors.cs @@ -1806,7 +1806,7 @@ public void CompareSvmPredictorResultsToLibSvm() { using (var env = new LocalEnvironment(1, conc: 1)) { - IDataView trainView = new TextLoader(env, new TextLoader.Arguments(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.trainFilename))); + IDataView trainView = new TextLoader(env, new TextLoader.Options(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.trainFilename))); trainView = NormalizeTransform.Create(env, new NormalizeTransform.MinMaxArguments() @@ -1815,7 +1815,7 @@ public void CompareSvmPredictorResultsToLibSvm() }, trainView); var trainData = new RoleMappedData(trainView, "Label", "Features"); - IDataView testView = new TextLoader(env, new TextLoader.Arguments(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.testFilename))); + IDataView testView = new TextLoader(env, new TextLoader.Options(), new MultiFileSource(GetDataPath(TestDatasets.mnistOneClass.testFilename))); ApplyTransformUtils.ApplyAllTransformsToData(env, trainView, testView); var testData = new RoleMappedData(testView, "Label", "Features");