Skip to content

Commit a443be8

Browse files
daholste authored and Dmitry-A committed
Upgrade ML.NET package to 0.10.0 (dotnet#70)
1 parent 4832bd3 commit a443be8

37 files changed

+208 -217 lines changed

src/AutoML/API/MLContextAutoFitExtensions.cs

+14 -14
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,16 @@
44

55
using System;
66
using System.Collections.Generic;
7-
using System.Threading;
7+
using System.Linq;
8+
using Microsoft.Data.DataView;
89
using Microsoft.ML.Core.Data;
910
using Microsoft.ML.Data;
10-
using System.Linq;
1111

1212
namespace Microsoft.ML.Auto
1313
{
1414
public static class RegressionExtensions
1515
{
16-
public static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this RegressionContext context,
16+
public static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this RegressionCatalog catalog,
1717
IDataView trainData,
1818
string label = DefaultColumnNames.Label,
1919
IDataView validationData = null,
@@ -24,11 +24,11 @@ public static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this Regre
2424
var settings = new AutoFitSettings();
2525
settings.StoppingCriteria.TimeOutInMinutes = timeoutInMinutes;
2626

27-
return AutoFit(context, trainData, label, validationData, settings,
27+
return AutoFit(catalog, trainData, label, validationData, settings,
2828
preFeaturizers, columnPurposes, null);
2929
}
3030

31-
internal static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this RegressionContext context,
31+
internal static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this RegressionCatalog catalog,
3232
IDataView trainData,
3333
string label = DefaultColumnNames.Label,
3434
IDataView validationData = null,
@@ -41,7 +41,7 @@ internal static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this Reg
4141

4242
if (validationData == null)
4343
{
44-
(trainData, validationData) = context.TestValidateSplit(trainData);
44+
(trainData, validationData) = catalog.TestValidateSplit(trainData);
4545
}
4646

4747
// run autofit & get all pipelines run in that process
@@ -55,7 +55,7 @@ internal static IEnumerable<IterationResult<RegressionMetrics>> AutoFit(this Reg
5555

5656
public static class BinaryClassificationExtensions
5757
{
58-
public static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFit(this BinaryClassificationContext context,
58+
public static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFit(this BinaryClassificationCatalog catalog,
5959
IDataView trainData,
6060
string label = DefaultColumnNames.Label,
6161
IDataView validationData = null,
@@ -66,11 +66,11 @@ public static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFit(
6666
var settings = new AutoFitSettings();
6767
settings.StoppingCriteria.TimeOutInMinutes = timeoutInMinutes;
6868

69-
return AutoFit(context, trainData, label, validationData, settings,
69+
return AutoFit(catalog, trainData, label, validationData, settings,
7070
preFeaturizers, columnPurposes, null);
7171
}
7272

73-
internal static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFit(this BinaryClassificationContext context,
73+
internal static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFit(this BinaryClassificationCatalog catalog,
7474
IDataView trainData,
7575
string label = DefaultColumnNames.Label,
7676
IDataView validationData = null,
@@ -83,7 +83,7 @@ internal static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFi
8383

8484
if (validationData == null)
8585
{
86-
(trainData, validationData) = context.TestValidateSplit(trainData);
86+
(trainData, validationData) = catalog.TestValidateSplit(trainData);
8787
}
8888

8989
// run autofit & get all pipelines run in that process
@@ -97,7 +97,7 @@ internal static IEnumerable<IterationResult<BinaryClassificationMetrics>> AutoFi
9797

9898
public static class MulticlassExtensions
9999
{
100-
public static IEnumerable<IterationResult<MultiClassClassifierMetrics>> AutoFit(this MulticlassClassificationContext context,
100+
public static IEnumerable<IterationResult<MultiClassClassifierMetrics>> AutoFit(this MulticlassClassificationCatalog catalog,
101101
IDataView trainData,
102102
string label = DefaultColumnNames.Label,
103103
IDataView validationData = null,
@@ -108,11 +108,11 @@ public static IEnumerable<IterationResult<MultiClassClassifierMetrics>> AutoFit(
108108
var settings = new AutoFitSettings();
109109
settings.StoppingCriteria.TimeOutInMinutes = timeoutInMinutes;
110110

111-
return AutoFit(context, trainData, label, validationData, settings,
111+
return AutoFit(catalog, trainData, label, validationData, settings,
112112
preFeaturizers, columnPurposes, null);
113113
}
114114

115-
internal static IEnumerable<IterationResult<MultiClassClassifierMetrics>> AutoFit(this MulticlassClassificationContext context,
115+
internal static IEnumerable<IterationResult<MultiClassClassifierMetrics>> AutoFit(this MulticlassClassificationCatalog catalog,
116116
IDataView trainData,
117117
string label = DefaultColumnNames.Label,
118118
IDataView validationData = null,
@@ -125,7 +125,7 @@ internal static IEnumerable<IterationResult<MultiClassClassifierMetrics>> AutoFi
125125

126126
if (validationData == null)
127127
{
128-
(trainData, validationData) = context.TestValidateSplit(trainData);
128+
(trainData, validationData) = catalog.TestValidateSplit(trainData);
129129
}
130130

131131
// run autofit & get all pipelines run in that process

src/AutoML/API/MLContextDataExtensions.cs

+9 -8
Original file line number | Diff line number | Diff line change
@@ -5,22 +5,23 @@
55
using System;
66
using System.Collections.Generic;
77
using System.Linq;
8+
using Microsoft.Data.DataView;
89
using Microsoft.ML.Data;
910

1011
namespace Microsoft.ML.Auto
1112
{
1213
public static class DataExtensions
1314
{
1415
// Delimiter, header, column datatype inference
15-
public static ColumnInferenceResult InferColumns(this DataOperations catalog, string path, string label,
16+
public static ColumnInferenceResult InferColumns(this DataOperationsCatalog catalog, string path, string label,
1617
bool hasHeader = false, char? separatorChar = null, bool? allowQuotedStrings = null, bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true)
1718
{
1819
UserInputValidationUtil.ValidateInferColumnsArgs(path, label);
1920
var mlContext = new MLContext();
2021
return ColumnInferenceApi.InferColumns(mlContext, path, label, hasHeader, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns);
2122
}
2223

23-
public static IDataView AutoRead(this DataOperations catalog, string path, string label,
24+
public static IDataView AutoRead(this DataOperationsCatalog catalog, string path, string label,
2425
bool hasHeader = false, char? separatorChar = null, bool? allowQuotedStrings = null, bool? supportSparse = null, bool trimWhitespace = false, bool groupColumns = true)
2526
{
2627
UserInputValidationUtil.ValidateAutoReadArgs(path, label);
@@ -30,14 +31,14 @@ public static IDataView AutoRead(this DataOperations catalog, string path, strin
3031
return textLoader.Read(path);
3132
}
3233

33-
public static TextLoader CreateTextReader(this DataOperations catalog, ColumnInferenceResult columnInferenceResult)
34+
public static TextLoader CreateTextLoader(this DataOperationsCatalog catalog, ColumnInferenceResult columnInferenceResult)
3435
{
3536
UserInputValidationUtil.ValidateCreateTextReaderArgs(columnInferenceResult);
3637
return columnInferenceResult.BuildTextLoader();
3738
}
3839

3940
// Task inference
40-
public static MachineLearningTaskType InferTask(this DataOperations catalog, IDataView dataView)
41+
public static MachineLearningTaskType InferTask(this DataOperationsCatalog catalog, IDataView dataView)
4142
{
4243
throw new NotImplementedException();
4344
}
@@ -55,17 +56,17 @@ public class ColumnInferenceResult
5556
public readonly IEnumerable<(TextLoader.Column, ColumnPurpose)> Columns;
5657
public readonly bool AllowQuotedStrings;
5758
public readonly bool SupportSparse;
58-
public readonly string Separator;
59+
public readonly char[] Separators;
5960
public readonly bool HasHeader;
6061
public readonly bool TrimWhitespace;
6162

6263
public ColumnInferenceResult(IEnumerable<(TextLoader.Column, ColumnPurpose)> columns,
63-
bool allowQuotedStrings, bool supportSparse, string separator, bool hasHeader, bool trimWhitespace)
64+
bool allowQuotedStrings, bool supportSparse, char[] separators, bool hasHeader, bool trimWhitespace)
6465
{
6566
Columns = columns;
6667
AllowQuotedStrings = allowQuotedStrings;
6768
SupportSparse = supportSparse;
68-
Separator = separator;
69+
Separators = separators;
6970
HasHeader = hasHeader;
7071
TrimWhitespace = trimWhitespace;
7172
}
@@ -78,7 +79,7 @@ internal TextLoader BuildTextLoader()
7879
AllowQuoting = AllowQuotedStrings,
7980
AllowSparse = SupportSparse,
8081
Column = Columns.Select(c => c.Item1).ToArray(),
81-
Separator = Separator,
82+
Separators = Separators,
8283
HasHeader = HasHeader,
8384
TrimWhitespace = TrimWhitespace
8485
});

src/AutoML/AutoFitter/AutoFitter.cs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
using System.Diagnostics;
88
using System.Linq;
99
using System.Text;
10-
using System.Threading;
10+
using Microsoft.Data.DataView;
1111
using Microsoft.ML.Core.Data;
1212
using Microsoft.ML.Data;
1313

src/AutoML/AutoFitter/SuggestedPipeline.cs

+1
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
using System;
66
using System.Collections.Generic;
77
using System.Linq;
8+
using Microsoft.Data.DataView;
89
using Microsoft.ML.Core.Data;
910
using Microsoft.ML.Data;
1011

src/AutoML/AutoML.csproj

+3 -3
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@
1515
</PropertyGroup>
1616

1717
<ItemGroup>
18-
<PackageReference Include="Microsoft.ML" Version="0.9.0" />
19-
<PackageReference Include="Microsoft.ML.HalLearners" Version="0.9.0" />
20-
<PackageReference Include="Microsoft.ML.LightGBM" Version="0.9.0" />
18+
<PackageReference Include="Microsoft.ML" Version="0.10.0" />
19+
<PackageReference Include="Microsoft.ML.HalLearners" Version="0.10.0" />
20+
<PackageReference Include="Microsoft.ML.LightGBM" Version="0.10.0" />
2121
</ItemGroup>
2222
</Project>

src/AutoML/AutoMlUtils.cs

+5 -7
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
using System;
66
using System.Collections.Generic;
77
using System.Linq;
8-
using Microsoft.ML.Data;
8+
using Microsoft.Data.DataView;
99
using Microsoft.ML.Transforms;
1010

1111
namespace Microsoft.ML.Auto
@@ -26,19 +26,18 @@ public static void Assert(bool boolVal, string message = null)
2626
public static IDataView Take(this IDataView data, int count)
2727
{
2828
var context = new MLContext();
29-
var filter = SkipTakeFilter.Create(context, new SkipTakeFilter.TakeArguments { Count = count }, data);
30-
return new CacheDataView(context, filter, Enumerable.Range(0, data.Schema.Count).ToArray());
29+
return TakeFilter.Create(context, data, count);
3130
}
3231

3332
public static IDataView DropLastColumn(this IDataView data)
3433
{
3534
return new MLContext().Transforms.DropColumns(data.Schema[data.Schema.Count - 1].Name).Fit(data).Transform(data);
3635
}
3736

38-
public static (IDataView testData, IDataView validationData) TestValidateSplit(this TrainContextBase context, IDataView trainData)
37+
public static (IDataView testData, IDataView validationData) TestValidateSplit(this TrainCatalogBase catalog, IDataView trainData)
3938
{
4039
IDataView validationData;
41-
(trainData, validationData) = context.TrainTestSplit(trainData);
40+
(trainData, validationData) = catalog.TrainTestSplit(trainData);
4241
trainData = trainData.DropLastColumn();
4342
validationData = validationData.DropLastColumn();
4443
return (trainData, validationData);
@@ -47,8 +46,7 @@ public static (IDataView testData, IDataView validationData) TestValidateSplit(t
4746
public static IDataView Skip(this IDataView data, int count)
4847
{
4948
var context = new MLContext();
50-
var filter = SkipTakeFilter.Create(context, new SkipTakeFilter.SkipArguments { Count = count }, data);
51-
return new CacheDataView(context, filter, Enumerable.Range(0, data.Schema.Count).ToArray());
49+
return SkipFilter.Create(context, data, count);
5250
}
5351

5452
public static (string, ColumnType, ColumnPurpose, ColumnDimensions)[] GetColumnInfoTuples(MLContext context,

src/AutoML/ColumnInference/ColumnGroupingInference.cs

+23 -28
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
using System.Linq;
88
using System.Text;
99
using Microsoft.ML.Data;
10+
using static Microsoft.ML.Data.TextLoader;
1011

1112
namespace Microsoft.ML.Auto
1213
{
@@ -25,19 +26,19 @@ public class GroupingColumn
2526
public string SuggestedName;
2627
public DataKind ItemKind;
2728
public ColumnPurpose Purpose;
28-
public string ColumnRangeSelector;
29+
public Range[] Ranges;
2930

30-
public GroupingColumn(string name, DataKind kind, ColumnPurpose purpose, string rangeSelector)
31+
public GroupingColumn(string name, DataKind kind, ColumnPurpose purpose, Range[] ranges)
3132
{
3233
SuggestedName = name;
3334
ItemKind = kind;
3435
Purpose = purpose;
35-
ColumnRangeSelector = rangeSelector;
36+
Ranges = ranges;
3637
}
3738

3839
public TextLoader.Column GenerateTextLoaderColumn()
3940
{
40-
return TextLoader.Column.Parse(string.Format("{0}:{1}:{2}", SuggestedName, ItemKind, ColumnRangeSelector));
41+
return new TextLoader.Column(SuggestedName, ItemKind, Ranges);
4142
}
4243
}
4344

@@ -71,10 +72,10 @@ into g
7172
{
7273
string name = (hasHeader && g.Count() == 1)
7374
? g.First().Item1.SuggestedName
74-
: GetName(g.Key.ItemType.RawKind(), g.Key.Purpose, result);
75+
: GetName(g.Key.ItemType.GetRawKind(), g.Key.Purpose, result);
7576

76-
string range = GetRange(g.Select(t => t.Item1.ColumnIndex).ToArray());
77-
result.Add(new GroupingColumn(name, g.Key.ItemType.RawKind(), g.Key.Purpose, range));
77+
var ranges = GetRanges(g.Select(t => t.Item1.ColumnIndex).ToArray());
78+
result.Add(new GroupingColumn(name, g.Key.ItemType.GetRawKind(), g.Key.Purpose, ranges));
7879
}
7980

8081
return result.ToArray();
@@ -122,33 +123,27 @@ private static string GetPurposeName(ColumnPurpose purpose, DataKind itemKind)
122123
}
123124

124125
/// <summary>
125-
/// Generates a range selector from the array of indices.
126+
/// Generates a collection of Ranges from indices.
126127
/// </summary>
127-
private static string GetRange(int[] indices)
128+
private static Range[] GetRanges(int[] indices)
128129
{
129-
var sb = new StringBuilder();
130-
var sorted = indices.OrderBy(x => x).ToArray();
131-
132-
sb.Append(indices[0]);
133-
var prev = sorted[0];
134-
var start = sorted[0];
135-
for (int i = 1; i < sorted.Length; i++)
130+
Array.Sort(indices);
131+
var allRanges = new List<Range>();
132+
var currRange = new Range(indices[0]);
133+
for (int i = 1; i < indices.Length; i++)
136134
{
137-
if (sorted[i] > prev + 1)
135+
if (indices[i] == currRange.Max + 1)
138136
{
139-
if (prev > start)
140-
sb.AppendFormat("-{0}", prev);
141-
start = sorted[i];
142-
sb.AppendFormat(",{0}", start);
137+
currRange.Max++;
138+
}
139+
else
140+
{
141+
allRanges.Add(currRange);
142+
currRange = new Range(indices[i]);
143143
}
144-
prev = sorted[i];
145-
}
146-
if (prev > start)
147-
{
148-
sb.AppendFormat("-{0}", prev);
149144
}
150-
151-
return sb.ToString();
145+
allRanges.Add(currRange);
146+
return allRanges.ToArray();
152147
}
153148
}
154149
}

src/AutoML/ColumnInference/ColumnInferenceApi.cs

+4 -4
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,13 @@ public static ColumnInferenceResult InferColumns(MLContext context, string path,
2323
var typedLoaderArgs = new TextLoader.Arguments
2424
{
2525
Column = loaderColumns,
26-
Separator = splitInference.Separator,
26+
Separators = new[] { splitInference.Separator.Value },
2727
AllowSparse = splitInference.AllowSparse,
2828
AllowQuoting = splitInference.AllowQuote,
2929
HasHeader = hasHeader,
3030
TrimWhitespace = trimWhitespace
3131
};
32-
var textLoader = context.Data.CreateTextReader(typedLoaderArgs);
32+
var textLoader = context.Data.CreateTextLoader(typedLoaderArgs);
3333
var dataView = textLoader.Read(path);
3434

3535
var purposeInferenceResult = PurposeInference.InferPurposes(context, dataView, label);
@@ -52,7 +52,7 @@ public static ColumnInferenceResult InferColumns(MLContext context, string path,
5252
inferredColumns[i] = (loaderColumns[i], purposeInferenceResult[i].Purpose);
5353
}
5454
}
55-
return new ColumnInferenceResult(inferredColumns, splitInference.AllowQuote, splitInference.AllowSparse, splitInference.Separator, hasHeader, trimWhitespace);
55+
return new ColumnInferenceResult(inferredColumns, splitInference.AllowQuote, splitInference.AllowSparse, new char[] { splitInference.Separator.Value }, hasHeader, trimWhitespace);
5656
}
5757

5858
private static TextFileContents.ColumnSplitResult InferSplit(TextFileSample sample, char? separatorChar, bool? allowQuotedStrings, bool? supportSparse)
@@ -86,7 +86,7 @@ private static ColumnTypeInference.InferenceResult InferColumnTypes(MLContext co
8686
new ColumnTypeInference.Arguments
8787
{
8888
ColumnCount = splitInference.ColumnCount,
89-
Separator = splitInference.Separator,
89+
Separator = splitInference.Separator.Value,
9090
AllowSparse = splitInference.AllowSparse,
9191
AllowQuote = splitInference.AllowQuote,
9292
HasHeader = hasHeader

0 commit comments

Comments
 (0)