Skip to content

Commit dcc8ae8

Browse files
authored
adding a dependency to the MlNetMklDeps package (#594)
Introducing the entry point for OLS and enabling its test. Adding documentation for OLS.
1 parent 2107b82 commit dcc8ae8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+59467
-8
lines changed

build/Dependencies.props

+1
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
<SystemReflectionEmitLightweightPackageVersion>4.3.0</SystemReflectionEmitLightweightPackageVersion>
99
<PublishSymbolsPackageVersion>1.0.0-beta-62824-02</PublishSymbolsPackageVersion>
1010
<LightGBMPackageVersion>2.1.2.2</LightGBMPackageVersion>
11+
<MlNetMklDepsPackageVersion>0.0.0.1</MlNetMklDepsPackageVersion>
1112
</PropertyGroup>
1213
</Project>

src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
<ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" />
1212
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
1313
<ProjectReference Include="..\Microsoft.ML\Microsoft.ML.csproj" />
14+
<PackageReference Include="MlNetMklDeps" Version="$(MlNetMklDepsPackageVersion)" />
1415
</ItemGroup>
1516

1617
</Project>

src/Microsoft.ML.StandardLearners/Standard/OlsLinearRegression.cs

+21-5
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@
2828
"OLS Linear Regression Executor",
2929
OlsLinearRegressionPredictor.LoaderSignature)]
3030

31+
[assembly: LoadableClass(typeof(void), typeof(OlsLinearRegressionTrainer), null, typeof(SignatureEntryPointModule), OlsLinearRegressionTrainer.LoadNameValue)]
32+
3133
namespace Microsoft.ML.Runtime.Learners
3234
{
35+
/// <include file='doc.xml' path='doc/members/member[@name="OLS"]/*' />
3336
public sealed class OlsLinearRegressionTrainer : TrainerBase<OlsLinearRegressionPredictor>
3437
{
3538
public sealed class Arguments : LearnerInputBaseWithWeight
@@ -51,11 +54,6 @@ public sealed class Arguments : LearnerInputBaseWithWeight
5154
public const string ShortName = "ols";
5255
internal const string Summary = "The ordinary least square regression fits the target function as a linear function of the numerical features "
5356
+ "that minimizes the square loss function.";
54-
internal const string Remarks = @"<remarks>
55-
<a href='https://en.wikipedia.org/wiki/Ordinary_least_squares'>Ordinary least squares (OLS)</a> is a parameterized regression method.
56-
It assumes that the conditional mean of the dependent variable follows a linear function of the dependent variables.
57-
By minimizing the squares of the difference between observed values and the predictions, the parameters of the regressor can be estimated.
58-
</remarks>";
5957

6058
private readonly Float _l2Weight;
6159
private readonly bool _perParameterSignificance;
@@ -463,6 +461,24 @@ public static void Pptri(Layout layout, UpLo uplo, int n, Double[] ap)
463461
}
464462
}
465463
}
464+
465+
[TlcModule.EntryPoint(Name = "Trainers.OrdinaryLeastSquaresRegressor",
466+
Desc = "Train an OLS regression model.",
467+
UserName = UserNameValue,
468+
ShortName = ShortName,
469+
XmlInclude = new[] { @"<include file='../Microsoft.ML.StandardLearners/Standard/doc.xml' path='doc/members/member[@name=""OLS""]/*' />" })]
470+
public static CommonOutputs.RegressionOutput TrainRegression(IHostEnvironment env, Arguments input)
471+
{
472+
Contracts.CheckValue(env, nameof(env));
473+
var host = env.Register("TrainOLS");
474+
host.CheckValue(input, nameof(input));
475+
EntryPointUtils.CheckInputArgs(host, input);
476+
477+
return LearnerEntryPointsUtils.Train<Arguments, CommonOutputs.RegressionOutput>(host, input,
478+
() => new OlsLinearRegressionTrainer(host, input),
479+
() => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.LabelColumn),
480+
() => LearnerEntryPointsUtils.FindColumn(host, input.TrainingData.Schema, input.WeightColumn));
481+
}
466482
}
467483

468484
/// <summary>

src/Microsoft.ML.StandardLearners/Standard/doc.xml

+21
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,27 @@
6868
</code>
6969
</example>
7070
</example>
71+
72+
<member name="OLS">
73+
<summary>
74+
Train an OLS regression model.
75+
</summary>
76+
<remarks>
77+
<a href='https://en.wikipedia.org/wiki/Ordinary_least_squares'>Ordinary least squares (OLS)</a> is a parameterized regression method.
78+
It assumes that the conditional mean of the dependent variable follows a linear function of the independent variables.
79+
The parameters of the regressor can be estimated by minimizing the squares of the difference between observed values and the predictions.
80+
</remarks>
81+
<example>
82+
<code language="csharp">
83+
new OrdinaryLeastSquaresRegressor
84+
{
85+
L2Weight = 0.1,
86+
PerParameterSignificance = false,
87+
NormalizeFeatures = Microsoft.ML.Models.NormalizeOption.Yes
88+
}
89+
</code>
90+
</example>
91+
</member>
7192

7293
</members>
7394
</doc>

src/Microsoft.ML/CSharpApi.cs

+99
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,18 @@ public void Add(Microsoft.ML.Trainers.OnlineGradientDescentRegressor input, Micr
754754
_jsonNodes.Add(Serialize("Trainers.OnlineGradientDescentRegressor", input, output));
755755
}
756756

757+
public Microsoft.ML.Trainers.OrdinaryLeastSquaresRegressor.Output Add(Microsoft.ML.Trainers.OrdinaryLeastSquaresRegressor input)
758+
{
759+
var output = new Microsoft.ML.Trainers.OrdinaryLeastSquaresRegressor.Output();
760+
Add(input, output);
761+
return output;
762+
}
763+
764+
public void Add(Microsoft.ML.Trainers.OrdinaryLeastSquaresRegressor input, Microsoft.ML.Trainers.OrdinaryLeastSquaresRegressor.Output output)
765+
{
766+
_jsonNodes.Add(Serialize("Trainers.OrdinaryLeastSquaresRegressor", input, output));
767+
}
768+
757769
public Microsoft.ML.Trainers.PcaAnomalyDetector.Output Add(Microsoft.ML.Trainers.PcaAnomalyDetector input)
758770
{
759771
var output = new Microsoft.ML.Trainers.PcaAnomalyDetector.Output();
@@ -8824,6 +8836,93 @@ public OnlineGradientDescentRegressorPipelineStep(Output output)
88248836
}
88258837
}
88268838

8839+
namespace Trainers
8840+
{
8841+
8842+
/// <include file='../Microsoft.ML.StandardLearners/Standard/doc.xml' path='doc/members/member[@name="OLS"]/*' />
8843+
public sealed partial class OrdinaryLeastSquaresRegressor : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithWeight, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
8844+
{
8845+
8846+
8847+
/// <summary>
8848+
/// L2 regularization weight
8849+
/// </summary>
8850+
[TlcModule.SweepableDiscreteParamAttribute("L2Weight", new object[]{1E-06f, 0.1f, 1f})]
8851+
public float L2Weight { get; set; } = 1E-06f;
8852+
8853+
/// <summary>
8854+
/// Whether to calculate per parameter significance statistics
8855+
/// </summary>
8856+
public bool PerParameterSignificance { get; set; } = true;
8857+
8858+
/// <summary>
8859+
/// Column to use for example weight
8860+
/// </summary>
8861+
public Microsoft.ML.Runtime.EntryPoints.Optional<string> WeightColumn { get; set; }
8862+
8863+
/// <summary>
8864+
/// Column to use for labels
8865+
/// </summary>
8866+
public string LabelColumn { get; set; } = "Label";
8867+
8868+
/// <summary>
8869+
/// The data to be used for training
8870+
/// </summary>
8871+
public Var<Microsoft.ML.Runtime.Data.IDataView> TrainingData { get; set; } = new Var<Microsoft.ML.Runtime.Data.IDataView>();
8872+
8873+
/// <summary>
8874+
/// Column to use for features
8875+
/// </summary>
8876+
public string FeatureColumn { get; set; } = "Features";
8877+
8878+
/// <summary>
8879+
/// Normalize option for the feature column
8880+
/// </summary>
8881+
public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto;
8882+
8883+
/// <summary>
8884+
/// Whether learner should cache input training data
8885+
/// </summary>
8886+
public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto;
8887+
8888+
8889+
public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IRegressionOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput
8890+
{
8891+
/// <summary>
8892+
/// The trained model
8893+
/// </summary>
8894+
public Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel> PredictorModel { get; set; } = new Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel>();
8895+
8896+
}
8897+
public Var<IDataView> GetInputData() => TrainingData;
8898+
8899+
public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
8900+
{
8901+
if (previousStep != null)
8902+
{
8903+
if (!(previousStep is ILearningPipelineDataStep dataStep))
8904+
{
8905+
throw new InvalidOperationException($"{ nameof(OrdinaryLeastSquaresRegressor)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
8906+
}
8907+
8908+
TrainingData = dataStep.Data;
8909+
}
8910+
Output output = experiment.Add(this);
8911+
return new OrdinaryLeastSquaresRegressorPipelineStep(output);
8912+
}
8913+
8914+
private class OrdinaryLeastSquaresRegressorPipelineStep : ILearningPipelinePredictorStep
8915+
{
8916+
public OrdinaryLeastSquaresRegressorPipelineStep(Output output)
8917+
{
8918+
Model = output.PredictorModel;
8919+
}
8920+
8921+
public Var<IPredictorModel> Model { get; }
8922+
}
8923+
}
8924+
}
8925+
88278926
namespace Trainers
88288927
{
88298928

test/BaselineOutput/Common/EntryPoints/core_ep-list.tsv

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Trainers.LogisticRegressionBinaryClassifier Logistic Regression is a method in s
5959
Trainers.LogisticRegressionClassifier Logistic Regression is a method in statistics used to predict the probability of occurrence of an event and can be used as a classification algorithm. The algorithm predicts the probability of occurrence of an event by fitting data to a logistical function. Microsoft.ML.Runtime.Learners.LogisticRegression TrainMultiClass Microsoft.ML.Runtime.Learners.MulticlassLogisticRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
6060
Trainers.NaiveBayesClassifier Train a MultiClassNaiveBayesTrainer. Microsoft.ML.Runtime.Learners.MultiClassNaiveBayesTrainer TrainMultiClassNaiveBayesTrainer Microsoft.ML.Runtime.Learners.MultiClassNaiveBayesTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+MulticlassClassificationOutput
6161
Trainers.OnlineGradientDescentRegressor Train a Online gradient descent perceptron. Microsoft.ML.Runtime.Learners.OnlineGradientDescentTrainer TrainRegression Microsoft.ML.Runtime.Learners.OnlineGradientDescentTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
62+
Trainers.OrdinaryLeastSquaresRegressor Train an OLS regression model. Microsoft.ML.Runtime.Learners.OlsLinearRegressionTrainer TrainRegression Microsoft.ML.Runtime.Learners.OlsLinearRegressionTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
6263
Trainers.PcaAnomalyDetector Train an PCA Anomaly model. Microsoft.ML.Runtime.PCA.RandomizedPcaTrainer TrainPcaAnomaly Microsoft.ML.Runtime.PCA.RandomizedPcaTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+AnomalyDetectionOutput
6364
Trainers.PoissonRegressor Train an Poisson regression model. Microsoft.ML.Runtime.Learners.PoissonRegression TrainRegression Microsoft.ML.Runtime.Learners.PoissonRegression+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+RegressionOutput
6465
Trainers.StochasticDualCoordinateAscentBinaryClassifier Train an SDCA binary model. Microsoft.ML.Runtime.Learners.Sdca TrainBinary Microsoft.ML.Runtime.Learners.LinearClassificationTrainer+Arguments Microsoft.ML.Runtime.EntryPoints.CommonOutputs+BinaryClassificationOutput

test/BaselineOutput/Common/EntryPoints/core_manifest.json

+143
Original file line numberDiff line numberDiff line change
@@ -14050,6 +14050,149 @@
1405014050
"ITrainerOutput"
1405114051
]
1405214052
},
14053+
{
14054+
"Name": "Trainers.OrdinaryLeastSquaresRegressor",
14055+
"Desc": "Train an OLS regression model.",
14056+
"FriendlyName": "Ordinary Least Squares (Regression)",
14057+
"ShortName": "ols",
14058+
"Inputs": [
14059+
{
14060+
"Name": "TrainingData",
14061+
"Type": "DataView",
14062+
"Desc": "The data to be used for training",
14063+
"Aliases": [
14064+
"data"
14065+
],
14066+
"Required": true,
14067+
"SortOrder": 1.0,
14068+
"IsNullable": false
14069+
},
14070+
{
14071+
"Name": "FeatureColumn",
14072+
"Type": "String",
14073+
"Desc": "Column to use for features",
14074+
"Aliases": [
14075+
"feat"
14076+
],
14077+
"Required": false,
14078+
"SortOrder": 2.0,
14079+
"IsNullable": false,
14080+
"Default": "Features"
14081+
},
14082+
{
14083+
"Name": "LabelColumn",
14084+
"Type": "String",
14085+
"Desc": "Column to use for labels",
14086+
"Aliases": [
14087+
"lab"
14088+
],
14089+
"Required": false,
14090+
"SortOrder": 3.0,
14091+
"IsNullable": false,
14092+
"Default": "Label"
14093+
},
14094+
{
14095+
"Name": "WeightColumn",
14096+
"Type": "String",
14097+
"Desc": "Column to use for example weight",
14098+
"Aliases": [
14099+
"weight"
14100+
],
14101+
"Required": false,
14102+
"SortOrder": 4.0,
14103+
"IsNullable": false,
14104+
"Default": "Weight"
14105+
},
14106+
{
14107+
"Name": "NormalizeFeatures",
14108+
"Type": {
14109+
"Kind": "Enum",
14110+
"Values": [
14111+
"No",
14112+
"Warn",
14113+
"Auto",
14114+
"Yes"
14115+
]
14116+
},
14117+
"Desc": "Normalize option for the feature column",
14118+
"Aliases": [
14119+
"norm"
14120+
],
14121+
"Required": false,
14122+
"SortOrder": 5.0,
14123+
"IsNullable": false,
14124+
"Default": "Auto"
14125+
},
14126+
{
14127+
"Name": "Caching",
14128+
"Type": {
14129+
"Kind": "Enum",
14130+
"Values": [
14131+
"Auto",
14132+
"Memory",
14133+
"Disk",
14134+
"None"
14135+
]
14136+
},
14137+
"Desc": "Whether learner should cache input training data",
14138+
"Aliases": [
14139+
"cache"
14140+
],
14141+
"Required": false,
14142+
"SortOrder": 6.0,
14143+
"IsNullable": false,
14144+
"Default": "Auto"
14145+
},
14146+
{
14147+
"Name": "L2Weight",
14148+
"Type": "Float",
14149+
"Desc": "L2 regularization weight",
14150+
"Aliases": [
14151+
"l2"
14152+
],
14153+
"Required": false,
14154+
"SortOrder": 50.0,
14155+
"IsNullable": false,
14156+
"Default": 1E-06,
14157+
"SweepRange": {
14158+
"RangeType": "Discrete",
14159+
"Values": [
14160+
1E-06,
14161+
0.1,
14162+
1.0
14163+
]
14164+
}
14165+
},
14166+
{
14167+
"Name": "PerParameterSignificance",
14168+
"Type": "Bool",
14169+
"Desc": "Whether to calculate per parameter significance statistics",
14170+
"Aliases": [
14171+
"sig"
14172+
],
14173+
"Required": false,
14174+
"SortOrder": 150.0,
14175+
"IsNullable": false,
14176+
"Default": true
14177+
}
14178+
],
14179+
"Outputs": [
14180+
{
14181+
"Name": "PredictorModel",
14182+
"Type": "PredictorModel",
14183+
"Desc": "The trained model"
14184+
}
14185+
],
14186+
"InputKind": [
14187+
"ITrainerInputWithWeight",
14188+
"ITrainerInputWithLabel",
14189+
"ITrainerInput"
14190+
],
14191+
"OutputKind": [
14192+
"IRegressionOutput",
14193+
"ITrainerOutput"
14194+
]
14195+
},
1405314196
{
1405414197
"Name": "Trainers.PcaAnomalyDetector",
1405514198
"Desc": "Train an PCA Anomaly model.",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
maml.exe CV tr=OLS threads=- norm=No dout=%Output% loader=Text{col=Label:R4:11 col=Features:R4:0-10 sep=; header+} data=%Data% seed=1
2+
Not adding a normalizer.
3+
Trainer solving for 12 parameters across 2409 examples
4+
Coefficient of determination R2 = 0.291173667189042, or 0.287920813763543 (adjusted)
5+
Not training a calibrator because it is not needed.
6+
Not adding a normalizer.
7+
Trainer solving for 12 parameters across 2489 examples
8+
Coefficient of determination R2 = 0.280280855195625, or 0.277084686203761 (adjusted)
9+
Not training a calibrator because it is not needed.
10+
L1(avg): 0.586798
11+
L2(avg): 0.573048
12+
RMS(avg): 0.756999
13+
Loss-fn(avg): 0.573048
14+
R Squared: 0.263841
15+
L1(avg): 0.587999
16+
L2(avg): 0.571859
17+
RMS(avg): 0.756214
18+
Loss-fn(avg): 0.571859
19+
R Squared: 0.276072
20+
21+
OVERALL RESULTS
22+
---------------------------------------
23+
L1(avg): 0.587398 (0.0006)
24+
L2(avg): 0.572454 (0.0006)
25+
RMS(avg): 0.756606 (0.0004)
26+
Loss-fn(avg): 0.572454 (0.0006)
27+
R Squared: 0.269956 (0.0061)
28+
29+
---------------------------------------
30+
Physical memory usage(MB): %Number%
31+
Virtual memory usage(MB): %Number%
32+
%DateTime% Time elapsed(s): %Number%
33+

0 commit comments

Comments
 (0)