-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Conversion of ITrainer.Train returns predictor, accepts +TrainContext #522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
da06e4d
fa02820
0bff1ff
c16012e
e864d21
f5766fc
da2acdb
82f44e8
3905894
9d7eb02
9d34675
63ec9fa
837733a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
using Microsoft.ML.Runtime.Data; | ||
|
||
namespace Microsoft.ML.Runtime | ||
{ | ||
/// <summary> | ||
/// Holds information relevant to trainers. Instances of this class are meant to be constructed and passed | ||
/// into <see cref="ITrainer{TPredictor}.Train(TrainContext)"/> or <see cref="ITrainer.Train(TrainContext)"/>. | ||
/// This holds at least a training set, and optionally a validation set and an initial predictor. | ||
/// </summary> | ||
public sealed class TrainContext | ||
{ | ||
/// <summary> | ||
/// The training set. Cannot be <c>null</c>. | ||
/// </summary> | ||
public RoleMappedData TrainingSet { get; } | ||
|
||
/// <summary> | ||
/// The validation set. Can be <c>null</c>. Note that passing a non-<c>null</c> validation set into | ||
/// a trainer that does not support validation sets should not be considered an error condition. It | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Can we encourage trainer implementers to show at least a warning, if it makes sense? #Closed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd prefer to not. Warnings are already printed by the tooling utilizing it as we see here for validation sets and here for initial predictors. To decentralize the checking, in addition to the problems of having to insert that boilerplate somehow, also runs somewhat contrary to the philosophy behind In reply to: 202148448 [](ancestors = 202148448) |
||
/// should simply be ignored in that case. | ||
/// </summary> | ||
public RoleMappedData ValidationSet { get; } | ||
|
||
/// <summary> | ||
/// The initial predictor, for incremental training. Note that if a <see cref="ITrainer"/> implementor | ||
/// does not support incremental training, then it can ignore it similarly to how one would ignore | ||
/// <see cref="ValidationSet"/>. However, if the trainer does support incremental training and there | ||
/// is something wrong with a non-<c>null</c> value of this, then the trainer ought to throw an exception. | ||
/// </summary> | ||
public IPredictor InitialPredictor { get; } | ||
|
||
|
||
/// <summary> | ||
/// Constructor, given a training set and optional other arguments. | ||
/// </summary> | ||
/// <param name="trainingSet">Will set <see cref="TrainingSet"/> to this value. This must be specified</param> | ||
/// <param name="validationSet">Will set <see cref="ValidationSet"/> to this value if specified</param> | ||
/// <param name="initialPredictor">Will set <see cref="InitialPredictor"/> to this value if specified</param> | ||
public TrainContext(RoleMappedData trainingSet, RoleMappedData validationSet = null, IPredictor initialPredictor = null) | ||
{ | ||
// The training set is mandatory; the validation set and the initial predictor both default to null | ||
// and are optional. NOTE(review): Contracts.CheckValue presumably rejects a null argument - confirm. | ||
Contracts.CheckValue(trainingSet, nameof(trainingSet)); | ||
Contracts.CheckValueOrNull(validationSet); | ||
Contracts.CheckValueOrNull(initialPredictor); | ||
|
||
// REVIEW: Should there be code here to ensure that the role mappings between the two are compatible? | ||
// That is, all the role mappings are the same and the columns between them have identical types? | ||
|
||
// The arguments are stored as given in the get-only properties; no cross-checking of the sets is done here. | ||
TrainingSet = trainingSet; | ||
ValidationSet = validationSet; | ||
InitialPredictor = initialPredictor; | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
// See the LICENSE file in the project root for more information. | ||
|
||
namespace Microsoft.ML.Runtime | ||
{ | ||
/// <summary> | ||
/// Instances of this class possess information about trainers, in terms of their requirements and capabilities. | ||
/// The intended usage is as the value for <see cref="ITrainer.Info"/>. | ||
/// </summary> | ||
public sealed class TrainerInfo | ||
{ | ||
// REVIEW: Ideally trainers should be able to communicate | ||
// something about the type of data they are capable of being trained | ||
// on, e.g., what ColumnKinds they want, how many of each, of what type, | ||
// etc. This class seems like the most natural conduit for that sort | ||
// of extra information. | ||
|
||
/// <summary> | ||
/// Whether the trainer needs to see data in normalized form. Only non-parametric learners will tend to produce | ||
/// normalization here. | ||
/// </summary> | ||
public bool NeedNormalization { get; } | ||
|
||
/// <summary> | ||
/// Whether the trainer needs calibration to produce probabilities. As a general rule only trainers that produce | ||
/// binary classifier predictors that also do not have a natural probabilistic interpretation should have a | ||
/// <c>true</c> value here. | ||
/// </summary> | ||
public bool NeedCalibration { get; } | ||
|
||
/// <summary> | ||
/// Whether this trainer could benefit from a cached view of the data. Trainers that have few passes over the | ||
/// data, or that need to build their own custom data structure over the data, will have a <c>false</c> here. | ||
/// </summary> | ||
public bool WantCaching { get; } | ||
|
||
/// <summary> | ||
/// Whether the trainer supports validation sets via <see cref="TrainContext.ValidationSet"/>. A value of | ||
/// <c>false</c> from this property is an indication the trainer does not support | ||
/// that. | ||
/// </summary> | ||
public bool SupportsValidation { get; } | ||
|
||
/// <summary> | ||
/// Whether the trainer can support incremental training via <see cref="TrainContext.InitialPredictor"/>. A | ||
/// value of <c>false</c> from this property is an indication the trainer does | ||
/// not support that. | ||
/// </summary> | ||
public bool SupportsIncrementalTraining { get; } | ||
|
||
/// <summary> | ||
/// Initializes with the given parameters. The parameters have default values for the most typical values | ||
/// for most classical trainers. | ||
/// </summary> | ||
/// <param name="normalization">The value for the property <see cref="NeedNormalization"/>. Defaults to <c>true</c>.</param> | ||
/// <param name="calibration">The value for the property <see cref="NeedCalibration"/>. Defaults to <c>false</c>.</param> | ||
/// <param name="caching">The value for the property <see cref="WantCaching"/>. Defaults to <c>true</c>.</param> | ||
/// <param name="supportValid">The value for the property <see cref="SupportsValidation"/>. Defaults to <c>false</c>.</param> | ||
/// <param name="supportIncrementalTrain">The value for the property <see cref="SupportsIncrementalTraining"/>. Defaults to <c>false</c>.</param> | ||
public TrainerInfo(bool normalization = true, bool calibration = false, bool caching = true, | ||
bool supportValid = false, bool supportIncrementalTrain = false) | ||
{ | ||
NeedNormalization = normalization; | ||
NeedCalibration = calibration; | ||
WantCaching = caching; | ||
SupportsValidation = supportValid; | ||
SupportsIncrementalTraining = supportIncrementalTrain; | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think this could/should move into TrainerInfo? Or is the thought that we are going to delete it altogether, so there is no need to move it, just to delete it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd rather get rid of `PredictionKind` altogether, but not as part of this PR, since what we replace it with will require some discussion and consideration in a separate issue. That is, get rid of it from both `IPredictor` and `ITrainer`, and replace it with something that uses the types instead. If, however, we were to keep it, there's something attractive about having `IPredictor` resemble `ITrainer`.