Skip to content

adding some trainer extensions on the StandardLearners catalog. Correcting namespace, and names #1682

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Nov 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -1132,7 +1132,7 @@ var learningPipeline = reader.MakeNewEstimator()
BagOfBigrams: r.Message.NormalizeText().ToBagofHashedWords(ngramLength: 2, allLengths: false),

// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
BagOfTrichar: r.Message.TokenizeIntoCharacters().ToNgrams(ngramLength: 3, weighting: NgramCountingEstimator.WeightingCriteria.TfIdf),
BagOfTrichar: r.Message.TokenizeIntoCharacters().ToNgrams(ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf),

// NLP pipeline 4: word embeddings.
Embeddings: r.Message.NormalizeText().TokenizeText().WordEmbeddings(WordEmbeddingsExtractorTransformer.PretrainedModelKind.GloVeTwitter25D)
Expand Down Expand Up @@ -1186,8 +1186,8 @@ var dynamicPipeline =

// NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
.Append(mlContext.Transforms.Text.TokenizeCharacters("Message", "MessageChars"))
.Append(new NgramCountingEstimator(mlContext, "MessageChars", "BagOfTrichar",
ngramLength: 3, weighting: NgramTokenizingTransformer.WeightingCriteria.TfIdf))
.Append(new NgramExtractingEstimator(mlContext, "MessageChars", "BagOfTrichar",
ngramLength: 3, weighting: NgramExtractingEstimator.WeightingCriteria.TfIdf))

// NLP pipeline 4: word embeddings.
.Append(mlContext.Transforms.Text.TokenizeWords("NormalizedMessage", "TokenizedMessage"))
Expand Down Expand Up @@ -1297,7 +1297,7 @@ var dynamicPipeline =
// Concatenate all the features together into one column 'Features'.
mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
// Note that the label is text, so it needs to be converted to key.
.Append(mlContext.Transforms.Categorical.MapValueToKey("Label"), TransformerScope.TrainTest)
.Append(mlContext.Transforms.Conversions.MapValueToKey("Label"), TransformerScope.TrainTest)
// Use the multi-class SDCA model to predict the label using features.
.Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
using System;
using System.Collections.Generic;

// NOTE: WHEN ADDING TO THE FILE, ALWAYS APPEND TO THE END OF IT.
// If you change the existing content, check that the files referencing it in the XML documentation are still correct, as they reference
// line by line.
namespace Microsoft.ML.Samples.Dynamic
{
public class MatrixFactorizationExample
Expand Down Expand Up @@ -67,7 +64,7 @@ public static void MatrixFactorizationInMemoryData()
// Create a matrix factorization trainer which may consume "Value" as the training label, "MatrixColumnIndex" as the
// matrix's column index, and "MatrixRowIndex" as the matrix's row index. Here nameof(...) is used to extract field
// names in the MatrixElement class.
var pipeline = new MatrixFactorizationTrainer(mlContext,
var pipeline = mlContext.Recommendation().Trainers.MatrixFactorization(
nameof(MatrixElement.MatrixColumnIndex),
nameof(MatrixElement.MatrixRowIndex),
nameof(MatrixElement.Value),
Expand All @@ -85,7 +82,7 @@ public static void MatrixFactorizationInMemoryData()
var prediction = model.Transform(dataView);

// Calculate regression metrics for the prediction result.
var metrics = mlContext.Regression.Evaluate(prediction,
var metrics = mlContext.Recommendation().Evaluate(prediction,
label: nameof(MatrixElement.Value), score: nameof(MatrixElementForScore.Score));

// Print out some metrics for checking the model's quality.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

<NativeAssemblyReference Include="CpuMathNative" />
<NativeAssemblyReference Include="FastTreeNative" />
<NativeAssemblyReference Include="MatrixFactorizationNative" />

<ProjectReference Include="..\..\..\src\Microsoft.ML.Analyzer\Microsoft.ML.Analyzer.csproj">
<ReferenceOutputAssembly>false</ReferenceOutputAssembly>
Expand Down
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.Samples/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ internal static class Program
{
static void Main(string[] args)
{
NormalizerExample.Normalizer();
MatrixFactorizationExample.MatrixFactorizationInMemoryData();
}
}
}
6 changes: 3 additions & 3 deletions src/Microsoft.ML.Data/EntryPoints/SchemaManipulation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ public static CommonOutputs.TransformOutput SelectColumns(IHostEnvironment env,
return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf };
}

[TlcModule.EntryPoint(Name = "Transforms.ColumnCopier", Desc = "Duplicates columns from the dataset", UserName = ColumnsCopyingTransformer.UserName, ShortName = ColumnsCopyingTransformer.ShortName)]
public static CommonOutputs.TransformOutput CopyColumns(IHostEnvironment env, ColumnsCopyingTransformer.Arguments input)
[TlcModule.EntryPoint(Name = "Transforms.ColumnCopier", Desc = "Duplicates columns from the dataset", UserName = ColumnCopyingTransformer.UserName, ShortName = ColumnCopyingTransformer.ShortName)]
public static CommonOutputs.TransformOutput CopyColumns(IHostEnvironment env, ColumnCopyingTransformer.Arguments input)
{
Contracts.CheckValue(env, nameof(env));
var host = env.Register("CopyColumns");
host.CheckValue(input, nameof(input));
EntryPointUtils.CheckInputArgs(host, input);
var xf = ColumnsCopyingTransformer.Create(env, input, input.Data);
var xf = ColumnCopyingTransformer.Create(env, input, input.Data);
return new CommonOutputs.TransformOutput { Model = new TransformModel(env, xf, input.Data), OutputData = xf };
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/EntryPoints/ScoreColumnSelector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(I
copyCols.Add((source, name));
}

var copyColumn = new ColumnsCopyingTransformer(env, copyCols.ToArray()).Transform(input.Data);
var copyColumn = new ColumnCopyingTransformer(env, copyCols.ToArray()).Transform(input.Data);
var dropColumn = ColumnSelectingTransformer.CreateDrop(env, copyColumn, copyCols.Select(c => c.Source).ToArray());
return new CommonOutputs.TransformOutput { Model = new TransformModel(env, dropColumn, input.Data), OutputData = dropColumn };
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,7 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary<string, IDa
colsToKeep.Add(AnomalyDetectionEvaluator.OverallMetrics.ThreshAtNumPos);
colsToKeep.Add(BinaryClassifierEvaluator.Auc);

overall = new ColumnsCopyingTransformer(Host, cols).Transform(overall);
overall = new ColumnCopyingTransformer(Host, cols).Transform(overall);
IDataView fold = ColumnSelectingTransformer.CreateKeep(Host, overall, colsToKeep.ToArray());

string weightedFold;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1356,7 +1356,7 @@ protected override void PrintFoldResultsCore(IChannel ch, Dictionary<string, IDa
if (fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out index))
colsToKeep.Add(MetricKinds.ColumnNames.StratVal);

fold = new ColumnsCopyingTransformer(Host, cols).Transform(fold);
fold = new ColumnCopyingTransformer(Host, cols).Transform(fold);

// Select the columns that are specified in the Copy
fold = ColumnSelectingTransformer.CreateKeep(Host, fold, colsToKeep.ToArray());
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Evaluators/MamlEvaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ private IDataView WrapPerInstance(RoleMappedData perInst)
foreach (var col in GetPerInstanceColumnsToSave(perInst.Schema))
colsToKeep.Add(col);

idv = new ColumnsCopyingTransformer(Host, cols.ToArray()).Transform(idv);
idv = new ColumnCopyingTransformer(Host, cols.ToArray()).Transform(idv);
idv = ColumnSelectingTransformer.CreateKeep(Host, idv, colsToKeep.ToArray());
return GetPerInstanceMetricsCore(idv, perInst.Schema);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1040,7 +1040,7 @@ protected override IDataView GetOverallResultsCore(IDataView overall)

private IDataView ChangeTopKAccColumnName(IDataView input)
{
input = new ColumnsCopyingTransformer(Host, (MultiClassClassifierEvaluator.TopKAccuracy, string.Format(TopKAccuracyFormat, _outputTopKAcc))).Transform(input);
input = new ColumnCopyingTransformer(Host, (MultiClassClassifierEvaluator.TopKAccuracy, string.Format(TopKAccuracyFormat, _outputTopKAcc))).Transform(input);
return ColumnSelectingTransformer.CreateDrop(Host, input, MultiClassClassifierEvaluator.TopKAccuracy);
}

Expand Down
1 change: 1 addition & 0 deletions src/Microsoft.ML.Data/MLContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public sealed class MLContext : IHostEnvironment
/// Trainers and tasks specific to clustering problems.
/// </summary>
public ClusteringContext Clustering { get; }

/// <summary>
/// Trainers and tasks specific to ranking problems.
/// </summary>
Expand Down
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/StaticPipe/Reconciler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public EstimatorReconciler() : base() { }
/// subset of estimator transforms do not allow this: they produce columns whose names are unconfigurable. For
/// these, there is this collection which provides the names used by the analysis tool. If the estimator under
/// construction must use one of the names here, then they are responsible for "saving" the column they will
/// overwrite using applications of the <see cref="ColumnsCopyingEstimator"/>. Note that if the estimator under
/// overwrite using applications of the <see cref="ColumnCopyingEstimator"/>. Note that if the estimator under
/// construction has complete control over what columns it produces, there is no need for it to pay this argument
/// any attention.</param>
/// <returns>Returns an estimator.</returns>
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Data/StaticPipe/StaticPipeUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ internal static IDataReaderEstimator<TIn, IDataReader<TIn>>

// If any renamings were necessary, create the CopyColumns estimator.
if (toCopy.Count > 0)
estimator = new ColumnsCopyingEstimator(env, toCopy.ToArray());
estimator = new ColumnCopyingEstimator(env, toCopy.ToArray());

// First clear the inputs from zero-dependencies yet to be resolved.
foreach (var col in baseInputs)
Expand Down Expand Up @@ -282,7 +282,7 @@ internal static IDataReaderEstimator<TIn, IDataReader<TIn>>
// If any final renamings were necessary, insert the appropriate CopyColumns transform.
if (toCopy.Count > 0)
{
var copyEstimator = new ColumnsCopyingEstimator(env, toCopy.ToArray());
var copyEstimator = new ColumnCopyingEstimator(env, toCopy.ToArray());
if (estimator == null)
estimator = copyEstimator;
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ protected TrainerEstimatorReconciler(PipelineColumn[] inputs, string[] outputNam

/// <summary>
/// Produces the estimator. Note that this is made out of <see cref="ReconcileCore(IHostEnvironment, string[])"/>'s
/// return value, plus whatever usages of <see cref="ColumnsCopyingEstimator"/> are necessary to avoid collisions with
/// return value, plus whatever usages of <see cref="ColumnCopyingEstimator"/> are necessary to avoid collisions with
/// the output names fed to the constructor. This class provides the implementation, and subclasses should instead
/// override <see cref="ReconcileCore(IHostEnvironment, string[])"/>.
/// </summary>
Expand Down Expand Up @@ -103,7 +103,7 @@ public sealed override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
newInputNames[p.Key] = old2New.ContainsKey(p.Value) ? old2New[p.Value] : p.Value;
inputNames = newInputNames;
}
result = new ColumnsCopyingEstimator(env, old2New.Select(p => (p.Key, p.Value)).ToArray());
result = new ColumnCopyingEstimator(env, old2New.Select(p => (p.Key, p.Value)).ToArray());
}

// Map the inputs to the names.
Expand All @@ -129,7 +129,7 @@ public sealed override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
foreach (var p in old2New)
toRename.Add((p.Value, p.Key));
if (toRename.Count > 0)
result = result.Append(new ColumnsCopyingEstimator(env, toRename.ToArray()));
result = result.Append(new ColumnCopyingEstimator(env, toRename.ToArray()));

return result;
}
Expand Down
5 changes: 4 additions & 1 deletion src/Microsoft.ML.Data/TrainContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace Microsoft.ML
public abstract class TrainContextBase
{
protected readonly IHost Host;

[BestFriend]
internal IHostEnvironment Environment => Host;

/// <summary>
Expand Down Expand Up @@ -162,6 +164,7 @@ private void EnsureStratificationColumn(ref IDataView data, ref string stratific
/// </summary>
public abstract class ContextInstantiatorBase
{
[BestFriend]
internal TrainContextBase Owner { get; }

protected ContextInstantiatorBase(TrainContextBase ctx)
Expand Down Expand Up @@ -498,7 +501,7 @@ public RegressionEvaluator.Result Evaluate(IDataView data, string label = Defaul
}

/// <summary>
/// The central context for regression trainers.
/// The central context for ranking trainers.
/// </summary>
public sealed class RankingContext : TrainContextBase
{
Expand Down
47 changes: 0 additions & 47 deletions src/Microsoft.ML.Data/Transforms/CategoricalCatalog.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/Microsoft.ML.Data/Transforms/ColumnBindingsBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
using Microsoft.ML.Runtime.CommandLine;
using Microsoft.ML.Runtime.Internal.Utilities;
using Microsoft.ML.Runtime.Model;
using Microsoft.ML.Transforms.Categorical;
using Microsoft.ML.Transforms.Conversions;
using System;
using System.Collections.Generic;
using System.Linq;
Expand Down
Loading