diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs index ce43ab733f..616d495738 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/MulticlassClassification/StochasticDualCoordinateAscent.cs @@ -1,5 +1,4 @@ -using Microsoft.ML.Data; -using Microsoft.ML.SamplesUtils; +using Microsoft.ML.SamplesUtils; namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs index bbffb6564a..8349922768 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class ConvertType { @@ -39,10 +40,12 @@ public static void Example() // A: False Aconv:0 // A: False Aconv:0 } + private class InputData { public bool Survived; } + private sealed class TransformedData : InputData { public Int32 SurvivedInt32 { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs index 325dfe5271..b7790d9f11 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; -namespace 
Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { // This example illustrates how to convert multiple columns of different types to one type, in this case System.Single. // This is often a useful data transformation before concatenating the features together and passing them to a particular estimator. @@ -58,6 +59,7 @@ public static void Example() // 1 8904 6.368924E+17 8.09 } + // The initial data type private class InputData { @@ -66,6 +68,7 @@ private class InputData public DateTime Feature3; public double Feature4; } + // The resulting data type after the transformation private class TransformedData : InputData { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs index a4bfeec09d..daee047cff 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs @@ -1,7 +1,8 @@ using System; +using Microsoft.ML; using Microsoft.ML.Data; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { // This example demonstrates hashing of categorical string and integer data types. 
public static class Hash diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs index c7f5636d47..8ff7778ada 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs @@ -1,11 +1,13 @@ using System; using System.Collections.Generic; +using Microsoft.ML; using Microsoft.ML.Data; +using Microsoft.ML.SamplesUtils; using Microsoft.ML.Transforms; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { - public class KeyToValueValueToKey + public class KeyToValueToKey { public static void Example() { @@ -14,7 +16,7 @@ public static void Example() var ml = new MLContext(); // Get a small dataset as an IEnumerable and load it into ML.NET data set. - IEnumerable data = SamplesUtils.DatasetUtils.GetTopicsData(); + IEnumerable data = DatasetUtils.GetTopicsData(); var trainData = ml.Data.LoadFromEnumerable(data); // Preview of one of the columns of the the topics data. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs new file mode 100644 index 0000000000..d77be471ee --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs @@ -0,0 +1,80 @@ +using System; +using Microsoft.ML; +using Microsoft.ML.SamplesUtils; + +namespace Samples.Dynamic +{ + /// This example demonstrates the use of the KeyToValueMappingEstimator, by mapping KeyType values to the original strings. + /// For more on ML.NET KeyTypes see: https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types + /// It is possible to have multiple values map to the same category. 
+ + public class MapKeyToValueMultiColumn + { + public static void Example() + { + // Create a new context for ML.NET operations. It can be used for exception tracking and logging, + // as a catalog of available operations and as the source of randomness. + // Setting the seed to a fixed number in this example to make outputs deterministic. + var mlContext = new MLContext(seed: 0); + + // Create a list of data examples. + var examples = DatasetUtils.GenerateRandomMulticlassClassificationExamples(1000); + + // Convert the examples list to an IDataView object, which is consumable by ML.NET API. + var dataView = mlContext.Data.LoadFromEnumerable(examples); + + //////////////////// Data Preview //////////////////// + // Label Features + // AA 0.7262433,0.8173254,0.7680227,0.5581612,0.2060332,0.5588848,0.9060271,0.4421779,0.9775497,0.2737045 + // BB 0.4919063,0.6673147,0.8326591,0.6695119,1.182151,0.230367,1.06237,1.195347,0.8771811,0.5145918 + // CC 1.216908,1.248052,1.391902,0.4326252,1.099942,0.9262842,1.334019,1.08762,0.9468155,0.4811099 + // DD 0.7871246,1.053327,0.8971719,1.588544,1.242697,1.362964,0.6303943,0.9810045,0.9431419,1.557455 + + // Create a pipeline. + var pipeline = + // Convert the string labels into key types. + mlContext.Transforms.Conversion.MapValueToKey("Label") + // Apply StochasticDualCoordinateAscent multiclass trainer. + .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy()); + + // Train the model and do predictions on same data set. + // Typically predictions would be in a different, validation set. 
+ var dataWithPredictions = pipeline.Fit(dataView).Transform(dataView); + + // At this point, the Label column has been transformed from strings to DataViewKeyType, and + // the fitted pipeline has added the PredictedLabel column, which holds key values as well; map both back to strings. + var newPipeline = mlContext.Transforms.Conversion.MapKeyToValue(new[] + { + new InputOutputColumnPair("LabelOriginalValue","Label"), + new InputOutputColumnPair("PredictedLabelOriginalValue","PredictedLabel") + }); + + var transformedData = newPipeline.Fit(dataWithPredictions).Transform(dataWithPredictions); + + var values = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false); + + // Printing the columns of the transformed data. + Console.WriteLine($" Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue"); + foreach (var row in values) + Console.WriteLine($"{row.Label}\t\t{row.LabelOriginalValue}\t\t\t{row.PredictedLabel}\t\t\t{row.PredictedLabelOriginalValue}"); + + // Label LabelOriginalValue PredictedLabel PredictedLabelOriginalValue + // 1 AA 2 BB + // 1 AA 1 AA + // 4 DD 4 DD + // 2 BB 2 BB + // 1 AA 1 AA + // 1 AA 1 AA + // 1 AA 1 AA + // 2 BB 2 BB + + } + private class TransformedData + { + public uint Label { get; set; } + public uint PredictedLabel { get; set; } + public string LabelOriginalValue { get; set; } + public string PredictedLabelOriginalValue { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs index 09c0a53a1d..0b7b5ab4dd 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs @@ -1,9 +1,8 @@ using System; using System.Collections.Generic; -using Microsoft.ML.Data; +using Microsoft.ML; - -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class MapValue { @@ -79,11 +78,13 @@ public static void Example() // 
12-25yrs Long 3 5 High // 25+yrs Long 3 5 High } + private class DataPoint { public string Timeframe { get; set; } public int Score { get; set; } } + private class TransformedData : DataPoint { public string TimeframeCategory { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs index 76977cd40b..9e1cbf2a99 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs @@ -1,7 +1,8 @@ using System; using System.Collections.Generic; +using Microsoft.ML; -namespace Microsoft.ML.Samples.Dynamic +namespace Samples.Dynamic { public static class MapValueIdvLookup { @@ -68,10 +69,12 @@ private class LookupMap public float Value { get; set; } public string Category { get; set; } } + private class DataPoint { public float Price { get; set; } } + private class TransformedData : DataPoint { public string PriceCategory { get; set; } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs index 0c7128d439..18362ec9a0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs @@ -1,6 +1,8 @@ using System; using System.Collections.Generic; -namespace Microsoft.ML.Samples.Dynamic +using Microsoft.ML; + +namespace Samples.Dynamic { public static class MapValueToArray { @@ -55,10 +57,12 @@ public static void Example() // 12-25yrs 12, 50,300 // 25+yrs 12, 50, 300 } + public class DataPoint { public string Timeframe { get; set; } } + public class TransformedData : DataPoint { public int[] Features { get; set; } diff --git 
a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs new file mode 100644 index 0000000000..ccd79f4fae --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs @@ -0,0 +1,116 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; + +namespace Samples.Dynamic +{ + public static class MapValueToKeyMultiColumn + { + /// This example demonstrates the use of the ValueToKeyMappingEstimator, by mapping strings to KeyType values. + /// For more on ML.NET KeyTypes see: https://github.com/dotnet/machinelearning/blob/master/docs/code/IDataViewTypeSystem.md#key-types + /// It is possible to have multiple values map to the same category. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Get a small dataset as an IEnumerable. + var rawData = new[] { + new DataPoint() { StudyTime = "0-4yrs" , Course = "CS" }, + new DataPoint() { StudyTime = "6-11yrs" , Course = "CS" }, + new DataPoint() { StudyTime = "12-25yrs" , Course = "LA" }, + new DataPoint() { StudyTime = "0-5yrs" , Course = "DS" } + }; + + var data = mlContext.Data.LoadFromEnumerable(rawData); + + // Constructs the ML.net pipeline + var pipeline = mlContext.Transforms.Conversion.MapValueToKey(new[] { + new InputOutputColumnPair("StudyTimeCategory", "StudyTime"), + new InputOutputColumnPair("CourseCategory", "Course") + }, + keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue, + addKeyValueAnnotationsAsText: true); + + // Fits the pipeline to the data. + IDataView transformedData = pipeline.Fit(data).Transform(data); + + // Getting the resulting data as an IEnumerable. 
+ // This will contain the newly created columns. + IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false); + + Console.WriteLine($" StudyTime StudyTimeCategory Course CourseCategory"); + foreach (var featureRow in features) + Console.WriteLine($"{featureRow.StudyTime}\t\t{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t{featureRow.CourseCategory}"); + + // TransformedData obtained post-transformation. + // + // StudyTime StudyTimeCategory Course CourseCategory + // 0-4yrs 1 CS 1 + // 6-11yrs 4 CS 1 + // 12-25yrs 3 LA 3 + // 0-5yrs 2 DS 2 + + // If we wanted to provide the mapping, rather than letting the transform create it, + // we could do so by creating a single-column IDataView containing the values to map to. + // If the values in the dataset are not found in the lookup IDataView they will get mapped to the missing value, 0. + // The keyData are shared among the columns, therefore the keys are not contiguous for the column. + // Create the lookup map data IEnumerable. + var lookupData = new[] { + new LookupMap { Key = "0-4yrs" }, + new LookupMap { Key = "6-11yrs" }, + new LookupMap { Key = "25+yrs" }, + new LookupMap { Key = "CS" }, + new LookupMap { Key = "DS" }, + new LookupMap { Key = "LA" } + }; + + // Convert to IDataView + var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData); + + // Constructs the ML.NET pipeline + var pipelineWithLookupMap = mlContext.Transforms.Conversion.MapValueToKey(new[] { + new InputOutputColumnPair("StudyTimeCategory", "StudyTime"), + new InputOutputColumnPair("CourseCategory", "Course") + }, + keyData: lookupIdvMap); + + // Fits the pipeline to the data. + transformedData = pipelineWithLookupMap.Fit(data).Transform(data); + + // Getting the resulting data as an IEnumerable. + // This will contain the newly created columns. 
+ features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false); + + Console.WriteLine($" StudyTime StudyTimeCategory Course CourseCategory"); + foreach (var featureRow in features) + Console.WriteLine($"{featureRow.StudyTime}\t\t{featureRow.StudyTimeCategory}\t\t\t{featureRow.Course}\t\t{featureRow.CourseCategory}"); + + // StudyTime StudyTimeCategory Course CourseCategory + // 0-4yrs 1 CS 4 + // 6-11yrs 2 CS 4 + // 12-25yrs 0 LA 6 + // 0-5yrs 0 DS 5 + + } + + private class DataPoint + { + public string StudyTime { get; set; } + public string Course { get; set; } + } + + private class TransformedData : DataPoint + { + public uint StudyTimeCategory { get; set; } + public uint CourseCategory { get; set; } + } + + // Type for the IDataView that will be serving as the map + private class LookupMap + { + public string Key { get; set; } + } + } +} diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 0fab6d1b19..a89c56c707 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -106,8 +106,9 @@ internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.Conve /// /// /// + /// [!code-csharp[MapKeyToValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)] + /// ]]> + /// /// public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null) => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName); @@ -117,6 +118,13 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// /// The conversion transform's catalog. /// Specifies the names of the columns on which to apply the transformation. 
+ /// + /// + /// + /// + /// public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns) { var env = CatalogUtils.GetEnvironment(catalog); @@ -179,7 +187,7 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// /// /// /// /// @@ -205,6 +213,13 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// The data view containing the terms. If specified, this should be a single column data /// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined /// from the input data upon fitting. + /// + /// + /// + /// + /// public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns, int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys, @@ -226,13 +241,6 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// The data view containing the terms. If specified, this should be a single column data /// view, and the key-values will be taken from that column. If unspecified, the key-values will be determined /// from the input data upon fitting. - /// - /// - /// - /// - /// [BestFriend] internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog, ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null)