From 114fe15114cc904ca8a719c51dd22cdbf0e6c8b6 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 26 Mar 2019 15:52:28 -0700 Subject: [PATCH 1/3] adding a sample for convert MultiColumns. Moving files around. --- .../Conversion/ConvertTypeMultiColumn.cs | 75 +++++++++++++++++++ .../Conversion/KeyToValueToKey.cs} | 0 .../Conversion}/ValueMapping.cs | 0 .../Conversion}/ValueMappingFloatToString.cs | 0 .../Conversion}/ValueMappingStringToArray.cs | 0 .../ValueMappingStringToKeyType.cs | 0 .../ConversionsExtensionsCatalog.cs | 62 ++++++++------- 7 files changed, 109 insertions(+), 28 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs rename docs/samples/Microsoft.ML.Samples/Dynamic/{KeyToValueValueToKey.cs => Transforms/Conversion/KeyToValueToKey.cs} (100%) rename docs/samples/Microsoft.ML.Samples/Dynamic/{ => Transforms/Conversion}/ValueMapping.cs (100%) rename docs/samples/Microsoft.ML.Samples/Dynamic/{ => Transforms/Conversion}/ValueMappingFloatToString.cs (100%) rename docs/samples/Microsoft.ML.Samples/Dynamic/{ => Transforms/Conversion}/ValueMappingStringToArray.cs (100%) rename docs/samples/Microsoft.ML.Samples/Dynamic/{ => Transforms/Conversion}/ValueMappingStringToKeyType.cs (100%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs new file mode 100644 index 0000000000..63d9a544fb --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs @@ -0,0 +1,75 @@ +using System; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Samples.Dynamic +{ + // This example illustrates how to convert multiple columns of different types to one type, in this case System.Single. + // This is often a useful data transformation before concatenting the features together and passing them to a particular estimator. 
+ public static class ConvertTypeMultiColumn + { + // The initial data type + private class InputData + { + public bool Feature1; + public string Feature2; + public DateTime Feature3; + public double Feature4; + } + + // The resulting data type after the transformation + private sealed class TransformedData : InputData + { + public float Converted1 { get; set; } + public float Converted2 { get; set; } + + public float Converted3 { get; set; } + public float Converted4 { get; set; } + } + + public static void Example() + { + var mlContext = new MLContext(seed: 1); + var rawData = new[] { + new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145}, + new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14}, + new InputData() { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046}, + new InputData() { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206}, + new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09}, + }; + + var data = mlContext.Data.LoadFromEnumerable(rawData); + + // Construct the pipeline. + var pipeline = mlContext.Transforms.Conversion.ConvertType(new[] + { + new InputOutputColumnPair("Converted1", "Feature1"), + new InputOutputColumnPair("Converted2", "Feature2"), + new InputOutputColumnPair("Converted3", "Feature3"), + new InputOutputColumnPair("Converted4", "Feature4"), + + }, DataKind.Single); + + // Let's train our pipeline, and then apply it to the same data. + var transformer = pipeline.Fit(data); + var transformedData = transformer.Transform(data); + + // Shape the transformed data as a strongly typed IEnumerable + var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); + + // Printing the results. 
+ Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4"); + foreach (var item in convertedData) + Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}"); + + // Output + // + // Converted1 Converted2 Converted3 Converted4 + // 1 0.4 6.368921E+17 0.145 + // 0 0.5 6.368916E+17 3.14 + // 0 14 6.368916E+17 0.2046 + // 0 23 6.368921E+17 0.1206 + // 1 8904 6.368924E+17 8.09 + + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMapping.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMapping.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingFloatToString.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingFloatToString.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToArray.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToArray.cs diff --git 
a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToKeyType.cs similarity index 100% rename from docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToKeyType.cs diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index e8edbc9c9d..e2a1ffba0b 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -72,6 +72,12 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers /// The conversion transform's catalog. /// Specifies the names of the columns on which to apply the transformation. /// The expected kind of the output column. + /// + /// + /// + /// public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns, DataKind outputKind = ConvertDefaults.DefaultOutputKind) @@ -100,7 +106,7 @@ internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.Conve /// /// /// /// public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null) @@ -173,7 +179,7 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// /// /// /// /// @@ -223,7 +229,7 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// /// /// /// /// @@ -246,10 +252,10 @@ internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog. 
/// /// /// /// public static ValueMappingEstimator MapValue( @@ -277,10 +283,10 @@ public static ValueMappingEstimator MapValue /// /// /// [BestFriend] @@ -309,7 +315,7 @@ internal static ValueMappingEstimator MapValue /// /// /// [BestFriend] @@ -340,10 +346,10 @@ internal static ValueMappingEstimator MapValue /// /// /// public static ValueMappingEstimator MapValue( @@ -370,10 +376,10 @@ public static ValueMappingEstimator MapValue /// /// /// [BestFriend] @@ -403,10 +409,10 @@ internal static ValueMappingEstimator MapValue /// /// /// public static ValueMappingEstimator MapValue( @@ -429,10 +435,10 @@ public static ValueMappingEstimator MapValue( /// /// /// /// [BestFriend] From a51e688567cd4b050b95cb095d3b4e1f296c91cd Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Mon, 1 Apr 2019 23:25:08 -0700 Subject: [PATCH 2/3] Adjust the samples about ValueMapping --- .../Dynamic/Transforms/Conversion/MapValue.cs | 101 ++++++++++++++++++ .../Conversion/MapValueIDVLookup.cs | 83 ++++++++++++++ .../Transforms/Conversion/MapValueToArray.cs | 71 ++++++++++++ .../Transforms/Conversion/ValueMapping.cs | 72 ------------- .../Conversion/ValueMappingFloatToString.cs | 64 ----------- .../Conversion/ValueMappingStringToArray.cs | 67 ------------ .../Conversion/ValueMappingStringToKeyType.cs | 73 ------------- .../ConversionsExtensionsCatalog.cs | 48 +-------- 8 files changed, 258 insertions(+), 321 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMapping.cs delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingFloatToString.cs delete mode 100644 
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToArray.cs delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToKeyType.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs new file mode 100644 index 0000000000..3899ecacab --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs @@ -0,0 +1,101 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML.Data; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class MapValue + { + class DataPoint + { + public string Timeframe { get; set; } + public int Score { get; set; } + } + + class TransformedData : DataPoint + { + public string TimeframeCategory { get; set; } + public string ScoreCategory { get; set; } + + public uint Label { get; set; } + } + + + + /// This example demonstrates the use of the ValueMappingEstimator by mapping strings to other string values, or floats to strings. + /// This is useful to map types to a grouping. + /// It is possible to have multiple values map to the same category. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Get a small dataset as an IEnumerable. + var rawData = new[] { + new DataPoint() { Timeframe = "0-4yrs" , Score = 1 }, + new DataPoint() { Timeframe = "6-11yrs" , Score = 2 }, + new DataPoint() { Timeframe = "12-25yrs" , Score = 3 }, + new DataPoint() { Timeframe = "0-5yrs" , Score = 4 }, + new DataPoint() { Timeframe = "12-25yrs" , Score = 5 }, + new DataPoint() { Timeframe = "25+yrs" , Score = 5 }, + }; + + var data = mlContext.Data.LoadFromEnumerable(rawData); + + // Construct the mapping to other strings for the Timeframe column. 
+ var timeframeMap = new Dictionary(); + timeframeMap["0-4yrs"] = "Short"; + timeframeMap["0-5yrs"] = "Short"; + timeframeMap["6-11yrs"] = "Medium"; + timeframeMap["12-25yrs"] = "Long"; + timeframeMap["25+yrs"] = "Long"; + + // Construct the mapping of strings to keys(uints) for the Timeframe column. + var timeframeKeyMap = new Dictionary(); + timeframeKeyMap["0-4yrs"] = 1; + timeframeKeyMap["0-5yrs"] = 1; + timeframeKeyMap["6-11yrs"] = 2; + timeframeKeyMap["12-25yrs"] = 3; + timeframeKeyMap["25+yrs"] = 3; + + // Construct the mapping of ints to strings for the Score column. + var scoreMap = new Dictionary(); + scoreMap[1] = "Low"; + scoreMap[2] = "Low"; + scoreMap[3] = "Average"; + scoreMap[4] = "High"; + scoreMap[5] = "High"; + + // Constructs the ML.net pipeline + var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe") + .Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score")) + // on the MapValue below, the treatValuesAsKeyType is set to true. The type of the Label column will be a key type, + // and it can be used as input for trainers performing multiclass classification. + .Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true)); + + // Fits the pipeline to the data. + IDataView transformedData = pipeline.Fit(data).Transform(data); + + // Getting the resulting data as an IEnumerable. + // This will contain the newly created columns. + IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($" Timeframe TimeframeCategory Label Score ScoreCategory"); + foreach (var featureRow in features) + { + Console.WriteLine($"{featureRow.Timeframe}\t\t{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t{featureRow.Score}\t{featureRow.ScoreCategory}"); + } + + // TransformedData obtained post-transformation. 
+ // + // Timeframe TimeframeCategory Label Score ScoreCategory + // 0 - 4yrs Short 1 1 Low + // 6 - 11yrs Medium 2 2 Low + // 12 - 25yrs Long 3 3 Average + // 0 - 5yrs Short 1 4 High + // 12 - 25yrs Long 3 5 High + // 25 + yrs Long 3 5 High + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs new file mode 100644 index 0000000000..1c26b75942 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs @@ -0,0 +1,83 @@ +using System; +using System.Collections.Generic; + +namespace Microsoft.ML.Samples.Dynamic +{ + public static class MapValueIdvLookup + { + // Type for the IDataVIew that will be serving as the map + private class LookupMap + { + public float Value { get; set; } + public string Category { get; set; } + + } + + private class DataPoint + { + public float Price { get; set; } + } + + private class TransformedData : DataPoint + { + public string PriceCategory { get; set; } + } + + /// This example demonstrates the use of MapValue by mapping floats to strings, looking up the mapping in an IDataView. + /// This is useful to map types to a grouping. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Get a small dataset as an IEnumerable. + var rawData = new[] { + new DataPoint() { Price = 3.14f }, + new DataPoint() { Price = 2000f }, + new DataPoint() { Price = 1.19f }, + new DataPoint() { Price = 2.17f }, + new DataPoint() { Price = 33.784f }, + + }; + + // Convert to IDataView + var data = mlContext.Data.LoadFromEnumerable(rawData); + + // Create the lookup map data IEnumerable. 
+ var lookupData = new[] { + new LookupMap { Value = 3.14f, Category = "Low" }, + new LookupMap { Category = "Low" , Value = 1.19f }, + new LookupMap { Category = "Low" , Value = 2.17f }, + new LookupMap { Category = "Medium", Value = 33.784f}, + new LookupMap { Category = "High", Value = 2000f} + + }; + + // Convert to IDataView + var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData); + + // Constructs the ValueMappingEstimator making the ML.net pipeline + var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, lookupIdvMap.Schema["Value"], lookupIdvMap.Schema["Category"], "Price"); + + // Fits the ValueMappingEstimator and transforms the data converting the Price to PriceCategory. + IDataView transformedData = pipeline.Fit(data).Transform(data); + + // Getting the resulting data as an IEnumerable. + IEnumerable features = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($" Price PriceCategory"); + foreach (var featureRow in features) + Console.WriteLine($"{featureRow.Price}\t\t{featureRow.PriceCategory}"); + + // TransformedData obtained post-transformation. 
+ // + // Price PriceCategory + // 3.14 Low + // 2000 High + // 1.19 Low + // 2.17 Low + // 33.784 Medium + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs new file mode 100644 index 0000000000..bc17e34589 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs @@ -0,0 +1,71 @@ +using System; +using System.Collections.Generic; +namespace Microsoft.ML.Samples.Dynamic +{ + public static class MapValueToArray + { + class DataPoint + { + public string Timeframe { get; set; } + } + + class TransformedData : DataPoint + { + public int[] Feature { get; set; } + } + + /// This example demonstrates the use of MapValue by mapping strings to array values, which allows for mapping data to numeric arrays. + /// This functionality is useful when the generated column will serve as the Features column for a trainer. Most of the trainers take a numeric vector, as the Features column. + /// In this example, we are mapping the Timeframe data to arbitrary integer arrays. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + + // Get a small dataset as an IEnumerable. + var rawData = new[] { + new DataPoint() { Timeframe = "0-4yrs" }, + new DataPoint() { Timeframe = "6-11yrs" }, + new DataPoint() { Timeframe = "12-25yrs" }, + new DataPoint() { Timeframe = "0-5yrs" }, + new DataPoint() { Timeframe = "12-25yrs" }, + new DataPoint() { Timeframe = "25+yrs" }, + }; + + var data = mlContext.Data.LoadFromEnumerable(rawData); + + // If the list of keys and values are known, they can be passed to the API. 
+ // Creating a list of key-value pairs based on the dataset + var timeframeMap = new Dictionary(); + timeframeMap["0-4yrs"] = new int[] { 0, 5, 300 }; + timeframeMap["0-5yrs"] = new int[] { 0, 5, 300 }; + timeframeMap["6-11yrs"] = new int[] { 6, 11, 300 }; + timeframeMap["12-25yrs"] = new int[] { 12, 50, 300 }; + timeframeMap["25+yrs"] = new int[] { 12, 50, 300 }; + + // Constructs the ValueMappingEstimator making the ML.net pipeline. + var pipeline = mlContext.Transforms.Conversion.MapValue("Feature", timeframeMap, "Timeframe"); + + // Fits the ValueMappingEstimator and transforms the data adding the Features column. + IDataView transformedData = pipeline.Fit(data).Transform(data); + + // Getting the resulting data as an IEnumerable. + IEnumerable featuresColumn = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Timeframe Feature"); + foreach (var featureRow in featuresColumn) + { + Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Feature)}"); + } + + // Timeframe Feature + // 0 - 4yrs 0, 5, 300 + // 6 - 11yrs 6, 11, 300 + // 12 - 25yrs 12, 50, 300 + // 0 - 5yrs 0, 5, 300 + // 12 - 25yrs 12, 50,300 + // 25 + yrs 12, 50, 300 + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMapping.cs deleted file mode 100644 index 2df356760a..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMapping.cs +++ /dev/null @@ -1,72 +0,0 @@ -using System; -using System.Collections.Generic; -namespace Microsoft.ML.Samples.Dynamic -{ - public static partial class ValueMapping - { - class SampleInfertDataWithFeatures - { - public float Age = 0; - public string Education = default; - public string EducationCategory = default; - } - - /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string 
values. This is useful - /// to map strings to a grouping. In this example, the education data maps to the groups Undergraduate and Postgraduate: - /// 0-5yrs -> Undergraduate - /// 6-11yrs -> Postgraduate - /// 12+yrs -> Postgraduate - /// Its possible to have multiple keys map to the same value. - public static void Example() - { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. - var mlContext = new MLContext(); - - // Get a small dataset as an IEnumerable. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - IDataView trainData = mlContext.Data.LoadFromEnumerable(data); - - // Preview of the data. - // - // Age Case Education induced parity pooled.stratum row_num ... - // 26.0 1.0 0-5yrs 1.0 6.0 3.0 1.0 ... - // 42.0 1.0 0-5yrs 1.0 1.0 1.0 2.0 ... - // 39.0 1.0 12+yrs 2.0 6.0 4.0 3.0 ... - // 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ... - // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... - - // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of key-value pairs based on the Education values from the dataset. - var educationMap = new Dictionary (); - educationMap["0-5yrs"] = "Undergraduate"; - educationMap["6-11yrs"] = "Postgraduate"; - educationMap["12+yrs"] = "Postgraduate"; - - // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("EducationCategory", educationMap, "Education"); - - // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. - IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. 
This will contain the newly created column EducationCategory - IEnumerable featureRows = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping string->string"); - Console.WriteLine($"Age\tEducation\tEducationCategory"); - foreach (var featureRow in featureRows) - { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); - } - - // Features column obtained post-transformation. - // - // Age Education EducationCategory - // 26 0-5yrs Undergraduate - // 42 0-5yrs Undergraudate - // 39 12+yrs Postgraduate - // 34 0-5yrs Undergraduate - // 35 6-11yrs Postgraduate - } - } -} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingFloatToString.cs deleted file mode 100644 index 5cf34572ba..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingFloatToString.cs +++ /dev/null @@ -1,64 +0,0 @@ -using System; -using System.Collections.Generic; -namespace Microsoft.ML.Samples.Dynamic -{ - public static class ValueMappingFloatToString - { - /// - /// Helper class for retrieving the resulting data - /// - class SampleTemperatureDataWithCategory - { - public DateTime Date = default; - public float Temperature = 0.0f; - public string TemperatureCategory = default; - } - - /// This example demonstrates the use of ValueMappingEstimator by mapping float-to-string values. This is useful if the key - /// data are floating point and need to be grouped into string values. In this example, the Induction value is mapped to - /// "T1", "T2", "T3", and "T4" groups. - public static void Example() - { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. - var mlContext = new MLContext(); - - // Get a small dataset as an IEnumerable. 
- IEnumerable data = SamplesUtils.DatasetUtils.GetSampleTemperatureData(5); - IDataView trainData = mlContext.Data.LoadFromEnumerable(data); - - // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of key-value pairs based on the induced value from the dataset - var temperatureMap = new Dictionary(); - temperatureMap[36.0f] = "T1"; - temperatureMap[35.0f] = "T2"; - temperatureMap[34.0f] = "T3"; - - // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("TemperatureCategory", temperatureMap, "Temperature"); - - // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. - IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the resulting data as an IEnumerable of SampleTemperatureDataWithCategory. This will contain the newly created column TemperatureCategory - IEnumerable featureRows = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping float->string"); - Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory"); - foreach (var featureRow in featureRows) - { - Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}"); - } - - // Expected output: - // Features column obtained post-transformation. 
- // Example of mapping float->string - // Date Temperature TemperatureCategory - // 1/2/2012 36 T1 - // 1/3/2012 36 T1 - // 1/4/2012 34 T3 - // 1/5/2012 35 T2 - // 1/6/2012 35 T2 - } - } -} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToArray.cs deleted file mode 100644 index f008d559d8..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToArray.cs +++ /dev/null @@ -1,67 +0,0 @@ -using System; -using System.Collections.Generic; -namespace Microsoft.ML.Samples.Dynamic -{ - public static class ValueMappingStringToArray - { - /// - /// Helper class for retrieving the resulting data - /// - class SampleInfertDataWithIntArray - { - public float Age = 0; - public string Education = default; - public int[] EducationFeature = default; - } - - /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values which allows for mapping string data - /// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to - /// arbitrary integer arrays with the following association: - /// 0-5yrs -> 1, 2, 3 - /// 6-11yrs -> 5, 6, 7 - /// 12+yrs -> 42,32,64 - public static void Example() - { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. - var mlContext = new MLContext(); - - // Get a small dataset as an IEnumerable. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - IDataView trainData = mlContext.Data.LoadFromEnumerable(data); - - // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of key-value pairs based on the Education values from the dataset - var educationMap = new Dictionary(); - educationMap["0-5yrs"] = new int[] { 1, 2, 3 }; - educationMap["6-11yrs"] = new int[] { 5, 6, 7 }; - educationMap["12+yrs"] = new int[] { 42, 32, 64 }; - - // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.MapValue("EducationFeature", educationMap, "Education"); - - // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. - IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationCategory - IEnumerable featuresColumn = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping string->array"); - Console.WriteLine($"Age\tEducation\tEducationFeature"); - foreach (var featureRow in featuresColumn) - { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationFeature)}"); - } - - // Features column obtained post-transformation. 
- // - // Example of mapping string->array - // Age Education EducationFeature - // 26 0 - 5yrs 1,2,3 - // 42 0 - 5yrs 1,2,3 - // 39 12 + yrs 42,32,64 - // 34 0 - 5yrs 1,2,3 - // 35 6 - 11yrs 5,6,7 - } - } -} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToKeyType.cs deleted file mode 100644 index 8c01d35e78..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ValueMappingStringToKeyType.cs +++ /dev/null @@ -1,73 +0,0 @@ -using System; -using System.Collections.Generic; -namespace Microsoft.ML.Samples.Dynamic -{ - public static class ValueMappingStringToKeyType - { - /// - /// Helper class for retrieving the resulting data - /// - class SampleInfertDataWithFeatures - - { - public float Age = 0; - public string Education = default; - public string EducationCategory = default; - } - - /// This example demonstrates the use of KeyTypes using both the ValueMappingEstimator and KeyToValueEstimator. Using a KeyType - /// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes is true, - /// the ValueMappingEstimator will generate a KeyType for each unique value. - /// - /// In this example, the education data is mapped to a grouping of 'Undergraduate' and 'Postgraduate'. Because KeyTypes are used, the - /// ValueMappingEstimator will output the KeyType value rather than string value of 'Undergraduate' or 'Postgraduate'. - /// - /// The KeyToValueEstimator is added to the pipeline to convert the KeyType back to the original value. Therefore the output of this example - /// results in the string value of 'Undergraduate' and 'Postgraduate'. - public static void Example() - { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. 
- var mlContext = new MLContext(); - - // Get a small dataset as an IEnumerable. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - IDataView trainData = mlContext.Data.LoadFromEnumerable(data); - - // Creating a list of key-value pairs based on the Education values from the dataset - // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. - var educationMap = new Dictionary(); - educationMap["0-5yrs"] = "Undergraduate"; - educationMap["6-11yrs"] = "Postgraduate"; - educationMap["12+yrs"] = "Postgraduate"; - - // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. - // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back - // to the original value. - var pipeline = mlContext.Transforms.Conversion.MapValue("EducationKeyType", educationMap, "Education", true) - .Append(mlContext.Transforms.Conversion.MapKeyToValue("EducationCategory", "EducationKeyType")); - - // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. - IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. - IEnumerable featureRows = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping string->keytype"); - Console.WriteLine($"Age\tEducation\tEducationCategory"); - foreach (var featureRow in featureRows) - { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); - } - - // Features column obtained post-transformation. 
- // - // Age Education EducationCategory - // 26 0-5yrs Undergraduate - // 42 0-5yrs Undergraduate - // 39 12+yrs Postgraduate - // 34 0-5yrs Undergraduate - // 35 6-11yrs Postgraduate - } - } -} \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index e2a1ffba0b..7fc47edf5d 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -252,10 +252,7 @@ internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog. /// /// /// /// public static ValueMappingEstimator MapValue( @@ -280,15 +277,6 @@ public static ValueMappingEstimator MapValueSpecifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// The columns to apply this transform on. /// An instance of the - /// - /// - /// - /// [BestFriend] internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, @@ -312,12 +300,6 @@ internal static ValueMappingEstimator MapValueWhether to treat the values as a . /// The columns to apply this transform on. /// An instance of the - /// - /// - /// - /// [BestFriend] internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, @@ -346,10 +328,7 @@ internal static ValueMappingEstimator MapValue /// /// /// public static ValueMappingEstimator MapValue( @@ -373,15 +352,6 @@ public static ValueMappingEstimator MapValueSpecifies the mapping that will be perfomed. The keys will be mapped to the values as specified in the . /// The columns to apply this transform on. 
/// An instance of the - /// - /// - /// - /// [BestFriend] internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, @@ -409,10 +379,7 @@ internal static ValueMappingEstimator MapValue /// /// /// public static ValueMappingEstimator MapValue( @@ -432,15 +399,6 @@ public static ValueMappingEstimator MapValue( /// The value column in . /// The columns to apply this transform on. /// A instance of the ValueMappingEstimator - /// - /// - /// - /// [BestFriend] internal static ValueMappingEstimator MapValue( this TransformsCatalog.ConversionTransforms catalog, From edbb1f0f6f6ca1036214fe2a50e706f326d922d1 Mon Sep 17 00:00:00 2001 From: Senja Filipi Date: Tue, 2 Apr 2019 13:28:45 -0700 Subject: [PATCH 3/3] Addressing PR comments --- .../Transforms/Conversion/ConvertType.cs | 20 +++---- .../Conversion/ConvertTypeMultiColumn.cs | 55 ++++++++++--------- .../Dynamic/Transforms/Conversion/MapValue.cs | 49 +++++++---------- ...ValueIDVLookup.cs => MapValueIdvLookup.cs} | 43 +++++++-------- .../Transforms/Conversion/MapValueToArray.cs | 50 ++++++++--------- .../ConversionsExtensionsCatalog.cs | 6 +- 6 files changed, 104 insertions(+), 119 deletions(-) rename docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/{MapValueIDVLookup.cs => MapValueIdvLookup.cs} (89%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs index 931fdaaeb4..bbffb6564a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs @@ -5,18 +5,6 @@ namespace Microsoft.ML.Samples.Dynamic { public static class ConvertType { - private sealed class InputData - { - public bool Survived; - } - - private sealed class TransformedData - { - public bool Survived { get; set; } - - public Int32 SurvivedInt32 { get; set; } - } - 
public static void Example() { var mlContext = new MLContext(seed: 1); @@ -51,5 +39,13 @@ public static void Example() // A: False Aconv:0 // A: False Aconv:0 } + private class InputData + { + public bool Survived; + } + private sealed class TransformedData : InputData + { + public Int32 SurvivedInt32 { get; set; } + } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs index 63d9a544fb..325dfe5271 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs @@ -4,31 +4,15 @@ namespace Microsoft.ML.Samples.Dynamic { // This example illustrates how to convert multiple columns of different types to one type, in this case System.Single. - // This is often a useful data transformation before concatenting the features together and passing them to a particular estimator. + // This is often a useful data transformation before concatenating the features together and passing them to a particular estimator. public static class ConvertTypeMultiColumn - { - // The initial data type - private class InputData - { - public bool Feature1; - public string Feature2; - public DateTime Feature3; - public double Feature4; - } - - // The resulting data type after the transformation - private sealed class TransformedData : InputData - { - public float Converted1 { get; set; } - public float Converted2 { get; set; } - - public float Converted3 { get; set; } - public float Converted4 { get; set; } - } - + { public static void Example() { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. 
var mlContext = new MLContext(seed: 1); + var rawData = new[] { new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145}, new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14}, @@ -37,6 +21,7 @@ public static void Example() new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09}, }; + // Convert the data to an IDataView. var data = mlContext.Data.LoadFromEnumerable(rawData); // Construct the pipeline. @@ -46,14 +31,16 @@ public static void Example() new InputOutputColumnPair("Converted2", "Feature2"), new InputOutputColumnPair("Converted3", "Feature3"), new InputOutputColumnPair("Converted4", "Feature4"), + }, + DataKind.Single); - }, DataKind.Single); - - // Let's train our pipeline, and then apply it to the same data. + // Let's fit our pipeline to the data. var transformer = pipeline.Fit(data); + // Transforming the same data. This will add the 4 columns defined in the pipeline, containing the converted + // values of the initial columns. var transformedData = transformer.Transform(data); - // Shape the transformed data as a strongly typed IEnumerable + // Shape the transformed data as a strongly typed IEnumerable. var convertedData = mlContext.Data.CreateEnumerable(transformedData, true); // Printing the results. @@ -61,7 +48,7 @@ public static void Example() foreach (var item in convertedData) Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}"); - // Output + // Transformed data. 
// // Converted1 Converted2 Converted3 Converted4 // 1 0.4 6.368921E+17 0.145 @@ -71,5 +58,21 @@ public static void Example() // 1 8904 6.368924E+17 8.09 } + // The initial data type + private class InputData + { + public bool Feature1; + public string Feature2; + public DateTime Feature3; + public double Feature4; + } + // The resulting data type after the transformation + private class TransformedData : InputData + { + public float Converted1 { get; set; } + public float Converted2 { get; set; } + public float Converted3 { get; set; } + public float Converted4 { get; set; } + } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs index 3899ecacab..09c0a53a1d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs @@ -2,29 +2,13 @@ using System.Collections.Generic; using Microsoft.ML.Data; + namespace Microsoft.ML.Samples.Dynamic { public static class MapValue { - class DataPoint - { - public string Timeframe { get; set; } - public int Score { get; set; } - } - - class TransformedData : DataPoint - { - public string TimeframeCategory { get; set; } - public string ScoreCategory { get; set; } - - public uint Label { get; set; } - } - - - /// This example demonstrates the use of the ValueMappingEstimator by mapping strings to other string values, or floats to strings. - /// This is useful to map types to a grouping. - /// It is possible to have multiple values map to the same category. + /// This is useful to map types to a category. public static void Example() { // Create a new ML context, for ML.NET operations. 
It can be used for exception tracking and logging, @@ -38,7 +22,7 @@ public static void Example() new DataPoint() { Timeframe = "12-25yrs" , Score = 3 }, new DataPoint() { Timeframe = "0-5yrs" , Score = 4 }, new DataPoint() { Timeframe = "12-25yrs" , Score = 5 }, - new DataPoint() { Timeframe = "25+yrs" , Score = 5 }, + new DataPoint() { Timeframe = "25+yrs" , Score = 5 }, }; var data = mlContext.Data.LoadFromEnumerable(rawData); @@ -70,7 +54,7 @@ public static void Example() // Constructs the ML.net pipeline var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe") .Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score")) - // on the MapValue below, the treatValuesAsKeyType is set to true. The type of the Label column will be a key type, + // on the MapValue below, the treatValuesAsKeyType is set to true. The type of the Label column will be a KeyDataViewType, // and it can be used as input for trainers performing multiclass classification. .Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true)); @@ -83,19 +67,28 @@ public static void Example() Console.WriteLine($" Timeframe TimeframeCategory Label Score ScoreCategory"); foreach (var featureRow in features) - { Console.WriteLine($"{featureRow.Timeframe}\t\t{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t{featureRow.Score}\t{featureRow.ScoreCategory}"); - } // TransformedData obtained post-transformation. 
// // Timeframe TimeframeCategory Label Score ScoreCategory - // 0 - 4yrs Short 1 1 Low - // 6 - 11yrs Medium 2 2 Low - // 12 - 25yrs Long 3 3 Average - // 0 - 5yrs Short 1 4 High - // 12 - 25yrs Long 3 5 High - // 25 + yrs Long 3 5 High + // 0-4yrs Short 1 1 Low + // 6-11yrs Medium 2 2 Low + // 12-25yrs Long 3 3 Average + // 0-5yrs Short 1 4 High + // 12-25yrs Long 3 5 High + // 25+yrs Long 3 5 High + } + private class DataPoint + { + public string Timeframe { get; set; } + public int Score { get; set; } + } + private class TransformedData : DataPoint + { + public string TimeframeCategory { get; set; } + public string ScoreCategory { get; set; } + public uint Label { get; set; } } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs similarity index 89% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs index 1c26b75942..76977cd40b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIDVLookup.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueIdvLookup.cs @@ -5,24 +5,6 @@ namespace Microsoft.ML.Samples.Dynamic { public static class MapValueIdvLookup { - // Type for the IDataVIew that will be serving as the map - private class LookupMap - { - public float Value { get; set; } - public string Category { get; set; } - - } - - private class DataPoint - { - public float Price { get; set; } - } - - private class TransformedData : DataPoint - { - public string PriceCategory { get; set; } - } - /// This example demonstrates the use of MapValue by mapping floats to strings, looking up the mapping in an IDataView. /// This is useful to map types to a grouping. 
public static void Example() @@ -47,17 +29,17 @@ public static void Example() // Create the lookup map data IEnumerable. var lookupData = new[] { new LookupMap { Value = 3.14f, Category = "Low" }, - new LookupMap { Category = "Low" , Value = 1.19f }, - new LookupMap { Category = "Low" , Value = 2.17f }, - new LookupMap { Category = "Medium", Value = 33.784f}, - new LookupMap { Category = "High", Value = 2000f} + new LookupMap { Value = 1.19f , Category = "Low" }, + new LookupMap { Value = 2.17f , Category = "Low" }, + new LookupMap { Value = 33.784f, Category = "Medium" }, + new LookupMap { Value = 2000f, Category = "High"} }; // Convert to IDataView var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupData); - // Constructs the ValueMappingEstimator making the ML.net pipeline + // Constructs the ValueMappingEstimator making the ML.NET pipeline var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, lookupIdvMap.Schema["Value"], lookupIdvMap.Schema["Category"], "Price"); // Fits the ValueMappingEstimator and transforms the data converting the Price to PriceCategory. 
@@ -79,5 +61,20 @@ public static void Example() // 2.17 Low // 33.784 Medium } + + // Type for the IDataView that will be serving as the map + private class LookupMap + { + public float Value { get; set; } + public string Category { get; set; } + } + private class DataPoint + { + public float Price { get; set; } + } + private class TransformedData : DataPoint + { + public string PriceCategory { get; set; } + } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs index bc17e34589..0c7128d439 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToArray.cs @@ -3,17 +3,7 @@ namespace Microsoft.ML.Samples.Dynamic { public static class MapValueToArray - { - class DataPoint - { - public string Timeframe { get; set; } - } - - class TransformedData : DataPoint - { - public int[] Feature { get; set; } - } - + { /// This example demonstrates the use of MapValue by mapping strings to array values, which allows for mapping data to numeric arrays. /// This functionality is useful when the generated column will serve as the Features column for a trainer. Most of the trainers take a numeric vector, as the Features column. /// In this example, we are mapping the Timeframe data to arbitrary integer arrays. @@ -30,13 +20,13 @@ public static void Example() new DataPoint() { Timeframe = "12-25yrs" }, new DataPoint() { Timeframe = "0-5yrs" }, new DataPoint() { Timeframe = "12-25yrs" }, - new DataPoint() { Timeframe = "25+yrs" }, + new DataPoint() { Timeframe = "25+yrs" }, }; var data = mlContext.Data.LoadFromEnumerable(rawData); - // If the list of keys and values are known, they can be passed to the API. 
- // Creating a list of key-value pairs based on the dataset + // Creating a list of key-value pairs to indicate the mapping between the + // DataPoint values, and the arrays they should map to. var timeframeMap = new Dictionary(); timeframeMap["0-4yrs"] = new int[] { 0, 5, 300 }; timeframeMap["0-5yrs"] = new int[] { 0, 5, 300 }; @@ -44,8 +34,8 @@ public static void Example() timeframeMap["12-25yrs"] = new int[] { 12, 50, 300 }; timeframeMap["25+yrs"] = new int[] { 12, 50, 300 }; - // Constructs the ValueMappingEstimator making the ML.net pipeline. - var pipeline = mlContext.Transforms.Conversion.MapValue("Feature", timeframeMap, "Timeframe"); + // Constructs the ValueMappingEstimator making the ML.NET pipeline. + var pipeline = mlContext.Transforms.Conversion.MapValue("Features", timeframeMap, "Timeframe"); // Fits the ValueMappingEstimator and transforms the data adding the Features column. IDataView transformedData = pipeline.Fit(data).Transform(data); @@ -53,19 +43,25 @@ public static void Example() // Getting the resulting data as an IEnumerable. 
IEnumerable featuresColumn = mlContext.Data.CreateEnumerable(transformedData, reuseRowObject: false); - Console.WriteLine($"Timeframe Feature"); + Console.WriteLine($"Timeframe Features"); foreach (var featureRow in featuresColumn) - { - Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Feature)}"); - } + Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Features)}"); - // Timeframe Feature - // 0 - 4yrs 0, 5, 300 - // 6 - 11yrs 6, 11, 300 - // 12 - 25yrs 12, 50, 300 - // 0 - 5yrs 0, 5, 300 - // 12 - 25yrs 12, 50,300 - // 25 + yrs 12, 50, 300 + // Timeframe Features + // 0-4yrs 0, 5, 300 + // 6-11yrs 6, 11, 300 + // 12-25yrs 12, 50, 300 + // 0-5yrs 0, 5, 300 + // 12-25yrs 12, 50, 300 + // 25+yrs 12, 50, 300 + } + public class DataPoint + { + public string Timeframe { get; set; } + } + public class TransformedData : DataPoint + { + public int[] Features { get; set; } } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 7fc47edf5d..f1a790d2e4 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -106,7 +106,7 @@ internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.Conve /// /// /// /// public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null) @@ -179,7 +179,7 @@ public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog. /// /// /// /// /// @@ -229,7 +229,7 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// /// /// /// ///