From bfdbc4f6493aa4bb7ceccc6681b2f3c85db52705 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 23 Jan 2019 17:37:46 -0800 Subject: [PATCH 1/6] ValueMappingEstimator example This provides an example that demonstrates different ways to use the ValueMappingEstimator. This is part of the original change to add the ValueMappingEstimator to the code base and references #754. --- .../Dynamic/ValueMapping.cs | 281 ++++++++++++++++++ .../SamplesDatasetUtils.cs | 2 +- 2 files changed, 282 insertions(+), 1 deletion(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs new file mode 100644 index 0000000000..c59a4bafa6 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -0,0 +1,281 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms; +using Microsoft.ML.Transforms.Conversions; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class ValueMappingExample + { + class SampleInfertDataWithFeatures + { + public float Age = 0; + public string Education = default; + public string EducationCategory = default; + } + + class SampleInfertDataWithInducedCategory + { + public float Age = 0; + public float Induced = 0.0f; + public string InducedCategory = default; + } + + class SampleInfertDataWithIntArray + { + public float Age = 0; + public string Education = default; + public int[] EducationCategory = default; + } + + + public static void ValueMappingTransform() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Get a small dataset as an IEnumerable. 
+ IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + var trainData = ml.CreateStreamingDataView(data); + + // Preview of the data. + // + // Age Case Education induced parity pooled.stratum row_num ... + // 26.0 1.0 0-5yrs 1.0 6.0 3.0 1.0 ... + // 42.0 1.0 0-5yrs 1.0 1.0 1.0 2.0 ... + // 39.0 1.0 12+yrs 2.0 6.0 4.0 3.0 ... + // 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ... + // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... + + StringToStringMappingExample(ml, trainData); + FloatToStringMappingExample(ml, trainData); + StringToKeyTypeMappingExample(ml, trainData); + StringToArrayMappingExample(ml, trainData); + } + + /// + /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. The ValueMappingEstimator uses + /// level of education as keys to a respective string label which is the value. + /// The mapping looks like the following: + /// + /// 0-5yrs -> Cat1 + /// 6-11yrs -> Cat2 + /// 12+yrs -> Cat3 + /// + /// + public static void StringToStringMappingExample(MLContext ml, IDataView trainData) + { + // Creating a list of keys based on the Education values from the dataset + // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. + var educationKeys = new List() + { + "0-5yrs", + "6-11yrs", + "12+yrs" + }; + + var educationValues = new List() + { + "Cat1", + "Cat2", + "Cat3" + }; + + var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("Education", "EducationCategory")); + + // The transformed data. + var transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. 
+ var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping string->string"); + Console.WriteLine($"Age\tEducation\tEducationLabel"); + foreach (var featureRow in featuresColumn) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); + } + + // Features column obtained post-transformation. + // + // Age Education EducationLabel + // 26 0-5yrs Cat1 + // 42 0-5yrs Cat1 + // 39 12+yrs Cat3 + // 34 0-5yrs Cat1 + // 35 6-11yrs Cat2 + } + + /// + /// This example demonstrates the use of KeyTypes by setting treatValuesAsKeyTypes to true, + /// to true. + /// This is useful in cases where you want the output to be integer based rather than the actual value. + /// + /// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following: + /// 1) If the Value type is an unsigned int or unsigned long, the specified values are used directly as the KeyType values. + /// 2) If the Value type is not an unsigned int or unsigned long, new KeyType values are generated for each unique value. + /// + /// In this example, the Value type is a string. Since we are setting treatValueAsKeyTypes to true, + /// the ValueMappingEstimator will generate its own KeyType values for each unique string. + /// As with KeyTypes, they contain the actual Value information as part of the metadata, therefore + /// we can convert a KeyType back to the actual value the KeyType represents. To demonstrate + /// the reverse lookup and to confirm the correct value is mapped, a KeyToValueEstimator is added + /// to the pipeline to convert back to the original value. + /// + public static void StringToKeyTypeMappingExample(MLContext ml, IDataView trainData) + { + // Creating a list of keys based on the Education values from the dataset + // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. 
+ var educationKeys = new List() + { + "0-5yrs", + "6-11yrs", + "12+yrs" + }; + + // Sample string values + var educationValues = new List() + { + "Cat1", + "Cat2", + "Cat3" + }; + + // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. + // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back + // to the original value. + var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, true, ("Education", "EducationKeyType")) + .Append(new KeyToValueMappingEstimator(ml, ("EducationKeyType", "EducationCategory"))); + + // The transformed data. + var transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. + var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping string->keytype"); + Console.WriteLine($"Age\tEducation\tEducationLabel"); + foreach (var featureRow in featuresColumn) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); + } + + // Features column obtained post-transformation. + // + // Age Education EducationLabel + // 26 0-5yrs Cat1 + // 42 0-5yrs Cat1 + // 39 12+yrs Cat3 + // 34 0-5yrs Cat1 + // 35 6-11yrs Cat2 + } + + /// + /// This example demonstrates the use of floating types as the key type for ValueMappingEstimator by mapping a float-to-string value. + /// The mapping looks like the following: + /// + /// 1.0 -> Cat1 + /// 2.0 -> Cat2 + /// + /// + public static void FloatToStringMappingExample(MLContext ml, IDataView trainData) + { + // Creating a list of keys based on the induced value from the dataset + // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. 
+ var inducedKeys = new List() + { + 1.0f, + 2.0f + }; + + // Sample list of associated string values + var inducedValues = new List() + { + "Cat1", + "Cat2" + }; + + var pipeline = new ValueMappingEstimator(ml, inducedKeys, inducedValues, ("Induced", "InducedCategory")); + + // The transformed data. + var transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. + var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping float->string"); + Console.WriteLine($"Age\tInduced\tInducedCategory"); + foreach (var featureRow in featuresColumn) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Induced}\t{featureRow.InducedCategory}"); + } + + // Features column obtained post-transformation. + // + // Example of mapping float->string + // Age Induced InducedCategory + // 26 1 Cat1 + // 42 1 Cat1 + // 39 2 Cat2 + // 34 2 Cat2 + // 35 1 Cat1 + } + + /// + /// This example demonstrates the use arrays as the values for the ValueMappingEstimator. It maps a set of keys that are type string + /// to a integer arrays of variable length. + /// The mapping looks like the following: + /// + /// 0-5yrs -> 1,2,3,4 + /// 6-11yrs -> 5,6,7 + /// 12+yrs -> 42, 32 + /// + /// + public static void StringToArrayMappingExample(MLContext ml, IDataView trainData) + { + // Creating a list of keys based on the Education values from the dataset + var educationKeys = new List() + { + "0-5yrs", + "6-11yrs", + "12+yrs" + }; + + // Sample list of associated array values + var educationValues = new List() + { + new int[] { 1,2,3,4 }, + new int[] { 5,6,7 }, + new int[] { 42, 32 } + }; + + var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("Education", "EducationCategory")); + + // The transformed data. 
+ var transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. + var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping string->array"); + Console.WriteLine($"Age\tEducation\tEducationLabel"); + foreach (var featureRow in featuresColumn) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}"); + } + + // Features column obtained post-transformation. + // + // Example of mapping string->array + // Age Education EducationLabel + // 26 0 - 5yrs 1,2,3,4 + // 42 0 - 5yrs 1,2,3,4 + // 39 12 + yrs 42,32 + // 34 0 - 5yrs 1,2,3,4 + // 35 6 - 11yrs 5,6,7 + } + } +} diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index 90f8417d1f..dda46c155a 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -168,7 +168,7 @@ public static IEnumerable GetInfertData() data.Add(new SampleInfertData { RowNum = 2, - Education = "0-5yrs", + Education = "12+yrs", Age = 39, Parity = 6, Induced = 2, From 9159ae5bf50b31a07b0e55e64c5825345ff8e615 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Wed, 30 Jan 2019 12:54:58 -0800 Subject: [PATCH 2/6] - Moved all tests into individual files - Added further comments based upon feedback --- .../Dynamic/ValueMapping.cs | 233 ++---------------- .../Dynamic/ValueMappingFloatToString.cs | 81 ++++++ .../Dynamic/ValueMappingStringToArray.cs | 84 +++++++ .../Dynamic/ValueMappingStringToKeyType.cs | 94 +++++++ 4 files changed, 280 insertions(+), 212 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs create mode 100644 
docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index c59a4bafa6..5d9611b71d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -1,8 +1,7 @@ using System; using System.Collections.Generic; -using System.Linq; +using Microsoft.Data.DataView; using Microsoft.ML.Data; -using Microsoft.ML.Transforms; using Microsoft.ML.Transforms.Conversions; namespace Microsoft.ML.Samples.Dynamic @@ -16,22 +15,17 @@ class SampleInfertDataWithFeatures public string EducationCategory = default; } - class SampleInfertDataWithInducedCategory - { - public float Age = 0; - public float Induced = 0.0f; - public string InducedCategory = default; - } - - class SampleInfertDataWithIntArray - { - public float Age = 0; - public string Education = default; - public int[] EducationCategory = default; - } - - - public static void ValueMappingTransform() + /// + /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. The ValueMappingEstimator uses + /// level of education as keys to a respective string label which is the value. + /// The mapping looks like the following: + /// + /// 0-5yrs -> Cat1 + /// 6-11yrs -> Cat2 + /// 12+yrs -> Cat3 + /// + /// + public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. @@ -39,7 +33,7 @@ public static void ValueMappingTransform() // Get a small dataset as an IEnumerable. IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = ml.CreateStreamingDataView(data); + var trainData = ml.Data.ReadFromEnumerable(data); // Preview of the data. // @@ -50,24 +44,6 @@ public static void ValueMappingTransform() // 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ... 
// 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... - StringToStringMappingExample(ml, trainData); - FloatToStringMappingExample(ml, trainData); - StringToKeyTypeMappingExample(ml, trainData); - StringToArrayMappingExample(ml, trainData); - } - - /// - /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. The ValueMappingEstimator uses - /// level of education as keys to a respective string label which is the value. - /// The mapping looks like the following: - /// - /// 0-5yrs -> Cat1 - /// 6-11yrs -> Cat2 - /// 12+yrs -> Cat3 - /// - /// - public static void StringToStringMappingExample(MLContext ml, IDataView trainData) - { // Creating a list of keys based on the Education values from the dataset // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. var educationKeys = new List() @@ -77,6 +53,7 @@ public static void StringToStringMappingExample(MLContext ml, IDataView trainDat "12+yrs" }; + // Creating a list of associated values that will map respectively to each educationKey var educationValues = new List() { "Cat1", @@ -84,17 +61,18 @@ public static void StringToStringMappingExample(MLContext ml, IDataView trainDat "Cat3" }; - var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("Education", "EducationCategory")); + // Constructs the ValueMappingEstimator making the ML.net pipeline + var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("EducationCategory", "Education")); - // The transformed data. - var transformedData = pipeline.Fit(trainData).Transform(trainData); + // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. + IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); - // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. 
- var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. This will contain the newly created column EducationCategory + IEnumerable featureRows = ml.CreateEnumerable(transformedData, reuseRowObject: false); Console.WriteLine($"Example of mapping string->string"); Console.WriteLine($"Age\tEducation\tEducationLabel"); - foreach (var featureRow in featuresColumn) + foreach (var featureRow in featureRows) { Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); } @@ -108,174 +86,5 @@ public static void StringToStringMappingExample(MLContext ml, IDataView trainDat // 34 0-5yrs Cat1 // 35 6-11yrs Cat2 } - - /// - /// This example demonstrates the use of KeyTypes by setting treatValuesAsKeyTypes to true, - /// to true. - /// This is useful in cases where you want the output to be integer based rather than the actual value. - /// - /// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following: - /// 1) If the Value type is an unsigned int or unsigned long, the specified values are used directly as the KeyType values. - /// 2) If the Value type is not an unsigned int or unsigned long, new KeyType values are generated for each unique value. - /// - /// In this example, the Value type is a string. Since we are setting treatValueAsKeyTypes to true, - /// the ValueMappingEstimator will generate its own KeyType values for each unique string. - /// As with KeyTypes, they contain the actual Value information as part of the metadata, therefore - /// we can convert a KeyType back to the actual value the KeyType represents. To demonstrate - /// the reverse lookup and to confirm the correct value is mapped, a KeyToValueEstimator is added - /// to the pipeline to convert back to the original value. 
- /// - public static void StringToKeyTypeMappingExample(MLContext ml, IDataView trainData) - { - // Creating a list of keys based on the Education values from the dataset - // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. - var educationKeys = new List() - { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Sample string values - var educationValues = new List() - { - "Cat1", - "Cat2", - "Cat3" - }; - - // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. - // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back - // to the original value. - var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, true, ("Education", "EducationKeyType")) - .Append(new KeyToValueMappingEstimator(ml, ("EducationKeyType", "EducationCategory"))); - - // The transformed data. - var transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. - var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping string->keytype"); - Console.WriteLine($"Age\tEducation\tEducationLabel"); - foreach (var featureRow in featuresColumn) - { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); - } - - // Features column obtained post-transformation. - // - // Age Education EducationLabel - // 26 0-5yrs Cat1 - // 42 0-5yrs Cat1 - // 39 12+yrs Cat3 - // 34 0-5yrs Cat1 - // 35 6-11yrs Cat2 - } - - /// - /// This example demonstrates the use of floating types as the key type for ValueMappingEstimator by mapping a float-to-string value. 
- /// The mapping looks like the following: - /// - /// 1.0 -> Cat1 - /// 2.0 -> Cat2 - /// - /// - public static void FloatToStringMappingExample(MLContext ml, IDataView trainData) - { - // Creating a list of keys based on the induced value from the dataset - // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. - var inducedKeys = new List() - { - 1.0f, - 2.0f - }; - - // Sample list of associated string values - var inducedValues = new List() - { - "Cat1", - "Cat2" - }; - - var pipeline = new ValueMappingEstimator(ml, inducedKeys, inducedValues, ("Induced", "InducedCategory")); - - // The transformed data. - var transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. - var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping float->string"); - Console.WriteLine($"Age\tInduced\tInducedCategory"); - foreach (var featureRow in featuresColumn) - { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Induced}\t{featureRow.InducedCategory}"); - } - - // Features column obtained post-transformation. - // - // Example of mapping float->string - // Age Induced InducedCategory - // 26 1 Cat1 - // 42 1 Cat1 - // 39 2 Cat2 - // 34 2 Cat2 - // 35 1 Cat1 - } - - /// - /// This example demonstrates the use arrays as the values for the ValueMappingEstimator. It maps a set of keys that are type string - /// to a integer arrays of variable length. 
- /// The mapping looks like the following: - /// - /// 0-5yrs -> 1,2,3,4 - /// 6-11yrs -> 5,6,7 - /// 12+yrs -> 42, 32 - /// - /// - public static void StringToArrayMappingExample(MLContext ml, IDataView trainData) - { - // Creating a list of keys based on the Education values from the dataset - var educationKeys = new List() - { - "0-5yrs", - "6-11yrs", - "12+yrs" - }; - - // Sample list of associated array values - var educationValues = new List() - { - new int[] { 1,2,3,4 }, - new int[] { 5,6,7 }, - new int[] { 42, 32 } - }; - - var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("Education", "EducationCategory")); - - // The transformed data. - var transformedData = pipeline.Fit(trainData).Transform(trainData); - - // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithFeatures. - var featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); - - Console.WriteLine($"Example of mapping string->array"); - Console.WriteLine($"Age\tEducation\tEducationLabel"); - foreach (var featureRow in featuresColumn) - { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}"); - } - - // Features column obtained post-transformation. 
- // - // Example of mapping string->array - // Age Education EducationLabel - // 26 0 - 5yrs 1,2,3,4 - // 42 0 - 5yrs 1,2,3,4 - // 39 12 + yrs 42,32 - // 34 0 - 5yrs 1,2,3,4 - // 35 6 - 11yrs 5,6,7 - } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs new file mode 100644 index 0000000000..fbe64f15e2 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -0,0 +1,81 @@ +using System; +using System.Collections.Generic; +using Microsoft.Data.DataView; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Conversions; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class ValueMappingFloatToStringExample + { + /// + /// Helper class for retrieving the resulting data + /// + class SampleInfertDataWithInducedCategory + { + public float Age = 0; + public float Induced = 0.0f; + public string InducedCategory = default; + } + + /// + /// This example demonstrates the use of floating types as the key type for ValueMappingEstimator by mapping a float-to-string value. + /// The mapping looks like the following: + /// + /// 1.0 -> Cat1 + /// 2.0 -> Cat2 + /// + /// + public static void Run() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Get a small dataset as an IEnumerable. + IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + var trainData = ml.Data.ReadFromEnumerable(data); + + // Creating a list of keys based on the induced value from the dataset + // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. + var inducedKeys = new List() + { + 1.0f, + 2.0f + }; + + // Creating a list of values, these strings will map accordingly to each key. 
+ var inducedValues = new List() + { + "Cat1", + "Cat2" + }; + + // Constructs the ValueMappingEstimator making the ML.net pipeline + var pipeline = new ValueMappingEstimator(ml, inducedKeys, inducedValues, ("InducedCategory", "Induced")); + + // Fits the ValueMappingEstimator and transforms the data adding the InducedCategory column. + IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the resulting data as an IEnumerable of SampleInfertDataWithInducedCategory. This will contain the newly created column InducedCategory + IEnumerable featureRows = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping float->string"); + Console.WriteLine($"Age\tInduced\tInducedCategory"); + foreach (var featureRow in featureRows) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Induced}\t{featureRow.InducedCategory}"); + } + + // Features column obtained post-transformation. + // + // Example of mapping float->string + // Age Induced InducedCategory + // 26 1 Cat1 + // 42 1 Cat1 + // 39 2 Cat2 + // 34 2 Cat2 + // 35 1 Cat1 + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs new file mode 100644 index 0000000000..f1544f558a --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -0,0 +1,84 @@ +using System; +using System.Collections.Generic; +using Microsoft.Data.DataView; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Conversions; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class ValueMappingStringToArrayExample + { + /// + /// Helper class for retrieving the resulting data + /// + class SampleInfertDataWithIntArray + { + public float Age = 0; + public string Education = default; + public int[] EducationCategory = default; + } + + /// + /// This example demonstrates the use arrays as the 
values for the ValueMappingEstimator. It maps a set of keys that are type string + /// to a integer arrays of variable length. + /// The mapping looks like the following: + /// + /// 0-5yrs -> 1,2,3,4 + /// 6-11yrs -> 5,6,7 + /// 12+yrs -> 42, 32 + /// + /// + public static void Run() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Get a small dataset as an IEnumerable. + IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + var trainData = ml.Data.ReadFromEnumerable(data); + + // Creating a list of keys based on the Education values from the dataset + var educationKeys = new List() + { + "0-5yrs", + "6-11yrs", + "12+yrs" + }; + + // Sample list of associated array values + var educationValues = new List() + { + new int[] { 1,2,3,4 }, + new int[] { 5,6,7 }, + new int[] { 42, 32 } + }; + + // Constructs the ValueMappingEstimator making the ML.net pipeline + var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("EducationCategory", "Education")); + + // Fits the ValueMappingEstimator and transforms the data adding the EducationCategory column. + IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationCategory + IEnumerable featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping string->array"); + Console.WriteLine($"Age\tEducation\tEducationLabel"); + foreach (var featureRow in featuresColumn) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}"); + } + + // Features column obtained post-transformation. 
+ // + // Example of mapping string->array + // Age Education EducationLabel + // 26 0 - 5yrs 1,2,3,4 + // 42 0 - 5yrs 1,2,3,4 + // 39 12 + yrs 42,32 + // 34 0 - 5yrs 1,2,3,4 + // 35 6 - 11yrs 5,6,7 + } + } +} \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs new file mode 100644 index 0000000000..43dca6e5db --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -0,0 +1,94 @@ +using System; +using System.Collections.Generic; +using Microsoft.Data.DataView; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Conversions; + +namespace Microsoft.ML.Samples.Dynamic +{ + public class ValueMappingStringToKeyTypeExample + { + /// + /// Helper class for retrieving the resulting data + /// + class SampleInfertDataWithFeatures + + { + public float Age = 0; + public string Education = default; + public string EducationCategory = default; + } + + /// + /// This example demonstrates the use of KeyTypes by setting treatValuesAsKeyTypes to true, + /// to true. + /// This is useful in cases where you want the output to be integer based rather than the actual value. + /// + /// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following: + /// 1) If the Value type is an unsigned int or unsigned long, the specified values are used directly as the KeyType values. + /// 2) If the Value type is not an unsigned int or unsigned long, new KeyType values are generated for each unique value. + /// + /// In this example, the Value type is a string. Since we are setting treatValueAsKeyTypes to true, + /// the ValueMappingEstimator will generate its own KeyType values for each unique string. + /// As with KeyTypes, they contain the actual Value information as part of the metadata, therefore + /// we can convert a KeyType back to the actual value the KeyType represents. 
To demonstrate + /// the reverse lookup and to confirm the correct value is mapped, a KeyToValueEstimator is added + /// to the pipeline to convert back to the original value. + /// + public static void Run() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Get a small dataset as an IEnumerable. + IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + var trainData = ml.Data.ReadFromEnumerable(data); + + // Creating a list of keys based on the Education values from the dataset + // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. + var educationKeys = new List() + { + "0-5yrs", + "6-11yrs", + "12+yrs" + }; + + // Creating a list of values that are sample strings. These will be converted to KeyTypes + var educationValues = new List() + { + "Cat1", + "Cat2", + "Cat3" + }; + + // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. + // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back + // to the original value. + var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, true, ("EducationKeyType", "Education")) + .Append(new KeyToValueMappingEstimator(ml, ("EducationCategory", "EducationKeyType"))); + + // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. + IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); + + // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. 
+ IEnumerable featureRows = ml.CreateEnumerable(transformedData, reuseRowObject: false); + + Console.WriteLine($"Example of mapping string->keytype"); + Console.WriteLine($"Age\tEducation\tEducationLabel"); + foreach (var featureRow in featureRows) + { + Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); + } + + // Features column obtained post-transformation. + // + // Age Education EducationLabel + // 26 0-5yrs Cat1 + // 42 0-5yrs Cat1 + // 39 12+yrs Cat3 + // 34 0-5yrs Cat1 + // 35 6-11yrs Cat2 + } + } +} \ No newline at end of file From beb48ebd84ea455ea329f7decefbe4edddafe3ba Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 31 Jan 2019 13:00:11 -0800 Subject: [PATCH 3/6] - Added CDATA links to the examples for ValueMapping and KeyToValue - Added remarks section to ValueMapping - Other updates based upon feedback --- .../Dynamic/ValueMapping.cs | 27 +++++++---------- .../Dynamic/ValueMappingFloatToString.cs | 17 ++++------- .../Dynamic/ValueMappingStringToArray.cs | 23 ++++++--------- .../Dynamic/ValueMappingStringToKeyType.cs | 23 +++++++-------- .../ConversionsExtensionsCatalog.cs | 29 +++++++++++++++++-- .../Transforms/ValueMapping.cs | 5 +--- src/Microsoft.ML.Data/Transforms/doc.xml | 27 +++++++++++++++++ 7 files changed, 91 insertions(+), 60 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index 5d9611b71d..14454fc679 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -2,7 +2,6 @@ using System.Collections.Generic; using Microsoft.Data.DataView; using Microsoft.ML.Data; -using Microsoft.ML.Transforms.Conversions; namespace Microsoft.ML.Samples.Dynamic { @@ -15,25 +14,21 @@ class SampleInfertDataWithFeatures public string EducationCategory = default; } - /// /// This example demonstrates the use of the ValueMappingEstimator by 
mapping string-to-string values. The ValueMappingEstimator uses /// level of education as keys to a respective string label which is the value. /// The mapping looks like the following: - /// - /// 0-5yrs -> Cat1 - /// 6-11yrs -> Cat2 - /// 12+yrs -> Cat3 - /// - /// + /// 0-5yrs -> Cat1 + /// 6-11yrs -> Cat2 + /// 12+yrs -> Cat3 public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. - var ml = new MLContext(); + var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = ml.Data.ReadFromEnumerable(data); + IDataView trainData = mlContext.Data.ReadFromEnumerable(data); // Preview of the data. // @@ -57,21 +52,21 @@ public static void Run() var educationValues = new List() { "Cat1", - "Cat2", + "Cat2", "Cat3" }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("EducationCategory", "Education")); + var pipeline = mlContext.Transforms.Conversion.ValueMap(educationKeys, educationValues, ("EducationCategory", "Education")); // Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. 
This will contain the newly created column EducationCategory - IEnumerable featureRows = ml.CreateEnumerable(transformedData, reuseRowObject: false); - + IEnumerable featureRows = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); + Console.WriteLine($"Example of mapping string->string"); - Console.WriteLine($"Age\tEducation\tEducationLabel"); + Console.WriteLine($"Age\tEducation\tEducationCategory"); foreach (var featureRow in featureRows) { Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); @@ -79,7 +74,7 @@ public static void Run() // Features column obtained post-transformation. // - // Age Education EducationLabel + // Age Education EducationCategory // 26 0-5yrs Cat1 // 42 0-5yrs Cat1 // 39 12+yrs Cat3 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index fbe64f15e2..107264be9a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -2,7 +2,6 @@ using System.Collections.Generic; using Microsoft.Data.DataView; using Microsoft.ML.Data; -using Microsoft.ML.Transforms.Conversions; namespace Microsoft.ML.Samples.Dynamic { @@ -18,23 +17,19 @@ class SampleInfertDataWithInducedCategory public string InducedCategory = default; } - /// /// This example demonstrates the use of floating types as the key type for ValueMappingEstimator by mapping a float-to-string value. /// The mapping looks like the following: - /// - /// 1.0 -> Cat1 - /// 2.0 -> Cat2 - /// - /// + /// 1.0 -> Cat1 + /// 2.0 -> Cat2 public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. - var ml = new MLContext(); + var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. 
IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = ml.Data.ReadFromEnumerable(data); + IDataView trainData = mlContext.Data.ReadFromEnumerable(data); // Creating a list of keys based on the induced value from the dataset // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. @@ -52,13 +47,13 @@ public static void Run() }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = new ValueMappingEstimator(ml, inducedKeys, inducedValues, ("InducedCategory", "Induced")); + var pipeline = mlContext.Transforms.Conversion.ValueMap(inducedKeys, inducedValues, ("InducedCategory", "Induced")); // Fits the ValueMappingEstimator and transforms the data adding the InducedCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); // Getting the resulting data as an IEnumerable of SampleInfertDataWithInducedCategory. This will contain the newly created column InducedCategory - IEnumerable featureRows = ml.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable featureRows = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); Console.WriteLine($"Example of mapping float->string"); Console.WriteLine($"Age\tInduced\tInducedCategory"); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index f1544f558a..c3a05e8cce 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -2,7 +2,6 @@ using System.Collections.Generic; using Microsoft.Data.DataView; using Microsoft.ML.Data; -using Microsoft.ML.Transforms.Conversions; namespace Microsoft.ML.Samples.Dynamic { @@ -18,25 +17,21 @@ class SampleInfertDataWithIntArray public int[] EducationCategory = default; } - /// /// This example demonstrates the use 
arrays as the values for the ValueMappingEstimator. It maps a set of keys that are type string /// to a integer arrays of variable length. /// The mapping looks like the following: - /// - /// 0-5yrs -> 1,2,3,4 - /// 6-11yrs -> 5,6,7 - /// 12+yrs -> 42, 32 - /// - /// + /// 0-5yrs -> 1,2,3,4 + /// 6-11yrs -> 5,6,7 + /// 12+yrs -> 42, 32 public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. - var ml = new MLContext(); + var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = ml.Data.ReadFromEnumerable(data); + IDataView trainData = mlContext.Data.ReadFromEnumerable(data); // Creating a list of keys based on the Education values from the dataset var educationKeys = new List() @@ -55,16 +50,16 @@ public static void Run() }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, ("EducationCategory", "Education")); + var pipeline = mlContext.Transforms.Conversion.ValueMap(educationKeys, educationValues, ("EducationCategory", "Education")); // Fits the ValueMappingEstimator and transforms the data adding the EducationCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); // Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. 
This will contain the newly created column EducationCategory - IEnumerable featuresColumn = ml.CreateEnumerable(transformedData, reuseRowObject: false); + IEnumerable featuresColumn = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); Console.WriteLine($"Example of mapping string->array"); - Console.WriteLine($"Age\tEducation\tEducationLabel"); + Console.WriteLine($"Age\tEducation\tEducationCategory"); foreach (var featureRow in featuresColumn) { Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}"); @@ -73,7 +68,7 @@ public static void Run() // Features column obtained post-transformation. // // Example of mapping string->array - // Age Education EducationLabel + // Age Education EducationCategory // 26 0 - 5yrs 1,2,3,4 // 42 0 - 5yrs 1,2,3,4 // 39 12 + yrs 42,32 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 43dca6e5db..77a6596c0f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -19,9 +19,7 @@ class SampleInfertDataWithFeatures public string EducationCategory = default; } - /// - /// This example demonstrates the use of KeyTypes by setting treatValuesAsKeyTypes to true, - /// to true. + /// This example demonstrates the use of KeyTypes in the ValueMappingEstimator by setting treatValuesAsKeyTypes to true, /// This is useful in cases where you want the output to be integer based rather than the actual value. /// /// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following: @@ -34,16 +32,15 @@ class SampleInfertDataWithFeatures /// we can convert a KeyType back to the actual value the KeyType represents. 
To demonstrate /// the reverse lookup and to confirm the correct value is mapped, a KeyToValueEstimator is added /// to the pipeline to convert back to the original value. - /// public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, // as well as the source of randomness. - var ml = new MLContext(); + var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); - var trainData = ml.Data.ReadFromEnumerable(data); + IDataView trainData = mlContext.Data.ReadFromEnumerable(data); // Creating a list of keys based on the Education values from the dataset // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. @@ -58,24 +55,24 @@ public static void Run() var educationValues = new List() { "Cat1", - "Cat2", + "Cat2", "Cat3" }; // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. // The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back // to the original value. - var pipeline = new ValueMappingEstimator(ml, educationKeys, educationValues, true, ("EducationKeyType", "Education")) - .Append(new KeyToValueMappingEstimator(ml, ("EducationCategory", "EducationKeyType"))); + var pipeline = new ValueMappingEstimator(mlContext, educationKeys, educationValues, true, ("EducationKeyType", "Education")) + .Append(new KeyToValueMappingEstimator(mlContext, ("EducationCategory", "EducationKeyType"))); // Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); // Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. 
- IEnumerable featureRows = ml.CreateEnumerable(transformedData, reuseRowObject: false); - + IEnumerable featureRows = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); + Console.WriteLine($"Example of mapping string->keytype"); - Console.WriteLine($"Age\tEducation\tEducationLabel"); + Console.WriteLine($"Age\tEducation\tEducationCategory"); foreach (var featureRow in featureRows) { Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); @@ -83,7 +80,7 @@ public static void Run() // Features column obtained post-transformation. // - // Age Education EducationLabel + // Age Education EducationCategory // 26 0-5yrs Cat1 // 42 0-5yrs Cat1 // 39 12+yrs Cat3 diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index cdcd3a83f2..5e3ec5b45a 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -63,6 +63,12 @@ public static TypeConvertingEstimator ConvertType(this TransformsCatalog.Convers /// /// The categorical transform's catalog. /// Name of the column to transform. + /// + /// + /// + /// public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string inputColumnName) => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), inputColumnName); @@ -72,6 +78,12 @@ public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.Co /// /// The categorical transform's catalog /// The pairs of input and output columns. 
+ /// + /// + /// + /// public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params (string outputColumnName, string inputColumnName)[] columns) => new KeyToValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns); @@ -133,7 +145,14 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// cannot contain duplicate keys. /// The list of values to pair with the keys for the mapping. This list must be equal to the same length as keys. /// The columns to apply this transform on. - /// + /// An instance of the ValueMappingEstimator + /// + /// + /// + /// + public static ValueMappingEstimator ValueMap( this TransformsCatalog.ConversionTransforms catalog, IEnumerable keys, @@ -152,7 +171,13 @@ public static ValueMappingEstimator ValueMapName of the key column in . /// Name of the value column in . /// The columns to apply this transform on. - /// + /// A instance of the ValueMappingEstimator + /// /// + /// + /// + /// public static ValueMappingEstimator ValueMap( this TransformsCatalog.ConversionTransforms catalog, IDataView lookupMap, string keyColumn, string valueColumn, params (string outputColumnName, string inputColumnName)[] columns) diff --git a/src/Microsoft.ML.Data/Transforms/ValueMapping.cs b/src/Microsoft.ML.Data/Transforms/ValueMapping.cs index 70b46fad01..75b1324601 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMapping.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMapping.cs @@ -33,10 +33,7 @@ namespace Microsoft.ML.Transforms.Conversions { - /// - /// The ValueMappingEstimator is a 1-1 mapping from a key to value. This particular class load the mappings from an . 
- /// This gives user the flexibility to load the mapping from file instead of using IEnumerable in - /// + /// public class ValueMappingEstimator : TrivialEstimator { private readonly (string outputColumnName, string inputColumnName)[] _columns; diff --git a/src/Microsoft.ML.Data/Transforms/doc.xml b/src/Microsoft.ML.Data/Transforms/doc.xml index a743ee06a4..e1907cebf5 100644 --- a/src/Microsoft.ML.Data/Transforms/doc.xml +++ b/src/Microsoft.ML.Data/Transforms/doc.xml @@ -63,6 +63,33 @@ but can be defined through other means: either with the mapping defined directly on the command line, or as loaded from an external file. + + + The ValueMappingEstimator is a 1-1 mapping from a key to value. + + + Given a set of keys and values, the ValueMappingEstimator builds up a dictionary so that when given a specific key it will return a + specific value. The ValueMappingEstimator supports keys and values of different to support different data types. + Examples for using a ValueMappingEstimator are: + +
  • + Converting a string value to a string value. This can be useful for grouping (e.g. 'cat', 'dog', 'horse' map to 'mammals') +
  • +
  • + Converting a string value to an integer value (e.g. converting a text description of quality to a numeric value, where 'good' maps to 1 and 'poor' maps to 0) +
  • +
  • + + Converting a float value to a string value and having the string value represented as a KeyType + (e.g. specific wind speeds could map to a group ('category1', 'category2',...) which will generate a unique integer identifier (1,2,...) that could be used + as part of a feature set for a trainer). + +
  • +
    + Values can be repeated to allow for multiple keys to map to the same value; however, keys cannot be repeated. The mapping between keys and values + can be specified either through lists, where the key list and value list must be the same size, or through an IDataView. +
    +
    From b3d1df21f5ddb4c115dbdd43c367af3abb6fa475 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 31 Jan 2019 14:47:03 -0800 Subject: [PATCH 4/6] ...updating from feedback --- .../Dynamic/ValueMapping.cs | 32 +++++----- .../Dynamic/ValueMappingFloatToString.cs | 60 ++++++++++--------- .../Dynamic/ValueMappingStringToArray.cs | 21 ++++--- .../Dynamic/ValueMappingStringToKeyType.cs | 36 +++++------ src/Microsoft.ML.Data/Transforms/doc.xml | 5 +- .../SamplesDatasetUtils.cs | 17 ++++++ 6 files changed, 95 insertions(+), 76 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index 14454fc679..eac98c0342 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -14,12 +14,12 @@ class SampleInfertDataWithFeatures public string EducationCategory = default; } - /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. The ValueMappingEstimator uses - /// level of education as keys to a respective string label which is the value. - /// The mapping looks like the following: - /// 0-5yrs -> Cat1 - /// 6-11yrs -> Cat2 - /// 12+yrs -> Cat3 + /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. This is useful + /// to map strings to a grouping. In this example, the Education data maps to the groups Undergraduate and Postgraduate: + /// 0-5yrs -> Undergraduate + /// 6-11yrs -> Postgraduate + /// 12+yrs -> Postgraduate + /// Its possible to have multiple keys map to the same value. public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, @@ -39,8 +39,8 @@ public static void Run() // 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ... // 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... 
- // Creating a list of keys based on the Education values from the dataset - // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. + // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView + // Creating a list of keys based on the Education values from the dataset. var educationKeys = new List() { "0-5yrs", @@ -51,9 +51,9 @@ public static void Run() // Creating a list of associated values that will map respectively to each educationKey var educationValues = new List() { - "Cat1", - "Cat2", - "Cat3" + "Undergraduate", + "Postgraduate", + "Postgraduate" }; // Constructs the ValueMappingEstimator making the ML.net pipeline @@ -75,11 +75,11 @@ public static void Run() // Features column obtained post-transformation. // // Age Education EducationCategory - // 26 0-5yrs Cat1 - // 42 0-5yrs Cat1 - // 39 12+yrs Cat3 - // 34 0-5yrs Cat1 - // 35 6-11yrs Cat2 + // 26 0-5yrs Undergraduate + // 42 0-5yrs Undergraudate + // 39 12+yrs Postgraduate + // 34 0-5yrs Undergraduate + // 35 6-11yrs Postgraduate } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index 107264be9a..948120a38f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -10,17 +10,16 @@ public class ValueMappingFloatToStringExample /// /// Helper class for retrieving the resulting data /// - class SampleInfertDataWithInducedCategory + class SampleTemperatureDataWithCategory { - public float Age = 0; - public float Induced = 0.0f; - public string InducedCategory = default; + public DateTime Date = default; + public float Temperature = 0.0f; + public string TemperatureCategory = default; } - /// This example demonstrates the use of floating types 
as the key type for ValueMappingEstimator by mapping a float-to-string value. - /// The mapping looks like the following: - /// 1.0 -> Cat1 - /// 2.0 -> Cat2 + /// This example demonstrates the use of ValueMappingEstimator by mapping float-to-string values. This is useful if the key + /// data are floating point and need to be grouped into string values. In this example, the Induction value is mapped to + /// "T1", "T2", "T3", and "T4" groups. public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, @@ -28,49 +27,54 @@ public static void Run() var mlContext = new MLContext(); // Get a small dataset as an IEnumerable. - IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); + IEnumerable data = SamplesUtils.DatasetUtils.GetSampleTemperatureData(); IDataView trainData = mlContext.Data.ReadFromEnumerable(data); + // If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView + // Creating a list of keys based on the induced value from the dataset - // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. - var inducedKeys = new List() + var temperatureKeys = new List() { - 1.0f, - 2.0f + 39.0F, + 67.0F, + 75.0F, + 82.0F, }; // Creating a list of values, these strings will map accordingly to each key. - var inducedValues = new List() + var classificationValues = new List() { - "Cat1", - "Cat2" + "T1", + "T2", + "T3", + "T4" }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.ValueMap(inducedKeys, inducedValues, ("InducedCategory", "Induced")); + var pipeline = mlContext.Transforms.Conversion.ValueMap(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature")); - // Fits the ValueMappingEstimator and transforms the data adding the InducedCategory column. 
+ // Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); - // Getting the resulting data as an IEnumerable of SampleInfertDataWithInducedCategory. This will contain the newly created column InducedCategory - IEnumerable featureRows = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); + // Getting the resulting data as an IEnumerable of SampleTemperatureDataWithCategory. This will contain the newly created column TemperatureCategory + IEnumerable featureRows = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); Console.WriteLine($"Example of mapping float->string"); - Console.WriteLine($"Age\tInduced\tInducedCategory"); + Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory"); foreach (var featureRow in featureRows) { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Induced}\t{featureRow.InducedCategory}"); + Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}"); } // Features column obtained post-transformation. 
// // Example of mapping float->string - // Age Induced InducedCategory - // 26 1 Cat1 - // 42 1 Cat1 - // 39 2 Cat2 - // 34 2 Cat2 - // 35 1 Cat1 + // Date Temperature TemperatureCategory + // 1/1/2012 39 T1 + // 1/2/2012 82 T4 + // 1/3/2012 75 T3 + // 1/4/2012 67 T2 + // 1/5/2012 75 T3 } } } \ No newline at end of file diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index c3a05e8cce..fdfd9e881b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using Microsoft.Data.DataView; using Microsoft.ML.Data; +using Microsoft.ML.Transforms.Conversions; namespace Microsoft.ML.Samples.Dynamic { @@ -14,12 +15,12 @@ class SampleInfertDataWithIntArray { public float Age = 0; public string Education = default; - public int[] EducationCategory = default; + public int[] EducationFeature = default; } - /// This example demonstrates the use arrays as the values for the ValueMappingEstimator. It maps a set of keys that are type string - /// to a integer arrays of variable length. - /// The mapping looks like the following: + /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values which allows for mapping string data + /// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to + /// arbitrary integer arrays with the following association: /// 0-5yrs -> 1,2,3,4 /// 6-11yrs -> 5,6,7 /// 12+yrs -> 42, 32 @@ -33,6 +34,8 @@ public static void Run() IEnumerable data = SamplesUtils.DatasetUtils.GetInfertData(); IDataView trainData = mlContext.Data.ReadFromEnumerable(data); + // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView + // Creating a list of keys based on the Education values from the dataset var educationKeys = new List() { @@ -50,25 +53,25 @@ public static void Run() }; // Constructs the ValueMappingEstimator making the ML.net pipeline - var pipeline = mlContext.Transforms.Conversion.ValueMap(educationKeys, educationValues, ("EducationCategory", "Education")); + var pipeline = new ValueMappingEstimator(mlContext, educationKeys, educationValues, ("EducationFeature", "Education")); - // Fits the ValueMappingEstimator and transforms the data adding the EducationCategory column. + // Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); // Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationCategory IEnumerable featuresColumn = mlContext.CreateEnumerable(transformedData, reuseRowObject: false); Console.WriteLine($"Example of mapping string->array"); - Console.WriteLine($"Age\tEducation\tEducationCategory"); + Console.WriteLine($"Age\tEducation\tEducationFeature"); foreach (var featureRow in featuresColumn) { - Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}"); + Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationFeature)}"); } // Features column obtained post-transformation. 
// // Example of mapping string->array - // Age Education EducationCategory + // Age Education EducationFeature // 26 0 - 5yrs 1,2,3,4 // 42 0 - 5yrs 1,2,3,4 // 39 12 + yrs 42,32 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 77a6596c0f..11412edf27 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -19,19 +19,15 @@ class SampleInfertDataWithFeatures public string EducationCategory = default; } - /// This example demonstrates the use of KeyTypes in the ValueMappingEstimator by setting treatValuesAsKeyTypes to true, - /// This is useful in cases where you want the output to be integer based rather than the actual value. - /// - /// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following: - /// 1) If the Value type is an unsigned int or unsigned long, the specified values are used directly as the KeyType values. - /// 2) If the Value type is not an unsigned int or unsigned long, new KeyType values are generated for each unique value. + /// This example demonstrates the use of KeyTypes using both the ValueMappingEstimator and KeyToValueEstimator. Using a KeyType + /// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes is true, + /// the ValueMappingEstimator will generate a KeyType for each unique value. /// - /// In this example, the Value type is a string. Since we are setting treatValueAsKeyTypes to true, - /// the ValueMappingEstimator will generate its own KeyType values for each unique string. - /// As with KeyTypes, they contain the actual Value information as part of the metadata, therefore - /// we can convert a KeyType back to the actual value the KeyType represents. 
To demonstrate - /// the reverse lookup and to confirm the correct value is mapped, a KeyToValueEstimator is added - /// to the pipeline to convert back to the original value. + /// In this example, the education data is mapped to a grouping of 'Undergraudate' and 'Postgraduate'. Because KeyTypes are used, the + /// ValueMappingEstimator will output the KeyType value rather than string value of 'Undergraduate' or 'Postgraduate'. + /// + /// The KeyToValueEstimator is added to the pipeline to convert the KeyType back to the original value. Therefore the output of this example + /// results in the string value of 'Undergraduate' and 'Postgraduate'. public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, @@ -54,9 +50,9 @@ public static void Run() // Creating a list of values that are sample strings. These will be converted to KeyTypes var educationValues = new List() { - "Cat1", - "Cat2", - "Cat3" + "Undergraduate", + "Postgraduate", + "Postgraduate" }; // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. @@ -81,11 +77,11 @@ public static void Run() // Features column obtained post-transformation. // // Age Education EducationCategory - // 26 0-5yrs Cat1 - // 42 0-5yrs Cat1 - // 39 12+yrs Cat3 - // 34 0-5yrs Cat1 - // 35 6-11yrs Cat2 + // 26 0-5yrs Undergraduate + // 42 0-5yrs Undergraduate + // 39 12+yrs Postgraduate + // 34 0-5yrs Undergraduate + // 35 6-11yrs Postgraduate } } } \ No newline at end of file diff --git a/src/Microsoft.ML.Data/Transforms/doc.xml b/src/Microsoft.ML.Data/Transforms/doc.xml index e1907cebf5..bee31ce44e 100644 --- a/src/Microsoft.ML.Data/Transforms/doc.xml +++ b/src/Microsoft.ML.Data/Transforms/doc.xml @@ -80,9 +80,8 @@
  • - Converting a float value to a string value and have the string value represented as a - (i.e. specific wind speeds could map to a group ('category1', 'category2',...) which will generate a unique integer identifier (1,2,...) that could used - as part of a feature set for a trainer). + Converting an integer value to a string value and have the string value represented as a KeyType + (e.g. convert zip codes to a state string value, which will generate a unique integer value that can be used as a label).
  • diff --git a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs index dda46c155a..b341486390 100644 --- a/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs +++ b/src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs @@ -118,6 +118,23 @@ public static IEnumerable GetTopicsData() return data; } + public class SampleTemperatureData + { + public DateTime Date {get; set; } + public float Temperature { get; set; } + } + + public static IEnumerable GetSampleTemperatureData() + { + var data = new List(); + data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,1), Temperature = 39.0F }); + data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,2), Temperature = 82.0F }); + data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,3), Temperature = 75.0F }); + data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,4), Temperature = 67.0F }); + data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,5), Temperature = 75.0F }); + return data; + } + /// /// Represents the column of the infertility dataset. /// From a50a29f086c693ca4c8d8000d3c702e1edee6ba2 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Thu, 31 Jan 2019 21:57:30 -0800 Subject: [PATCH 5/6] ..updating.. 
--- .../Dynamic/ValueMapping.cs | 2 +- .../Dynamic/ValueMappingFloatToString.cs | 1 - .../Dynamic/ValueMappingStringToArray.cs | 19 +++++++++---------- .../Dynamic/ValueMappingStringToKeyType.cs | 2 +- .../ConversionsExtensionsCatalog.cs | 8 ++------ src/Microsoft.ML.Data/Transforms/doc.xml | 12 ++++++------ 6 files changed, 19 insertions(+), 25 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs index eac98c0342..a6a2ce262a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs @@ -15,7 +15,7 @@ class SampleInfertDataWithFeatures } /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. This is useful - /// to map strings to a grouping. In this example, the Education data maps to the groups Undergraduate and Postgraduate: + /// to map strings to a grouping. In this example, the education data maps to the groups Undergraduate and Postgraduate: /// 0-5yrs -> Undergraduate /// 6-11yrs -> Postgraduate /// 12+yrs -> Postgraduate diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs index 948120a38f..6d0c69d87c 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs @@ -31,7 +31,6 @@ public static void Run() IDataView trainData = mlContext.Data.ReadFromEnumerable(data); // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the induced value from the dataset var temperatureKeys = new List() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs index fdfd9e881b..e15049b460 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs @@ -21,9 +21,9 @@ class SampleInfertDataWithIntArray /// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values which allows for mapping string data /// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to /// arbitrary integer arrays with the following association: - /// 0-5yrs -> 1,2,3,4 - /// 6-11yrs -> 5,6,7 - /// 12+yrs -> 42, 32 + /// 0-5yrs -> 1, 2, 3 + /// 6-11yrs -> 5, 6, 7 + /// 12+yrs -> 42,32,64 public static void Run() { // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, @@ -35,7 +35,6 @@ public static void Run() IDataView trainData = mlContext.Data.ReadFromEnumerable(data); // If the list of keys and values are known, they can be passed to the API. 
The ValueMappingEstimator can also get the mapping through an IDataView - // Creating a list of keys based on the Education values from the dataset var educationKeys = new List() { @@ -47,9 +46,9 @@ public static void Run() // Sample list of associated array values var educationValues = new List() { - new int[] { 1,2,3,4 }, + new int[] { 1,2,3 }, new int[] { 5,6,7 }, - new int[] { 42, 32 } + new int[] { 42,32,64 } }; // Constructs the ValueMappingEstimator making the ML.net pipeline @@ -72,10 +71,10 @@ public static void Run() // // Example of mapping string->array // Age Education EducationFeature - // 26 0 - 5yrs 1,2,3,4 - // 42 0 - 5yrs 1,2,3,4 - // 39 12 + yrs 42,32 - // 34 0 - 5yrs 1,2,3,4 + // 26 0 - 5yrs 1,2,3 + // 42 0 - 5yrs 1,2,3 + // 39 12 + yrs 42,32,64 + // 34 0 - 5yrs 1,2,3 // 35 6 - 11yrs 5,6,7 } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs index 11412edf27..39a14d2442 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs @@ -23,7 +23,7 @@ class SampleInfertDataWithFeatures /// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes is true, /// the ValueMappingEstimator will generate a KeyType for each unique value. /// - /// In this example, the education data is mapped to a grouping of 'Undergraudate' and 'Postgraduate'. Because KeyTypes are used, the + /// In this example, the education data is mapped to a grouping of 'Undergraduate' and 'Postgraduate'. Because KeyTypes are used, the /// ValueMappingEstimator will output the KeyType value rather than string value of 'Undergraduate' or 'Postgraduate'. /// /// The KeyToValueEstimator is added to the pipeline to convert the KeyType back to the original value. 
Therefore the output of this example diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 5e3ec5b45a..87ad5ceb93 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -136,7 +136,7 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co => new ValueToKeyMappingEstimator(CatalogUtils.GetEnvironment(catalog), columns, keyData); /// - /// Maps specified keys to specified values + /// /// /// The key type. /// The value type. @@ -152,7 +152,6 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs)] /// ]]> /// - public static ValueMappingEstimator ValueMap( this TransformsCatalog.ConversionTransforms catalog, IEnumerable keys, @@ -161,10 +160,7 @@ public static ValueMappingEstimator ValueMap new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), keys, values, columns); /// - /// Maps the using the keys in the dictionary to the values of dictionary i.e. - /// a value 'x' in the would be mappped to a value stored in dictionary[x]. - /// In this case, the is used to build up the dictionary where - /// and specify the keys and values of dictionary respectively. + /// /// /// The categorical transform's catalog /// An instance of that contains the key and value columns. diff --git a/src/Microsoft.ML.Data/Transforms/doc.xml b/src/Microsoft.ML.Data/Transforms/doc.xml index bee31ce44e..47671617b9 100644 --- a/src/Microsoft.ML.Data/Transforms/doc.xml +++ b/src/Microsoft.ML.Data/Transforms/doc.xml @@ -72,18 +72,18 @@ specific value. The ValueMappingEstimator supports keys and values of different to support different data types. Examples for using a ValueMappingEstimator are: -
  • + Converting a string value to a string value; this can be useful for grouping (e.g. 'cat', 'dog', 'horse' map to 'mammals') -
  • -
  • + + Converting a string value to an integer value (e.g. converting a text description such as quality to a numeric value, where 'good' maps to 1 and 'poor' maps to 0) -
  • -
  • + + Converting an integer value to a string value and have the string value represented as a KeyType (e.g. convert zip codes to a state string value, which will generate a unique integer value that can be used as a label). -
  • +
    Values can be repeated to allow for multiple keys to map to the same value, however keys can not be repeated. The mapping between keys and values can be specified either through lists, where the key list and value list must be the same size or can be done through an . From 8542762c5da51489dcd8147564f1388fcfca7725 Mon Sep 17 00:00:00 2001 From: Scott Inglis Date: Fri, 1 Feb 2019 11:17:21 -0800 Subject: [PATCH 6/6] ...updating... --- .../Transforms/ConversionsExtensionsCatalog.cs | 8 +++++++- src/Microsoft.ML.Data/Transforms/ValueMapping.cs | 7 ++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs index 87ad5ceb93..f83e5fd95e 100644 --- a/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs +++ b/src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs @@ -150,6 +150,9 @@ public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.Co /// /// /// public static ValueMappingEstimator ValueMap( @@ -168,10 +171,13 @@ public static ValueMappingEstimator ValueMapName of the value column in . /// The columns to apply this transform on. /// A instance of the ValueMappingEstimator - /// /// + /// /// /// /// public static ValueMappingEstimator ValueMap( diff --git a/src/Microsoft.ML.Data/Transforms/ValueMapping.cs b/src/Microsoft.ML.Data/Transforms/ValueMapping.cs index 75b1324601..3d0e799a39 100644 --- a/src/Microsoft.ML.Data/Transforms/ValueMapping.cs +++ b/src/Microsoft.ML.Data/Transforms/ValueMapping.cs @@ -84,11 +84,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) } } - /// - /// The ValueMappingEstimator is a 1-1 mapping from a key to value. The key type and value type are specified - /// through TKey and TValue. TKey is always a scalar. TValue can be either a scalar or an array (array is only possible when input is scalar). 
- /// The mapping is specified, not trained by providing a list of keys and a list of values. - /// + /// /// Specifies the key type. /// Specifies the value type. public sealed class ValueMappingEstimator : ValueMappingEstimator @@ -299,6 +295,7 @@ internal static IDataView CreateDataView(IHostEnvironment env, } } + /// public class ValueMappingTransformer : OneToOneTransformerBase { internal const string Summary = "Maps text values columns to new columns using a map dataset.";