Skip to content

Commit 578c188

Browse files
authored
ValueMappingEstimator example (#2222)
ValueMappingEstimator example (References #754) Provides examples that demonstrate different ways to use the ValueMappingEstimator. * Added sample links to ValueMap catalog extensions * Added additional documentation to the ValueMappingEstimator, including remarks section.
1 parent 4bb3e00 commit 578c188

File tree

8 files changed

+413
-17
lines changed

8 files changed

+413
-17
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.Data.DataView;
4+
using Microsoft.ML.Data;
5+
6+
namespace Microsoft.ML.Samples.Dynamic
7+
{
8+
public class ValueMappingExample
9+
{
10+
class SampleInfertDataWithFeatures
11+
{
12+
public float Age = 0;
13+
public string Education = default;
14+
public string EducationCategory = default;
15+
}
16+
17+
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. This is useful
18+
/// to map strings to a grouping. In this example, the education data maps to the groups Undergraduate and Postgraduate:
19+
/// 0-5yrs -> Undergraduate
20+
/// 6-11yrs -> Postgraduate
21+
/// 12+yrs -> Postgraduate
22+
/// It's possible to have multiple keys map to the same value.
23+
public static void Run()
24+
{
25+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
26+
// as well as the source of randomness.
27+
var mlContext = new MLContext();
28+
29+
// Get a small dataset as an IEnumerable.
30+
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
31+
IDataView trainData = mlContext.Data.ReadFromEnumerable(data);
32+
33+
// Preview of the data.
34+
//
35+
// Age Case Education induced parity pooled.stratum row_num ...
36+
// 26.0 1.0 0-5yrs 1.0 6.0 3.0 1.0 ...
37+
// 42.0 1.0 0-5yrs 1.0 1.0 1.0 2.0 ...
38+
// 39.0 1.0 12+yrs 2.0 6.0 4.0 3.0 ...
39+
// 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ...
40+
// 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ...
41+
42+
// If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
43+
// Creating a list of keys based on the Education values from the dataset.
44+
var educationKeys = new List<string>()
45+
{
46+
"0-5yrs",
47+
"6-11yrs",
48+
"12+yrs"
49+
};
50+
51+
// Creating a list of associated values that will map respectively to each educationKey
52+
var educationValues = new List<string>()
53+
{
54+
"Undergraduate",
55+
"Postgraduate",
56+
"Postgraduate"
57+
};
58+
59+
// Constructs the ValueMappingEstimator, making the ML.NET pipeline
60+
var pipeline = mlContext.Transforms.Conversion.ValueMap(educationKeys, educationValues, ("EducationCategory", "Education"));
61+
62+
// Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory.
63+
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
64+
65+
// Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. This will contain the newly created column EducationCategory
66+
IEnumerable<SampleInfertDataWithFeatures> featureRows = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);
67+
68+
Console.WriteLine($"Example of mapping string->string");
69+
Console.WriteLine($"Age\tEducation\tEducationCategory");
70+
foreach (var featureRow in featureRows)
71+
{
72+
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}");
73+
}
74+
75+
// Features column obtained post-transformation.
76+
//
77+
// Age Education EducationCategory
78+
// 26 0-5yrs Undergraduate
79+
// 42 0-5yrs Undergraduate
80+
// 39 12+yrs Postgraduate
81+
// 34 0-5yrs Undergraduate
82+
// 35 6-11yrs Postgraduate
83+
}
84+
}
85+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.Data.DataView;
4+
using Microsoft.ML.Data;
5+
6+
namespace Microsoft.ML.Samples.Dynamic
7+
{
8+
public class ValueMappingFloatToStringExample
9+
{
10+
/// <summary>
11+
/// Helper class for retrieving the resulting data
12+
/// </summary>
13+
class SampleTemperatureDataWithCategory
14+
{
15+
public DateTime Date = default;
16+
public float Temperature = 0.0f;
17+
public string TemperatureCategory = default;
18+
}
19+
20+
/// This example demonstrates the use of ValueMappingEstimator by mapping float-to-string values. This is useful if the key
21+
/// data are floating point and need to be grouped into string values. In this example, the Temperature value is mapped to
22+
/// "T1", "T2", "T3", and "T4" groups.
23+
public static void Run()
24+
{
25+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
26+
// as well as the source of randomness.
27+
var mlContext = new MLContext();
28+
29+
// Get a small dataset as an IEnumerable.
30+
IEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData> data = SamplesUtils.DatasetUtils.GetSampleTemperatureData();
31+
IDataView trainData = mlContext.Data.ReadFromEnumerable(data);
32+
33+
// If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
34+
// Creating a list of keys based on the Temperature values from the dataset
35+
var temperatureKeys = new List<float>()
36+
{
37+
39.0F,
38+
67.0F,
39+
75.0F,
40+
82.0F,
41+
};
42+
43+
// Creating a list of values, these strings will map accordingly to each key.
44+
var classificationValues = new List<string>()
45+
{
46+
"T1",
47+
"T2",
48+
"T3",
49+
"T4"
50+
};
51+
52+
// Constructs the ValueMappingEstimator, making the ML.NET pipeline
53+
var pipeline = mlContext.Transforms.Conversion.ValueMap(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature"));
54+
55+
// Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column.
56+
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
57+
58+
// Getting the resulting data as an IEnumerable of SampleTemperatureDataWithCategory. This will contain the newly created column TemperatureCategory
59+
IEnumerable<SampleTemperatureDataWithCategory> featureRows = mlContext.CreateEnumerable<SampleTemperatureDataWithCategory>(transformedData, reuseRowObject: false);
60+
61+
Console.WriteLine($"Example of mapping float->string");
62+
Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory");
63+
foreach (var featureRow in featureRows)
64+
{
65+
Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}");
66+
}
67+
68+
// Features column obtained post-transformation.
69+
//
70+
// Example of mapping float->string
71+
// Date Temperature TemperatureCategory
72+
// 1/1/2012 39 T1
73+
// 1/2/2012 82 T4
74+
// 1/3/2012 75 T3
75+
// 1/4/2012 67 T2
76+
// 1/5/2012 75 T3
77+
}
78+
}
79+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.Data.DataView;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Transforms.Conversions;
6+
7+
namespace Microsoft.ML.Samples.Dynamic
8+
{
9+
public class ValueMappingStringToArrayExample
10+
{
11+
/// <summary>
12+
/// Helper class for retrieving the resulting data
13+
/// </summary>
14+
class SampleInfertDataWithIntArray
15+
{
16+
public float Age = 0;
17+
public string Education = default;
18+
public int[] EducationFeature = default;
19+
}
20+
21+
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values which allows for mapping string data
22+
/// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to
23+
/// arbitrary integer arrays with the following association:
24+
/// 0-5yrs -> 1, 2, 3
25+
/// 6-11yrs -> 5, 6, 7
26+
/// 12+yrs -> 42,32,64
27+
public static void Run()
28+
{
29+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
30+
// as well as the source of randomness.
31+
var mlContext = new MLContext();
32+
33+
// Get a small dataset as an IEnumerable.
34+
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
35+
IDataView trainData = mlContext.Data.ReadFromEnumerable(data);
36+
37+
// If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
38+
// Creating a list of keys based on the Education values from the dataset
39+
var educationKeys = new List<string>()
40+
{
41+
"0-5yrs",
42+
"6-11yrs",
43+
"12+yrs"
44+
};
45+
46+
// Sample list of associated array values
47+
var educationValues = new List<int[]>()
48+
{
49+
new int[] { 1,2,3 },
50+
new int[] { 5,6,7 },
51+
new int[] { 42,32,64 }
52+
};
53+
54+
// Constructs the ValueMappingEstimator, making the ML.NET pipeline
55+
var pipeline = new ValueMappingEstimator<string, int>(mlContext, educationKeys, educationValues, ("EducationFeature", "Education"));
56+
57+
// Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column.
58+
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
59+
60+
// Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationFeature
61+
IEnumerable<SampleInfertDataWithIntArray> featuresColumn = mlContext.CreateEnumerable<SampleInfertDataWithIntArray>(transformedData, reuseRowObject: false);
62+
63+
Console.WriteLine($"Example of mapping string->array");
64+
Console.WriteLine($"Age\tEducation\tEducationFeature");
65+
foreach (var featureRow in featuresColumn)
66+
{
67+
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationFeature)}");
68+
}
69+
70+
// Features column obtained post-transformation.
71+
//
72+
// Example of mapping string->array
73+
// Age Education EducationFeature
74+
// 26 0-5yrs 1,2,3
75+
// 42 0-5yrs 1,2,3
76+
// 39 12+yrs 42,32,64
77+
// 34 0-5yrs 1,2,3
78+
// 35 6-11yrs 5,6,7
79+
}
80+
}
81+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.Data.DataView;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Transforms.Conversions;
6+
7+
namespace Microsoft.ML.Samples.Dynamic
8+
{
9+
public class ValueMappingStringToKeyTypeExample
10+
{
11+
/// <summary>
12+
/// Helper class for retrieving the resulting data
13+
/// </summary>
14+
class SampleInfertDataWithFeatures
15+
16+
{
17+
public float Age = 0;
18+
public string Education = default;
19+
public string EducationCategory = default;
20+
}
21+
22+
/// This example demonstrates the use of KeyTypes using both the ValueMappingEstimator and KeyToValueMappingEstimator. Using a KeyType
23+
/// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes is true,
24+
/// the ValueMappingEstimator will generate a KeyType for each unique value.
25+
///
26+
/// In this example, the education data is mapped to a grouping of 'Undergraduate' and 'Postgraduate'. Because KeyTypes are used, the
27+
/// ValueMappingEstimator will output the KeyType value rather than string value of 'Undergraduate' or 'Postgraduate'.
28+
///
29+
/// The KeyToValueMappingEstimator is added to the pipeline to convert the KeyType back to the original value. Therefore the output of this example
30+
/// results in the string value of 'Undergraduate' and 'Postgraduate'.
31+
public static void Run()
32+
{
33+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
34+
// as well as the source of randomness.
35+
var mlContext = new MLContext();
36+
37+
// Get a small dataset as an IEnumerable.
38+
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
39+
IDataView trainData = mlContext.Data.ReadFromEnumerable(data);
40+
41+
// Creating a list of keys based on the Education values from the dataset
42+
// These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable.
43+
var educationKeys = new List<string>()
44+
{
45+
"0-5yrs",
46+
"6-11yrs",
47+
"12+yrs"
48+
};
49+
50+
// Creating a list of values that are sample strings. These will be converted to KeyTypes
51+
var educationValues = new List<string>()
52+
{
53+
"Undergraduate",
54+
"Postgraduate",
55+
"Postgraduate"
56+
};
57+
58+
// Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings.
59+
// The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back
60+
// to the original value.
61+
var pipeline = new ValueMappingEstimator<string, string>(mlContext, educationKeys, educationValues, true, ("EducationKeyType", "Education"))
62+
.Append(new KeyToValueMappingEstimator(mlContext, ("EducationCategory", "EducationKeyType")));
63+
64+
// Fits the pipeline and transforms the data, adding the EducationKeyType and EducationCategory columns.
65+
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
66+
67+
// Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures.
68+
IEnumerable<SampleInfertDataWithFeatures> featureRows = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);
69+
70+
Console.WriteLine($"Example of mapping string->keytype");
71+
Console.WriteLine($"Age\tEducation\tEducationCategory");
72+
foreach (var featureRow in featureRows)
73+
{
74+
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}");
75+
}
76+
77+
// Features column obtained post-transformation.
78+
//
79+
// Age Education EducationCategory
80+
// 26 0-5yrs Undergraduate
81+
// 42 0-5yrs Undergraduate
82+
// 39 12+yrs Postgraduate
83+
// 34 0-5yrs Undergraduate
84+
// 35 6-11yrs Postgraduate
85+
}
86+
}
87+
}

0 commit comments

Comments
 (0)