Skip to content

Commit bfdbc4f

Browse files
committed
ValueMappingEstimator example
This provides an example that demonstrates different ways to use the ValueMappingEstimator. This is part of the original change to add the ValueMappingEstimator to the code base and references dotnet#754.
1 parent 0ad1fb0 commit bfdbc4f

File tree

2 files changed

+282
-1
lines changed

2 files changed

+282
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Transforms;
6+
using Microsoft.ML.Transforms.Conversions;
7+
8+
namespace Microsoft.ML.Samples.Dynamic
9+
{
10+
public class ValueMappingExample
11+
{
12+
/// <summary>
/// Row shape used to read transformed data back as objects: the Age and Education
/// columns plus a string-valued EducationCategory column.
/// </summary>
class SampleInfertDataWithFeatures
{
    // Every field carries an explicit initializer; the values are the type defaults.
    public float Age = 0f;
    public string Education = null;
    public string EducationCategory = null;
}
18+
19+
/// <summary>
/// Row shape used to read transformed data back as objects: the Age and Induced
/// columns plus a string-valued InducedCategory column.
/// </summary>
class SampleInfertDataWithInducedCategory
{
    // Every field carries an explicit initializer; the values are the type defaults.
    public float Age = 0f;
    public float Induced = 0f;
    public string InducedCategory = null;
}
25+
26+
/// <summary>
/// Row shape used to read transformed data back as objects: the Age and Education
/// columns plus an EducationCategory column holding an array of integers.
/// </summary>
class SampleInfertDataWithIntArray
{
    // Every field carries an explicit initializer; the values are the type defaults.
    public float Age = 0f;
    public string Education = null;
    public int[] EducationCategory = null;
}
32+
33+
34+
/// <summary>
/// Entry point of the sample: loads the infert sample rows into a data view and runs
/// each of the ValueMappingEstimator examples against it in turn.
/// </summary>
public static void ValueMappingTransform()
{
    // The MLContext is the starting point for ML.NET operations. It can be used for
    // exception tracking and logging, and is also the source of randomness.
    var mlContext = new MLContext();

    // Fetch the small sample dataset and wrap it in a data view.
    var infertRows = SamplesUtils.DatasetUtils.GetInfertData();
    var trainData = mlContext.CreateStreamingDataView(infertRows);

    // Preview of the data.
    //
    // Age    Case  Education  induced     parity  pooled.stratum  row_num  ...
    // 26.0   1.0   0-5yrs      1.0         6.0       3.0      1.0  ...
    // 42.0   1.0   0-5yrs      1.0         1.0       1.0      2.0  ...
    // 39.0   1.0   12+yrs      2.0         6.0       4.0      3.0  ...
    // 34.0   1.0   0-5yrs      2.0         4.0       2.0      4.0  ...
    // 35.0   1.0   6-11yrs     1.0         3.0       32.0     5.0  ...

    // Each example writes its own section to the console, so the call order
    // determines the order of the output.
    StringToStringMappingExample(mlContext, trainData);
    FloatToStringMappingExample(mlContext, trainData);
    StringToKeyTypeMappingExample(mlContext, trainData);
    StringToArrayMappingExample(mlContext, trainData);
}
58+
59+
/// <summary>
/// Shows the ValueMappingEstimator mapping string keys to string values. The level of
/// education read from the Education column is the key, and the associated label is
/// written to the EducationCategory column.
/// The mapping looks like the following:
/// <list>
/// <item>0-5yrs -> Cat1</item>
/// <item>6-11yrs -> Cat2</item>
/// <item>12+yrs -> Cat3</item>
/// </list>
/// </summary>
/// <param name="ml">The ML context used to build the estimator and to read the rows back.</param>
/// <param name="trainData">The data view the mapping is fitted on and applied to.</param>
public static void StringToStringMappingExample(MLContext ml, IDataView trainData)
{
    // Keys: the Education values that occur in the dataset. They are written out by hand
    // for the demonstration, but the ValueMappingEstimator accepts any IEnumerable.
    var keys = new List<string> { "0-5yrs", "6-11yrs", "12+yrs" };

    // Values: the label for the key at the same position in the list above.
    var labels = new List<string> { "Cat1", "Cat2", "Cat3" };

    var estimator = new ValueMappingEstimator<string, string>(ml, keys, labels, ("Education", "EducationCategory"));

    // Fit the estimator, then apply the resulting transformer to the same data.
    var transformer = estimator.Fit(trainData);
    var transformedData = transformer.Transform(trainData);

    // Read the transformed data back as an IEnumerable of SampleInfertDataWithFeatures.
    var rows = ml.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    Console.WriteLine("Example of mapping string->string");
    Console.WriteLine("Age\tEducation\tEducationLabel");
    foreach (var row in rows)
    {
        var line = $"{row.Age}\t{row.Education} \t{row.EducationCategory}";
        Console.WriteLine(line);
    }

    // Rows printed after the transformation.
    //
    // Age Education    EducationLabel
    // 26  0-5yrs       Cat1
    // 42  0-5yrs       Cat1
    // 39  12+yrs       Cat3
    // 34  0-5yrs       Cat1
    // 35  6-11yrs      Cat2
}
111+
112+
/// <summary>
/// This example demonstrates the use of KeyTypes as the output of the
/// <see cref="ValueMappingEstimator{TKey, TValue}"/>, requested by passing true for the
/// boolean constructor argument that tells the estimator to treat the values as KeyTypes.
/// This is useful in cases where you want the output to be integer based rather than the actual value.
///
/// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following:
/// 1) If the Value type is an unsigned int or unsigned long, the specified values are used directly as the KeyType values.
/// 2) If the Value type is not an unsigned int or unsigned long, new KeyType values are generated for each unique value.
///
/// In this example, the Value type is a string. Since the values are treated as KeyTypes,
/// the ValueMappingEstimator will generate its own KeyType values for each unique string.
/// As with KeyTypes, they contain the actual Value information as part of the metadata, therefore
/// we can convert a KeyType back to the actual value the KeyType represents. To demonstrate
/// the reverse lookup and to confirm the correct value is mapped, a KeyToValueMappingEstimator is added
/// to the pipeline to convert back to the original value.
/// </summary>
/// <param name="ml">The ML context used to build the pipeline and to read the rows back.</param>
/// <param name="trainData">The data view the pipeline is fitted on and applied to.</param>
public static void StringToKeyTypeMappingExample(MLContext ml, IDataView trainData)
{
    // Creating a list of keys based on the Education values from the dataset.
    // These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable.
    var educationKeys = new List<string>()
    {
        "0-5yrs",
        "6-11yrs",
        "12+yrs"
    };

    // Sample string values, one per key, in the same order as the keys.
    var educationValues = new List<string>()
    {
        "Cat1",
        "Cat2",
        "Cat3"
    };

    // Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings
    // (the `true` argument). Its output goes to the intermediate "EducationKeyType" column.
    // The KeyToValueMappingEstimator is appended to provide a reverse lookup of the KeyType, converting
    // the KeyType value back to the original value in the "EducationCategory" column.
    var pipeline = new ValueMappingEstimator<string, string>(ml, educationKeys, educationValues, true, ("Education", "EducationKeyType"))
        .Append(new KeyToValueMappingEstimator(ml, ("EducationKeyType", "EducationCategory")));

    // The transformed data.
    var transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Getting the data back as an IEnumerable of SampleInfertDataWithFeatures. That type has no
    // field for the intermediate "EducationKeyType" column, so only the reverse-mapped value is read.
    var featuresColumn = ml.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

    Console.WriteLine("Example of mapping string->keytype");
    Console.WriteLine("Age\tEducation\tEducationLabel");
    foreach (var featureRow in featuresColumn)
    {
        Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}");
    }

    // Features column obtained post-transformation.
    //
    // Example of mapping string->keytype
    // Age Education    EducationLabel
    // 26  0-5yrs       Cat1
    // 42  0-5yrs       Cat1
    // 39  12+yrs       Cat3
    // 34  0-5yrs       Cat1
    // 35  6-11yrs      Cat2
}
175+
176+
/// <summary>
/// Shows the ValueMappingEstimator with floating-point keys. The value read from the
/// Induced column is the key, and the associated string label is written to the
/// InducedCategory column.
/// The mapping looks like the following:
/// <list>
/// <item>1.0 -> Cat1</item>
/// <item>2.0 -> Cat2</item>
/// </list>
/// </summary>
/// <param name="ml">The ML context used to build the estimator and to read the rows back.</param>
/// <param name="trainData">The data view the mapping is fitted on and applied to.</param>
public static void FloatToStringMappingExample(MLContext ml, IDataView trainData)
{
    // Keys: the induced values that occur in the dataset. They are written out by hand
    // for the demonstration, but the ValueMappingEstimator accepts any IEnumerable.
    var keys = new List<float> { 1.0f, 2.0f };

    // Values: the string label for the key at the same position in the list above.
    var labels = new List<string> { "Cat1", "Cat2" };

    var estimator = new ValueMappingEstimator<float, string>(ml, keys, labels, ("Induced", "InducedCategory"));

    // Fit the estimator, then apply the resulting transformer to the same data.
    var transformer = estimator.Fit(trainData);
    var transformedData = transformer.Transform(trainData);

    // Read the transformed data back as an IEnumerable of SampleInfertDataWithInducedCategory.
    var rows = ml.CreateEnumerable<SampleInfertDataWithInducedCategory>(transformedData, reuseRowObject: false);

    Console.WriteLine("Example of mapping float->string");
    Console.WriteLine("Age\tInduced\tInducedCategory");
    foreach (var row in rows)
    {
        var line = $"{row.Age}\t{row.Induced}\t{row.InducedCategory}";
        Console.WriteLine(line);
    }

    // Output obtained post-transformation.
    //
    // Example of mapping float->string
    // Age     Induced InducedCategory
    // 26      1       Cat1
    // 42      1       Cat1
    // 39      2       Cat2
    // 34      2       Cat2
    // 35      1       Cat1
}
226+
227+
/// <summary>
/// This example demonstrates the use of arrays as the values for the ValueMappingEstimator.
/// It maps a set of keys of type string to integer arrays of variable length.
/// The mapping looks like the following:
/// <list>
/// <item>0-5yrs -> 1,2,3,4</item>
/// <item>6-11yrs -> 5,6,7</item>
/// <item>12+yrs -> 42,32</item>
/// </list>
/// </summary>
/// <param name="ml">The ML context used to build the estimator and to read the rows back.</param>
/// <param name="trainData">The data view the mapping is fitted on and applied to.</param>
public static void StringToArrayMappingExample(MLContext ml, IDataView trainData)
{
    // Creating a list of keys based on the Education values from the dataset.
    var educationKeys = new List<string>()
    {
        "0-5yrs",
        "6-11yrs",
        "12+yrs"
    };

    // Sample list of associated array values, one array per key, in the same order as the keys.
    // The arrays deliberately differ in length.
    var educationValues = new List<int[]>()
    {
        new int[] { 1,2,3,4 },
        new int[] { 5,6,7 },
        new int[] { 42, 32 }
    };

    // Note the value type argument is int while the supplied values are int arrays.
    // NOTE(review): presumably this binds to a constructor overload that accepts array values;
    // confirm against the ValueMappingEstimator API.
    var pipeline = new ValueMappingEstimator<string, int>(ml, educationKeys, educationValues, ("Education", "EducationCategory"));

    // The transformed data.
    var transformedData = pipeline.Fit(trainData).Transform(trainData);

    // Getting the data of the newly created column as an IEnumerable of SampleInfertDataWithIntArray.
    var featuresColumn = ml.CreateEnumerable<SampleInfertDataWithIntArray>(transformedData, reuseRowObject: false);

    Console.WriteLine("Example of mapping string->array");
    Console.WriteLine("Age\tEducation\tEducationLabel");
    foreach (var featureRow in featuresColumn)
    {
        // The mapped array is printed as a comma-separated list.
        Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}");
    }

    // Features column obtained post-transformation.
    //
    // Example of mapping string->array
    // Age     Education   EducationLabel
    // 26      0-5yrs      1,2,3,4
    // 42      0-5yrs      1,2,3,4
    // 39      12+yrs      42,32
    // 34      0-5yrs      1,2,3,4
    // 35      6-11yrs     5,6,7
}
280+
}
281+
}

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ public static IEnumerable<SampleInfertData> GetInfertData()
168168
data.Add(new SampleInfertData
169169
{
170170
RowNum = 2,
171-
Education = "0-5yrs",
171+
Education = "12+yrs",
172172
Age = 39,
173173
Parity = 6,
174174
Induced = 2,

0 commit comments

Comments
 (0)