Skip to content

Commit b3d1df2

Browse files
committed
...updating from feedback
1 parent beb48eb commit b3d1df2

File tree

6 files changed

+95
-76
lines changed

6 files changed

+95
-76
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs

+16-16
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ class SampleInfertDataWithFeatures
1414
public string EducationCategory = default;
1515
}
1616

17-
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. The ValueMappingEstimator uses
18-
/// level of education as keys to a respective string label which is the value.
19-
/// The mapping looks like the following:
20-
/// 0-5yrs -> Cat1
21-
/// 6-11yrs -> Cat2
22-
/// 12+yrs -> Cat3
17+
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. This is useful
18+
/// to map strings to a grouping. In this example, the Education data maps to the groups Undergraduate and Postgraduate:
19+
/// 0-5yrs -> Undergraduate
20+
/// 6-11yrs -> Postgraduate
21+
/// 12+yrs -> Postgraduate
22+
/// It's possible to have multiple keys map to the same value.
2323
public static void Run()
2424
{
2525
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
@@ -39,8 +39,8 @@ public static void Run()
3939
// 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ...
4040
// 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ...
4141

42-
// Creating a list of keys based on the Education values from the dataset
43-
// These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable.
42+
// If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
43+
// Creating a list of keys based on the Education values from the dataset.
4444
var educationKeys = new List<string>()
4545
{
4646
"0-5yrs",
@@ -51,9 +51,9 @@ public static void Run()
5151
// Creating a list of associated values that will map respectively to each educationKey
5252
var educationValues = new List<string>()
5353
{
54-
"Cat1",
55-
"Cat2",
56-
"Cat3"
54+
"Undergraduate",
55+
"Postgraduate",
56+
"Postgraduate"
5757
};
5858

5959
// Constructs the ValueMappingEstimator making the ML.net pipeline
@@ -75,11 +75,11 @@ public static void Run()
7575
// Features column obtained post-transformation.
7676
//
7777
// Age Education EducationCategory
78-
// 26 0-5yrs Cat1
79-
// 42 0-5yrs Cat1
80-
// 39 12+yrs Cat3
81-
// 34 0-5yrs Cat1
82-
// 35 6-11yrs Cat2
78+
// 26 0-5yrs Undergraduate
79+
// 42 0-5yrs Undergraduate
80+
// 39 12+yrs Postgraduate
81+
// 34 0-5yrs Undergraduate
82+
// 35 6-11yrs Postgraduate
8383
}
8484
}
8585
}

docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs

+32-28
Original file line numberDiff line numberDiff line change
@@ -10,67 +10,71 @@ public class ValueMappingFloatToStringExample
1010
/// <summary>
1111
/// Helper class for retrieving the resulting data
1212
/// </summary>
13-
class SampleInfertDataWithInducedCategory
13+
class SampleTemperatureDataWithCategory
1414
{
15-
public float Age = 0;
16-
public float Induced = 0.0f;
17-
public string InducedCategory = default;
15+
public DateTime Date = default;
16+
public float Temperature = 0.0f;
17+
public string TemperatureCategory = default;
1818
}
1919

20-
/// This example demonstrates the use of floating types as the key type for ValueMappingEstimator by mapping a float-to-string value.
21-
/// The mapping looks like the following:
22-
/// 1.0 -> Cat1
23-
/// 2.0 -> Cat2
20+
/// This example demonstrates the use of ValueMappingEstimator by mapping float-to-string values. This is useful if the key
21+
/// data are floating point and need to be grouped into string values. In this example, the Temperature value is mapped to
22+
/// "T1", "T2", "T3", and "T4" groups.
2423
public static void Run()
2524
{
2625
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
2726
// as well as the source of randomness.
2827
var mlContext = new MLContext();
2928

3029
// Get a small dataset as an IEnumerable.
31-
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
30+
IEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData> data = SamplesUtils.DatasetUtils.GetSampleTemperatureData();
3231
IDataView trainData = mlContext.Data.ReadFromEnumerable(data);
3332

33+
// If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
34+
3435
// Creating a list of keys based on the temperature values from the dataset
35-
// These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable.
36-
var inducedKeys = new List<float>()
36+
var temperatureKeys = new List<float>()
3737
{
38-
1.0f,
39-
2.0f
38+
39.0F,
39+
67.0F,
40+
75.0F,
41+
82.0F,
4042
};
4143

4244
// Creating a list of values, these strings will map accordingly to each key.
43-
var inducedValues = new List<string>()
45+
var classificationValues = new List<string>()
4446
{
45-
"Cat1",
46-
"Cat2"
47+
"T1",
48+
"T2",
49+
"T3",
50+
"T4"
4751
};
4852

4953
// Constructs the ValueMappingEstimator making the ML.net pipeline
50-
var pipeline = mlContext.Transforms.Conversion.ValueMap(inducedKeys, inducedValues, ("InducedCategory", "Induced"));
54+
var pipeline = mlContext.Transforms.Conversion.ValueMap(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature"));
5155

52-
// Fits the ValueMappingEstimator and transforms the data adding the InducedCategory column.
56+
// Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column.
5357
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
5458

55-
// Getting the resulting data as an IEnumerable of SampleInfertDataWithInducedCategory. This will contain the newly created column InducedCategory
56-
IEnumerable<SampleInfertDataWithInducedCategory> featureRows = mlContext.CreateEnumerable<SampleInfertDataWithInducedCategory>(transformedData, reuseRowObject: false);
59+
// Getting the resulting data as an IEnumerable of SampleTemperatureDataWithCategory. This will contain the newly created column TemperatureCategory
60+
IEnumerable<SampleTemperatureDataWithCategory> featureRows = mlContext.CreateEnumerable<SampleTemperatureDataWithCategory>(transformedData, reuseRowObject: false);
5761

5862
Console.WriteLine($"Example of mapping float->string");
59-
Console.WriteLine($"Age\tInduced\tInducedCategory");
63+
Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory");
6064
foreach (var featureRow in featureRows)
6165
{
62-
Console.WriteLine($"{featureRow.Age}\t{featureRow.Induced}\t{featureRow.InducedCategory}");
66+
Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}");
6367
}
6468

6569
// Features column obtained post-transformation.
6670
//
6771
// Example of mapping float->string
68-
// Age Induced InducedCategory
69-
// 26 1 Cat1
70-
// 42 1 Cat1
71-
// 39 2 Cat2
72-
// 34 2 Cat2
73-
// 35 1 Cat1
72+
// Date Temperature TemperatureCategory
73+
// 1/1/2012 39 T1
74+
// 1/2/2012 82 T4
75+
// 1/3/2012 75 T3
76+
// 1/4/2012 67 T2
77+
// 1/5/2012 75 T3
7478
}
7579
}
7680
}

docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs

+12-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using System.Collections.Generic;
33
using Microsoft.Data.DataView;
44
using Microsoft.ML.Data;
5+
using Microsoft.ML.Transforms.Conversions;
56

67
namespace Microsoft.ML.Samples.Dynamic
78
{
@@ -14,12 +15,12 @@ class SampleInfertDataWithIntArray
1415
{
1516
public float Age = 0;
1617
public string Education = default;
17-
public int[] EducationCategory = default;
18+
public int[] EducationFeature = default;
1819
}
1920

20-
/// This example demonstrates the use arrays as the values for the ValueMappingEstimator. It maps a set of keys that are type string
21-
/// to a integer arrays of variable length.
22-
/// The mapping looks like the following:
21+
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values which allows for mapping string data
22+
/// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to
23+
/// arbitrary integer arrays with the following association:
2324
/// 0-5yrs -> 1,2,3,4
2425
/// 6-11yrs -> 5,6,7
2526
/// 12+yrs -> 42, 32
@@ -33,6 +34,8 @@ public static void Run()
3334
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
3435
IDataView trainData = mlContext.Data.ReadFromEnumerable(data);
3536

37+
// If the lists of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView.
38+
3639
// Creating a list of keys based on the Education values from the dataset
3740
var educationKeys = new List<string>()
3841
{
@@ -50,25 +53,25 @@ public static void Run()
5053
};
5154

5255
// Constructs the ValueMappingEstimator making the ML.net pipeline
53-
var pipeline = mlContext.Transforms.Conversion.ValueMap(educationKeys, educationValues, ("EducationCategory", "Education"));
56+
var pipeline = new ValueMappingEstimator<string, int>(mlContext, educationKeys, educationValues, ("EducationFeature", "Education"));
5457

55-
// Fits the ValueMappingEstimator and transforms the data adding the EducationCategory column.
58+
// Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column.
5659
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData);
5760

5861
// Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationFeature
5962
IEnumerable<SampleInfertDataWithIntArray> featuresColumn = mlContext.CreateEnumerable<SampleInfertDataWithIntArray>(transformedData, reuseRowObject: false);
6063

6164
Console.WriteLine($"Example of mapping string->array");
62-
Console.WriteLine($"Age\tEducation\tEducationCategory");
65+
Console.WriteLine($"Age\tEducation\tEducationFeature");
6366
foreach (var featureRow in featuresColumn)
6467
{
65-
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationCategory)}");
68+
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationFeature)}");
6669
}
6770

6871
// Features column obtained post-transformation.
6972
//
7073
// Example of mapping string->array
71-
// Age Education EducationCategory
74+
// Age Education EducationFeature
7275
// 26 0 - 5yrs 1,2,3,4
7376
// 42 0 - 5yrs 1,2,3,4
7477
// 39 12 + yrs 42,32

docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs

+16-20
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,15 @@ class SampleInfertDataWithFeatures
1919
public string EducationCategory = default;
2020
}
2121

22-
/// This example demonstrates the use of KeyTypes in the ValueMappingEstimator by setting treatValuesAsKeyTypes to true,
23-
/// This is useful in cases where you want the output to be integer based rather than the actual value.
24-
///
25-
/// When using KeyTypes as a Value, the ValueMappingEstimator will do one of the following:
26-
/// 1) If the Value type is an unsigned int or unsigned long, the specified values are used directly as the KeyType values.
27-
/// 2) If the Value type is not an unsigned int or unsigned long, new KeyType values are generated for each unique value.
22+
/// This example demonstrates the use of KeyTypes using both the ValueMappingEstimator and KeyToValueEstimator. Using a KeyType
23+
/// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes is true,
24+
/// the ValueMappingEstimator will generate a KeyType for each unique value.
2825
///
29-
/// In this example, the Value type is a string. Since we are setting treatValueAsKeyTypes to true,
30-
/// the ValueMappingEstimator will generate its own KeyType values for each unique string.
31-
/// As with KeyTypes, they contain the actual Value information as part of the metadata, therefore
32-
/// we can convert a KeyType back to the actual value the KeyType represents. To demonstrate
33-
/// the reverse lookup and to confirm the correct value is mapped, a KeyToValueEstimator is added
34-
/// to the pipeline to convert back to the original value.
26+
/// In this example, the education data is mapped to a grouping of 'Undergraduate' and 'Postgraduate'. Because KeyTypes are used, the
27+
/// ValueMappingEstimator will output the KeyType value rather than string value of 'Undergraduate' or 'Postgraduate'.
28+
///
29+
/// The KeyToValueEstimator is added to the pipeline to convert the KeyType back to the original value. Therefore the output of this example
30+
/// results in the string value of 'Undergraduate' and 'Postgraduate'.
3531
public static void Run()
3632
{
3733
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
@@ -54,9 +50,9 @@ public static void Run()
5450
// Creating a list of values that are sample strings. These will be converted to KeyTypes
5551
var educationValues = new List<string>()
5652
{
57-
"Cat1",
58-
"Cat2",
59-
"Cat3"
53+
"Undergraduate",
54+
"Postgraduate",
55+
"Postgraduate"
6056
};
6157

6258
// Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings.
@@ -81,11 +77,11 @@ public static void Run()
8177
// Features column obtained post-transformation.
8278
//
8379
// Age Education EducationCategory
84-
// 26 0-5yrs Cat1
85-
// 42 0-5yrs Cat1
86-
// 39 12+yrs Cat3
87-
// 34 0-5yrs Cat1
88-
// 35 6-11yrs Cat2
80+
// 26 0-5yrs Undergraduate
81+
// 42 0-5yrs Undergraduate
82+
// 39 12+yrs Postgraduate
83+
// 34 0-5yrs Undergraduate
84+
// 35 6-11yrs Postgraduate
8985
}
9086
}
9187
}

src/Microsoft.ML.Data/Transforms/doc.xml

+2-3
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,8 @@
8080
</li>
8181
<li>
8282
<description>
83-
Converting a float value to a string value and have the string value represented as a <see cref="KeyType"/>
84-
(i.e. specific wind speeds could map to a group ('category1', 'category2',...) which will generate a unique integer identifier (1,2,...) that could used
85-
as part of a feature set for a trainer).
83+
Converting an integer value to a string value and have the string value represented as a <see cref="KeyType"/>
84+
(e.g. convert zip codes to a state string value, which will generate a unique integer value that can be used as a label).
8685
</description>
8786
</li>
8887
</list>

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

+17
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,23 @@ public static IEnumerable<SampleTopicsData> GetTopicsData()
118118
return data;
119119
}
120120

121+
public class SampleTemperatureData
122+
{
123+
public DateTime Date {get; set; }
124+
public float Temperature { get; set; }
125+
}
126+
127+
public static IEnumerable<SampleTemperatureData> GetSampleTemperatureData()
128+
{
129+
var data = new List<SampleTemperatureData>();
130+
data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,1), Temperature = 39.0F });
131+
data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,2), Temperature = 82.0F });
132+
data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,3), Temperature = 75.0F });
133+
data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,4), Temperature = 67.0F });
134+
data.Add(new SampleTemperatureData { Date = new DateTime(2012,1,5), Temperature = 75.0F });
135+
return data;
136+
}
137+
121138
/// <summary>
122139
/// Represents the column of the infertility dataset.
123140
/// </summary>

0 commit comments

Comments
 (0)