-
Notifications
You must be signed in to change notification settings - Fork 1.9k
ValueMappingEstimator example #2222
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bfdbc4f
9ad04ff
9159ae5
beb48eb
b3d1df2
a50a29f
8542762
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.Data.DataView; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public class ValueMappingExample | ||
{ | ||
class SampleInfertDataWithFeatures | ||
{ | ||
public float Age = 0; | ||
public string Education = default; | ||
public string EducationCategory = default; | ||
} | ||
|
||
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-string values. This is useful | ||
/// to map strings to a grouping. In this example, the education data maps to the groups Undergraduate and Postgraduate: | ||
/// 0-5yrs -> Undergraduate | ||
/// 6-11yrs -> Postgraduate | ||
/// 12+yrs -> Postgraduate | ||
/// It's possible to have multiple keys map to the same value. | ||
public static void Run() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Get a small dataset as an IEnumerable. | ||
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
var #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so the feedback was to not use var in this case because its more explicit about what we are doing. #Resolved |
||
IDataView trainData = mlContext.Data.ReadFromEnumerable(data); | ||
|
||
// Preview of the data. | ||
// | ||
// Age Case Education induced parity pooled.stratum row_num ... | ||
// 26.0 1.0 0-5yrs 1.0 6.0 3.0 1.0 ... | ||
// 42.0 1.0 0-5yrs 1.0 1.0 1.0 2.0 ... | ||
// 39.0 1.0 12+yrs 2.0 6.0 4.0 3.0 ... | ||
// 34.0 1.0 0-5yrs 2.0 4.0 2.0 4.0 ... | ||
// 35.0 1.0 6-11yrs 1.0 3.0 32.0 5.0 ... | ||
|
||
// If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView | ||
// Creating a list of keys based on the Education values from the dataset. | ||
var educationKeys = new List<string>() | ||
{ | ||
"0-5yrs", | ||
"6-11yrs", | ||
"12+yrs" | ||
}; | ||
|
||
// Creating a list of associated values that will map respectively to each educationKey | ||
var educationValues = new List<string>() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Explain what these are, just as you explained what the keys were. #Resolved |
||
{ | ||
"Undergraduate", | ||
"Postgraduate", | ||
"Postgraduate" | ||
}; | ||
|
||
// Constructs the ValueMappingEstimator making the ML.NET pipeline | ||
var pipeline = mlContext.Transforms.Conversion.ValueMap(educationKeys, educationValues, ("EducationCategory", "Education")); | ||
|
||
// Fits the ValueMappingEstimator and transforms the data converting the Education to EducationCategory. | ||
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); | ||
|
||
// Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. This will contain the newly created column EducationCategory | ||
IEnumerable<SampleInfertDataWithFeatures> featureRows = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false); | ||
|
||
Console.WriteLine($"Example of mapping string->string"); | ||
Console.WriteLine($"Age\tEducation\tEducationCategory"); | ||
foreach (var featureRow in featureRows) | ||
{ | ||
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); | ||
} | ||
|
||
// Features column obtained post-transformation. | ||
// | ||
// Age Education EducationCategory | ||
// 26 0-5yrs Undergraduate | ||
// 42 0-5yrs Undergraduate | ||
// 39 12+yrs Postgraduate | ||
// 34 0-5yrs Undergraduate | ||
// 35 6-11yrs Postgraduate | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.Data.DataView; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public class ValueMappingFloatToStringExample | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as above — what problem does this solve? #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
this is not referenced from anything. #Resolved |
||
{ | ||
/// <summary> | ||
/// Helper class for retrieving the resulting data | ||
/// </summary> | ||
class SampleTemperatureDataWithCategory | ||
{ | ||
public DateTime Date = default; | ||
public float Temperature = 0.0f; | ||
public string TemperatureCategory = default; | ||
} | ||
|
||
/// This example demonstrates the use of ValueMappingEstimator by mapping float-to-string values. This is useful if the key | ||
/// data are floating point and need to be grouped into string values. In this example, the Temperature value is mapped to | ||
/// "T1", "T2", "T3", and "T4" groups. | ||
public static void Run() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Get a small dataset as an IEnumerable. | ||
IEnumerable<SamplesUtils.DatasetUtils.SampleTemperatureData> data = SamplesUtils.DatasetUtils.GetSampleTemperatureData(); | ||
IDataView trainData = mlContext.Data.ReadFromEnumerable(data); | ||
|
||
// If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView | ||
// Creating a list of keys based on the Temperature values from the dataset | ||
var temperatureKeys = new List<float>() | ||
{ | ||
39.0F, | ||
67.0F, | ||
75.0F, | ||
82.0F, | ||
}; | ||
|
||
// Creating a list of values, these strings will map accordingly to each key. | ||
var classificationValues = new List<string>() | ||
{ | ||
"T1", | ||
"T2", | ||
"T3", | ||
"T4" | ||
}; | ||
|
||
// Constructs the ValueMappingEstimator making the ML.NET pipeline | ||
var pipeline = mlContext.Transforms.Conversion.ValueMap(temperatureKeys, classificationValues, ("TemperatureCategory", "Temperature")); | ||
|
||
// Fits the ValueMappingEstimator and transforms the data adding the TemperatureCategory column. | ||
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); | ||
|
||
// Getting the resulting data as an IEnumerable of SampleTemperatureDataWithCategory. This will contain the newly created column TemperatureCategory | ||
IEnumerable<SampleTemperatureDataWithCategory> featureRows = mlContext.CreateEnumerable<SampleTemperatureDataWithCategory>(transformedData, reuseRowObject: false); | ||
|
||
Console.WriteLine($"Example of mapping float->string"); | ||
Console.WriteLine($"Date\t\tTemperature\tTemperatureCategory"); | ||
foreach (var featureRow in featureRows) | ||
{ | ||
Console.WriteLine($"{featureRow.Date.ToString("d")}\t{featureRow.Temperature}\t\t{featureRow.TemperatureCategory}"); | ||
} | ||
|
||
// Features column obtained post-transformation. | ||
// | ||
// Example of mapping float->string | ||
// Date Temperature TemperatureCategory | ||
// 1/1/2012 39 T1 | ||
// 1/2/2012 82 T4 | ||
// 1/3/2012 75 T3 | ||
// 1/4/2012 67 T2 | ||
// 1/5/2012 75 T3 | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.Data.DataView; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Transforms.Conversions; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public class ValueMappingStringToArrayExample | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as above — what problem does this solve? #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
this is not referenced anywhere. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. RIght - this is just another example that is available to show how this can be used. There is another example that is not used either. Does they all need to be referenced? In reply to: 252891445 [](ancestors = 252891445) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should remove them. The purpose of this project is to populate the API reference website. the machinelearning-samples repo is the one where we can put more samples, if needed. If we keep them here, unreferenced, it will be hard to see what is used for what, overtime. In reply to: 252938208 [](ancestors = 252938208,252891445) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible to add multiple links? I initially had these in one file but moved out into separate files to not have to update line number reference. In reply to: 252938844 [](ancestors = 252938844,252938208,252891445) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added multiple links so they are all referenced now. In reply to: 252939988 [](ancestors = 252939988,252938844,252938208,252891445) |
||
{ | ||
/// <summary> | ||
/// Helper class for retrieving the resulting data | ||
/// </summary> | ||
class SampleInfertDataWithIntArray | ||
{ | ||
public float Age = 0; | ||
public string Education = default; | ||
public int[] EducationFeature = default; | ||
} | ||
|
||
/// This example demonstrates the use of the ValueMappingEstimator by mapping string-to-array values which allows for mapping string data | ||
/// to numeric arrays that can then be used as a feature set for a trainer. In this example, we are mapping the education data to | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Does this work? The vectors are of different sizes. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No probably not to pass into a trainer. I went ahead and made them the same size. In reply to: 252881242 [](ancestors = 252881242) |
||
/// arbitrary integer arrays with the following association: | ||
/// 0-5yrs -> 1, 2, 3 | ||
/// 6-11yrs -> 5, 6, 7 | ||
/// 12+yrs -> 42,32,64 | ||
public static void Run() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Get a small dataset as an IEnumerable. | ||
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData(); | ||
IDataView trainData = mlContext.Data.ReadFromEnumerable(data); | ||
|
||
// If the list of keys and values are known, they can be passed to the API. The ValueMappingEstimator can also get the mapping through an IDataView | ||
// Creating a list of keys based on the Education values from the dataset | ||
var educationKeys = new List<string>() | ||
{ | ||
"0-5yrs", | ||
"6-11yrs", | ||
"12+yrs" | ||
}; | ||
|
||
// Sample list of associated array values | ||
var educationValues = new List<int[]>() | ||
{ | ||
new int[] { 1,2,3 }, | ||
new int[] { 5,6,7 }, | ||
new int[] { 42,32,64 } | ||
}; | ||
|
||
// Constructs the ValueMappingEstimator making the ML.NET pipeline | ||
var pipeline = new ValueMappingEstimator<string, int>(mlContext, educationKeys, educationValues, ("EducationFeature", "Education")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
mlContext #ByDesign There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same deal here -- need to add an api to support vector types. I have a github issue to fix. In reply to: 252891208 [](ancestors = 252891208) |
||
|
||
// Fits the ValueMappingEstimator and transforms the data adding the EducationFeature column. | ||
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); | ||
|
||
// Getting the resulting data as an IEnumerable of SampleInfertDataWithIntArray. This will contain the newly created column EducationFeature | ||
IEnumerable<SampleInfertDataWithIntArray> featuresColumn = mlContext.CreateEnumerable<SampleInfertDataWithIntArray>(transformedData, reuseRowObject: false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
nit;: var #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yael's feedback was to keep this explicit for demo purposes. What would you prefer? In reply to: 252891247 [](ancestors = 252891247) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
Console.WriteLine($"Example of mapping string->array"); | ||
Console.WriteLine($"Age\tEducation\tEducationFeature"); | ||
foreach (var featureRow in featuresColumn) | ||
{ | ||
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{string.Join(",", featureRow.EducationFeature)}"); | ||
} | ||
|
||
// Features column obtained post-transformation. | ||
// | ||
// Example of mapping string->array | ||
// Age Education EducationFeature | ||
// 26 0-5yrs 1,2,3 | ||
// 42 0-5yrs 1,2,3 | ||
// 39 12+yrs 42,32,64 | ||
// 34 0-5yrs 1,2,3 | ||
// 35 6-11yrs 5,6,7 | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.Data.DataView; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Transforms.Conversions; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public class ValueMappingStringToKeyTypeExample | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment as above — what problem does this solve? #Resolved |
||
{ | ||
/// <summary> | ||
/// Helper class for retrieving the resulting data | ||
/// </summary> | ||
class SampleInfertDataWithFeatures | ||
|
||
{ | ||
public float Age = 0; | ||
public string Education = default; | ||
public string EducationCategory = default; | ||
} | ||
|
||
/// This example demonstrates the use of KeyTypes using both the ValueMappingEstimator and KeyToValueEstimator. Using a KeyType | ||
/// instead of the actual value provides a unique integer representation of the value. When the treatValueAsKeyTypes is true, | ||
/// the ValueMappingEstimator will generate a KeyType for each unique value. | ||
/// | ||
/// In this example, the education data is mapped to a grouping of 'Undergraduate' and 'Postgraduate'. Because KeyTypes are used, the | ||
/// ValueMappingEstimator will output the KeyType value rather than string value of 'Undergraduate' or 'Postgraduate'. | ||
/// | ||
/// The KeyToValueEstimator is added to the pipeline to convert the KeyType back to the original value. Therefore the output of this example | ||
/// results in the string value of 'Undergraduate' and 'Postgraduate'. | ||
public static void Run() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var mlContext = new MLContext(); | ||
|
||
// Get a small dataset as an IEnumerable. | ||
IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData(); | ||
IDataView trainData = mlContext.Data.ReadFromEnumerable(data); | ||
|
||
// Creating a list of keys based on the Education values from the dataset | ||
// These lists are created by hand for the demonstration, but the ValueMappingEstimator does take an IEnumerable. | ||
var educationKeys = new List<string>() | ||
{ | ||
"0-5yrs", | ||
"6-11yrs", | ||
"12+yrs" | ||
}; | ||
|
||
// Creating a list of values that are sample strings. These will be converted to KeyTypes | ||
var educationValues = new List<string>() | ||
{ | ||
"Undergraduate", | ||
"Postgraduate", | ||
"Postgraduate" | ||
}; | ||
|
||
// Generate the ValueMappingEstimator that will output KeyTypes even though our values are strings. | ||
// The KeyToValueMappingEstimator is added to provide a reverse lookup of the KeyType, converting the KeyType value back | ||
// to the original value. | ||
var pipeline = new ValueMappingEstimator<string, string>(mlContext, educationKeys, educationValues, true, ("EducationKeyType", "Education")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
use mlContext #ByDesign There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I need to add an api to the extensions catalog to allow for the treatValuesAsKeyTypes. I dont want to add that in this PR as I am trying to scope this to documentation changes. I have a github issue tracking it and update this sample once its fixed In reply to: 252890986 [](ancestors = 252890986) |
||
.Append(new KeyToValueMappingEstimator(mlContext, ("EducationCategory", "EducationKeyType"))); | ||
|
||
// Fits the ValueMappingEstimator and transforms the data adding the EducationKeyType column. | ||
IDataView transformedData = pipeline.Fit(trainData).Transform(trainData); | ||
|
||
// Getting the resulting data as an IEnumerable of SampleInfertDataWithFeatures. | ||
IEnumerable<SampleInfertDataWithFeatures> featureRows = mlContext.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false); | ||
|
||
Console.WriteLine($"Example of mapping string->keytype"); | ||
Console.WriteLine($"Age\tEducation\tEducationCategory"); | ||
foreach (var featureRow in featureRows) | ||
{ | ||
Console.WriteLine($"{featureRow.Age}\t{featureRow.Education} \t{featureRow.EducationCategory}"); | ||
} | ||
|
||
// Features column obtained post-transformation. | ||
// | ||
// Age Education EducationCategory | ||
// 26 0-5yrs Undergraduate | ||
// 42 0-5yrs Undergraduate | ||
// 39 12+yrs Postgraduate | ||
// 34 0-5yrs Undergraduate | ||
// 35 6-11yrs Postgraduate | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you need to reference your samples from the extensions, otherwise they won't show up anywhere.
For how to link it, see: https://github.com/dotnet/machinelearning/blob/master/src/Microsoft.ML.StandardLearners/StandardLearnersCatalog.cs#L115
The XML of the extensions should have this on it:
///
///
///
///
#Resolved