-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Conversion catalog samples #3167
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,18 +5,6 @@ namespace Microsoft.ML.Samples.Dynamic | |
{ | ||
public static class ConvertType | ||
{ | ||
private sealed class InputData | ||
{ | ||
public bool Survived; | ||
} | ||
|
||
private sealed class TransformedData | ||
{ | ||
public bool Survived { get; set; } | ||
|
||
public Int32 SurvivedInt32 { get; set; } | ||
} | ||
|
||
public static void Example() | ||
{ | ||
var mlContext = new MLContext(seed: 1); | ||
|
@@ -51,5 +39,13 @@ public static void Example() | |
// A: False Aconv:0 | ||
// A: False Aconv:0 | ||
} | ||
private class InputData | ||
{ | ||
public bool Survived; | ||
} | ||
private sealed class TransformedData : InputData | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
ditto |
||
{ | ||
public Int32 SurvivedInt32 { get; set; } | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
using System; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
// This example illustrates how to convert multiple columns of different types to one type, in this case System.Single. | ||
// This is often a useful data transformation before concatenating the features together and passing them to a particular estimator. | ||
public static class ConvertTypeMultiColumn
{
    /// <summary>
    /// Builds a small in-memory dataset, converts its four feature columns
    /// (bool, string, DateTime and double) to <c>DataKind.Single</c>, and prints
    /// the converted columns to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext(seed: 1);

        // A handful of rows, each holding one value per source type.
        var samples = new[]
        {
            new InputData { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145 },
            new InputData { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14 },
            new InputData { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046 },
            new InputData { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206 },
            new InputData { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09 },
        };

        // Convert the data to an IDataView.
        var data = mlContext.Data.LoadFromEnumerable(samples);

        // One (output, input) pair per column that should be converted.
        var columnPairs = new[]
        {
            new InputOutputColumnPair("Converted1", "Feature1"),
            new InputOutputColumnPair("Converted2", "Feature2"),
            new InputOutputColumnPair("Converted3", "Feature3"),
            new InputOutputColumnPair("Converted4", "Feature4"),
        };

        // Construct the pipeline: every listed column is converted to Single.
        var estimator = mlContext.Transforms.Conversion.ConvertType(columnPairs, DataKind.Single);

        // Fit the pipeline to the data, then transform the same data. This adds the 4 columns
        // defined in the pipeline, containing the converted values of the initial columns.
        var model = estimator.Fit(data);
        var transformedData = model.Transform(data);

        // Shape the transformed data as a strongly typed IEnumerable.
        // reuseRowObject is true here; each row is printed immediately inside the loop below.
        var rows = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);

        // Print the results.
        Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4");
        foreach (var row in rows)
        {
            Console.WriteLine($"\t{row.Converted1}\t {row.Converted2}\t\t {row.Converted3}\t {row.Converted4}");
        }

        // Transformed data (Converted3 comes from DateTime.Now/Today/UtcNow, so it differs between runs).
        //
        // Converted1   Converted2   Converted3     Converted4
        //     1           0.4       6.368921E+17     0.145
        //     0           0.5       6.368916E+17     3.14
        //     0           14        6.368916E+17     0.2046
        //     0           23        6.368921E+17     0.1206
        //     1           8904      6.368924E+17     8.09
    }

    // The initial data type: one field per source type that gets converted.
    private class InputData
    {
        public bool Feature1;
        public string Feature2;
        public DateTime Feature3;
        public double Feature4;
    }

    // The resulting data type after the transformation: the input fields plus
    // the four float columns produced by the pipeline.
    private class TransformedData : InputData
    {
        public float Converted1 { get; set; }
        public float Converted2 { get; set; }
        public float Converted3 { get; set; }
        public float Converted4 { get; set; }
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML.Data; | ||
|
||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public static class MapValue
{
    /// <summary>
    /// Demonstrates the ValueMappingEstimator by mapping strings to other string values,
    /// strings to keys, and ints to strings. This is useful to map values to a category.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable.
        var rawData = new[]
        {
            new DataPoint { Timeframe = "0-4yrs", Score = 1 },
            new DataPoint { Timeframe = "6-11yrs", Score = 2 },
            new DataPoint { Timeframe = "12-25yrs", Score = 3 },
            new DataPoint { Timeframe = "0-5yrs", Score = 4 },
            new DataPoint { Timeframe = "12-25yrs", Score = 5 },
            new DataPoint { Timeframe = "25+yrs", Score = 5 },
        };

        // Convert the data to an IDataView.
        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Mapping of the Timeframe strings to other strings.
        var timeframeMap = new Dictionary<string, string>
        {
            ["0-4yrs"] = "Short",
            ["0-5yrs"] = "Short",
            ["6-11yrs"] = "Medium",
            ["12-25yrs"] = "Long",
            ["25+yrs"] = "Long",
        };

        // Mapping of the Timeframe strings to keys (uints).
        var timeframeKeyMap = new Dictionary<string, uint>
        {
            ["0-4yrs"] = 1,
            ["0-5yrs"] = 1,
            ["6-11yrs"] = 2,
            ["12-25yrs"] = 3,
            ["25+yrs"] = 3,
        };

        // Mapping of the Score ints to strings.
        var scoreMap = new Dictionary<int, string>
        {
            [1] = "Low",
            [2] = "Low",
            [3] = "Average",
            [4] = "High",
            [5] = "High",
        };

        // Construct the ML.NET pipeline.
        var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe")
            .Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score"))
            // On the MapValue below, treatValuesAsKeyType is set to true. The type of the Label column will be
            // a KeyDataViewType type, and it can be used as input for trainers performing multiclass classification.
            .Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true));

        // Fit the pipeline to the data and transform that same data.
        IDataView transformedData = pipeline.Fit(data).Transform(data);

        // Get the resulting data as an IEnumerable.
        // This will contain the newly created columns.
        IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine(" Timeframe TimeframeCategory Label Score ScoreCategory");
        foreach (var featureRow in features)
        {
            Console.WriteLine($"{featureRow.Timeframe}\t\t{featureRow.TimeframeCategory}\t\t\t{featureRow.Label}\t\t{featureRow.Score}\t{featureRow.ScoreCategory}");
        }

        // TransformedData obtained post-transformation.
        //
        // Timeframe   TimeframeCategory   Label   Score   ScoreCategory
        // 0-4yrs      Short               1       1       Low
        // 6-11yrs     Medium              2       2       Low
        // 12-25yrs    Long                3       3       Average
        // 0-5yrs      Short               1       4       High
        // 12-25yrs    Long                3       5       High
        // 25+yrs      Long                3       5       High
    }

    // Shape of the input rows.
    private class DataPoint
    {
        public string Timeframe { get; set; }
        public int Score { get; set; }
    }

    // Shape of the rows after the pipeline ran: the input columns plus the three mapped columns.
    private class TransformedData : DataPoint
    {
        public string TimeframeCategory { get; set; }
        public string ScoreCategory { get; set; }
        public uint Label { get; set; }
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public static class MapValueIdvLookup
{
    /// <summary>
    /// Demonstrates the use of MapValue by mapping floats to strings, looking up the mapping in an IDataView.
    /// This is useful to map values to a grouping.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // A small dataset of prices, loaded into an IDataView.
        var prices = new[]
        {
            new DataPoint { Price = 3.14f },
            new DataPoint { Price = 2000f },
            new DataPoint { Price = 1.19f },
            new DataPoint { Price = 2.17f },
            new DataPoint { Price = 33.784f },
        };
        var data = mlContext.Data.LoadFromEnumerable(prices);

        // The lookup table (price value -> category), also loaded into an IDataView.
        var lookupRows = new[]
        {
            new LookupMap { Value = 3.14f, Category = "Low" },
            new LookupMap { Value = 1.19f, Category = "Low" },
            new LookupMap { Value = 2.17f, Category = "Low" },
            new LookupMap { Value = 33.784f, Category = "Medium" },
            new LookupMap { Value = 2000f, Category = "High" },
        };
        var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupRows);

        // The lookup's "Value" column supplies the keys and its "Category" column the mapped values.
        var keyColumn = lookupIdvMap.Schema["Value"];
        var valueColumn = lookupIdvMap.Schema["Category"];

        // Construct the ValueMappingEstimator making the ML.NET pipeline.
        var estimator = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, keyColumn, valueColumn, "Price");

        // Fit the ValueMappingEstimator and transform the data, converting the Price to PriceCategory.
        var model = estimator.Fit(data);
        IDataView transformedData = model.Transform(data);

        // Get the resulting data as an IEnumerable.
        IEnumerable<TransformedData> rows = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine(" Price PriceCategory");
        foreach (var row in rows)
        {
            Console.WriteLine($"{row.Price}\t\t{row.PriceCategory}");
        }

        // TransformedData obtained post-transformation.
        //
        // Price     PriceCategory
        // 3.14      Low
        // 2000      High
        // 1.19      Low
        // 2.17      Low
        // 33.784    Medium
    }

    // Type for the IDataView that will be serving as the map.
    private class LookupMap
    {
        public float Value { get; set; }
        public string Category { get; set; }
    }

    // Shape of the input rows.
    private class DataPoint
    {
        public float Price { get; set; }
    }

    // Shape of the rows after the mapping: the input column plus the looked-up category.
    private class TransformedData : DataPoint
    {
        public string PriceCategory { get; set; }
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
public static class MapValueToArray
{
    /// <summary>
    /// Demonstrates the use of MapValue by mapping strings to array values, which allows for mapping data to numeric arrays.
    /// This functionality is useful when the generated column will serve as the Features column for a trainer.
    /// Most of the trainers take a numeric vector as the Features column.
    /// In this example, we are mapping the Timeframe data to arbitrary integer arrays.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable.
        var rawData = new[]
        {
            new DataPoint { Timeframe = "0-4yrs" },
            new DataPoint { Timeframe = "6-11yrs" },
            new DataPoint { Timeframe = "12-25yrs" },
            new DataPoint { Timeframe = "0-5yrs" },
            new DataPoint { Timeframe = "12-25yrs" },
            new DataPoint { Timeframe = "25+yrs" },
        };

        // Convert the data to an IDataView.
        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Dictionary describing the mapping between the Timeframe values
        // and the arrays they should map to.
        var timeframeMap = new Dictionary<string, int[]>
        {
            ["0-4yrs"] = new int[] { 0, 5, 300 },
            ["0-5yrs"] = new int[] { 0, 5, 300 },
            ["6-11yrs"] = new int[] { 6, 11, 300 },
            ["12-25yrs"] = new int[] { 12, 50, 300 },
            ["25+yrs"] = new int[] { 12, 50, 300 },
        };

        // Construct the ValueMappingEstimator making the ML.NET pipeline.
        var pipeline = mlContext.Transforms.Conversion.MapValue("Features", timeframeMap, "Timeframe");

        // Fit the ValueMappingEstimator and transform the data, adding the Features column.
        IDataView transformedData = pipeline.Fit(data).Transform(data);

        // Get the resulting data as an IEnumerable.
        IEnumerable<TransformedData> featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine("Timeframe Features");
        foreach (var featureRow in featuresColumn)
        {
            // The array elements are joined with a bare comma, so no spaces appear between them.
            Console.WriteLine($"{featureRow.Timeframe}\t\t {string.Join(",", featureRow.Features)}");
        }

        // Timeframe    Features
        // 0-4yrs       0,5,300
        // 6-11yrs      6,11,300
        // 12-25yrs     12,50,300
        // 0-5yrs       0,5,300
        // 12-25yrs     12,50,300
        // 25+yrs       12,50,300
    }

    // Shape of the input rows.
    public class DataPoint
    {
        public string Timeframe { get; set; }
    }

    // Shape of the rows after the mapping: the input column plus the mapped integer array.
    public class TransformedData : DataPoint
    {
        public int[] Features { get; set; }
    }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
need extra line here