Skip to content

Conversion catalog samples #3167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,6 @@ namespace Microsoft.ML.Samples.Dynamic
{
public static class ConvertType
{
// Input row type for this sample: a single boolean field named Survived.
private sealed class InputData
{
public bool Survived;
}

// Output row type for this sample: the boolean Survived value next to an Int32 column.
// NOTE(review): SurvivedInt32 is presumably the ConvertType result for Survived; the pipeline
// is not visible in this hunk, so confirm against Example().
private sealed class TransformedData
{
public bool Survived { get; set; }

public Int32 SurvivedInt32 { get; set; }
}

public static void Example()
{
var mlContext = new MLContext(seed: 1);
Expand Down Expand Up @@ -51,5 +39,13 @@ public static void Example()
// A: False Aconv:0
// A: False Aconv:0
}
private class InputData

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pr [](start = 7, length = 3)

need extra line here

{
public bool Survived;
}
private sealed class TransformedData : InputData

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pr [](start = 8, length = 2)

ditto

{
public Int32 SurvivedInt32 { get; set; }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
using System;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
{
// This example illustrates how to convert multiple columns of different types to one type, in this case System.Single.
// This is often a useful data transformation before concatenating the features together and passing them to a particular estimator.
public static class ConvertTypeMultiColumn
{
public static void Example()
{
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
// as well as the source of randomness.
var mlContext = new MLContext(seed: 1);
Copy link
Contributor

@zeahmed zeahmed Apr 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

var mlContext = new MLContext(seed: 1); [](start = 12, length = 39)

This sample is missing the comments we usually include in other samples, e.g. the comment above the MLContext creation. #Closed


var rawData = new[] {
new InputData() { Feature1 = true, Feature2 = "0.4", Feature3 = DateTime.Now, Feature4 = 0.145},
new InputData() { Feature1 = false, Feature2 = "0.5", Feature3 = DateTime.Today, Feature4 = 3.14},
new InputData() { Feature1 = false, Feature2 = "14", Feature3 = DateTime.Today, Feature4 = 0.2046},
new InputData() { Feature1 = false, Feature2 = "23", Feature3 = DateTime.Now, Feature4 = 0.1206},
new InputData() { Feature1 = true, Feature2 = "8904", Feature3 = DateTime.UtcNow, Feature4 = 8.09},
};

// Convert the data to an IDataView.
var data = mlContext.Data.LoadFromEnumerable(rawData);

// Construct the pipeline.
var pipeline = mlContext.Transforms.Conversion.ConvertType(new[]
{
new InputOutputColumnPair("Converted1", "Feature1"),
new InputOutputColumnPair("Converted2", "Feature2"),
new InputOutputColumnPair("Converted3", "Feature3"),
new InputOutputColumnPair("Converted4", "Feature4"),
},
DataKind.Single);

Copy link
Member

@wschin wschin Apr 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this empty line. #Resolved

// Let's fit our pipeline to the data.
var transformer = pipeline.Fit(data);
// Transforming the same data. This will add the 4 columns defined in the pipeline, containing the converted
// values of the initial columns.
var transformedData = transformer.Transform(data);

// Shape the transformed data as a strongly typed IEnumerable.
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);

// Printing the results.
Console.WriteLine("Converted1\t Converted2\t Converted3\t Converted4");
foreach (var item in convertedData)
Console.WriteLine($"\t{item.Converted1}\t {item.Converted2}\t\t {item.Converted3}\t {item.Converted4}");

// Transformed data.
//
// Converted1 Converted2 Converted3 Converted4
// 1 0.4 6.368921E+17 0.145
// 0 0.5 6.368916E+17 3.14
// 0 14 6.368916E+17 0.2046
// 0 23 6.368921E+17 0.1206
// 1 8904 6.368924E+17 8.09

}
Copy link

@shmoradims shmoradims Apr 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see a lot of extra empty lines. Please do a second pass and remove them. #Resolved

// The initial data type: one input row whose four features are each held in a different
// .NET type (bool, string, DateTime, double). Example() converts all four of them with a
// single ConvertType call.
private class InputData
{
public bool Feature1;
public string Feature2;
public DateTime Feature3;
public double Feature4;
}
// The resulting data type after the transformation: it inherits the four original Feature
// fields from InputData and adds one float property per output column (Converted1..Converted4)
// named in the ConvertType pipeline built in Example().
private class TransformedData : InputData
{
public float Converted1 { get; set; }
public float Converted2 { get; set; }
public float Converted3 { get; set; }
public float Converted4 { get; set; }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
using System;
using System.Collections.Generic;
using Microsoft.ML.Data;


namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample for <c>MapValue</c> (the ValueMappingEstimator). It maps strings to other strings,
/// strings to key values (uints), and ints to strings. This is useful to map raw values to a category.
/// </summary>
public static class MapValue
{
    /// <summary>
    /// Builds a small in-memory dataset, applies three value mappings to it, and prints the
    /// original columns together with the mapped columns to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable.
        var rawData = new[]
        {
            new DataPoint { Timeframe = "0-4yrs", Score = 1 },
            new DataPoint { Timeframe = "6-11yrs", Score = 2 },
            new DataPoint { Timeframe = "12-25yrs", Score = 3 },
            new DataPoint { Timeframe = "0-5yrs", Score = 4 },
            new DataPoint { Timeframe = "12-25yrs", Score = 5 },
            new DataPoint { Timeframe = "25+yrs", Score = 5 },
        };

        // Convert the data to an IDataView.
        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Mapping of the Timeframe strings to other strings.
        var timeframeMap = new Dictionary<string, string>
        {
            ["0-4yrs"] = "Short",
            ["0-5yrs"] = "Short",
            ["6-11yrs"] = "Medium",
            ["12-25yrs"] = "Long",
            ["25+yrs"] = "Long",
        };

        // Mapping of the Timeframe strings to keys (uints).
        var timeframeKeyMap = new Dictionary<string, uint>
        {
            ["0-4yrs"] = 1,
            ["0-5yrs"] = 1,
            ["6-11yrs"] = 2,
            ["12-25yrs"] = 3,
            ["25+yrs"] = 3,
        };

        // Mapping of the integer Score values to strings.
        var scoreMap = new Dictionary<int, string>
        {
            [1] = "Low",
            [2] = "Low",
            [3] = "Average",
            [4] = "High",
            [5] = "High",
        };

        // Construct the ML.NET pipeline: one MapValue per output column.
        var pipeline = mlContext.Transforms.Conversion.MapValue("TimeframeCategory", timeframeMap, "Timeframe")
            .Append(mlContext.Transforms.Conversion.MapValue("ScoreCategory", scoreMap, "Score"))
            // On the MapValue below, treatValuesAsKeyType is set to true. The type of the Label column will be
            // a KeyDataViewType, and it can be used as input for trainers performing multiclass classification.
            .Append(mlContext.Transforms.Conversion.MapValue("Label", timeframeKeyMap, "Timeframe", treatValuesAsKeyType: true));

        // Fit the pipeline to the data, then transform that same data.
        var model = pipeline.Fit(data);
        IDataView transformedData = model.Transform(data);

        // Get the resulting data as an IEnumerable. It contains the newly created columns.
        IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine(" Timeframe TimeframeCategory Label Score ScoreCategory");
        foreach (var row in features)
        {
            Console.WriteLine($"{row.Timeframe}\t\t{row.TimeframeCategory}\t\t\t{row.Label}\t\t{row.Score}\t{row.ScoreCategory}");
        }

        // TransformedData obtained post-transformation.
        //
        // Timeframe   TimeframeCategory   Label   Score   ScoreCategory
        // 0-4yrs      Short               1       1       Low
        // 6-11yrs     Medium              2       2       Low
        // 12-25yrs    Long                3       3       Average
        // 0-5yrs      Short               1       4       High
        // 12-25yrs    Long                3       5       High
        // 25+yrs      Long                3       5       High
    }

    // Input row: a time-frame label and an integer score.
    private class DataPoint
    {
        public string Timeframe { get; set; }
        public int Score { get; set; }
    }

    // Output row: the input columns plus the three columns produced by the pipeline in Example().
    private class TransformedData : DataPoint
    {
        public string TimeframeCategory { get; set; }
        public string ScoreCategory { get; set; }
        public uint Label { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
using System;
using System.Collections.Generic;

namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample for the <c>MapValue</c> overload that takes its key/value pairs from an IDataView:
/// float prices are mapped to string categories looked up in a second data view.
/// This is useful to map values to a grouping.
/// </summary>
public static class MapValueIdvLookup
{
    /// <summary>
    /// Builds a small price dataset and a lookup data view, maps each Price to its PriceCategory,
    /// and prints both columns to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // The rows to be transformed, loaded into an IDataView.
        var prices = new[]
        {
            new DataPoint { Price = 3.14f },
            new DataPoint { Price = 2000f },
            new DataPoint { Price = 1.19f },
            new DataPoint { Price = 2.17f },
            new DataPoint { Price = 33.784f },
        };
        var data = mlContext.Data.LoadFromEnumerable(prices);

        // The lookup rows (price value -> category), also loaded into an IDataView.
        var lookupRows = new[]
        {
            new LookupMap { Value = 3.14f, Category = "Low" },
            new LookupMap { Value = 1.19f, Category = "Low" },
            new LookupMap { Value = 2.17f, Category = "Low" },
            new LookupMap { Value = 33.784f, Category = "Medium" },
            new LookupMap { Value = 2000f, Category = "High" },
        };
        var lookupIdvMap = mlContext.Data.LoadFromEnumerable(lookupRows);

        // Build the ValueMappingEstimator: "Value" supplies the keys and "Category" the mapped values.
        var keyColumn = lookupIdvMap.Schema["Value"];
        var valueColumn = lookupIdvMap.Schema["Category"];
        var pipeline = mlContext.Transforms.Conversion.MapValue("PriceCategory", lookupIdvMap, keyColumn, valueColumn, "Price");

        // Fit the estimator, then transform the data, converting Price to PriceCategory.
        var model = pipeline.Fit(data);
        IDataView transformedData = model.Transform(data);

        // Read the result back as strongly typed rows.
        IEnumerable<TransformedData> features = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine(" Price PriceCategory");
        foreach (var row in features)
        {
            Console.WriteLine($"{row.Price}\t\t{row.PriceCategory}");
        }

        // TransformedData obtained post-transformation.
        //
        // Price     PriceCategory
        // 3.14      Low
        // 2000      High
        // 1.19      Low
        // 2.17      Low
        // 33.784    Medium
    }

    // Row type of the IDataView that serves as the lookup map.
    private class LookupMap
    {
        public float Value { get; set; }
        public string Category { get; set; }
    }

    // Input row: a single float price.
    private class DataPoint
    {
        public float Price { get; set; }
    }

    // Output row: the input price plus the category produced by the mapping.
    private class TransformedData : DataPoint
    {
        public string PriceCategory { get; set; }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
using System;
using System.Collections.Generic;
namespace Microsoft.ML.Samples.Dynamic
{
/// <summary>
/// Sample for <c>MapValue</c> with array values: each Timeframe string is mapped to an int array,
/// which allows mapping data to numeric arrays. In this example the arrays are arbitrary.
/// </summary>
public static class MapValueToArray
{
    /// <summary>
    /// Builds a small in-memory dataset, maps each Timeframe string to an int array stored in a
    /// new Features column, and prints both columns to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable.
        var rawData = new[]
        {
            new DataPoint { Timeframe = "0-4yrs" },
            new DataPoint { Timeframe = "6-11yrs" },
            new DataPoint { Timeframe = "12-25yrs" },
            new DataPoint { Timeframe = "0-5yrs" },
            new DataPoint { Timeframe = "12-25yrs" },
            new DataPoint { Timeframe = "25+yrs" },
        };

        // Convert the data to an IDataView.
        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Key-value pairs giving the mapping between the DataPoint values
        // and the arrays they should map to.
        var timeframeMap = new Dictionary<string, int[]>
        {
            ["0-4yrs"] = new[] { 0, 5, 300 },
            ["0-5yrs"] = new[] { 0, 5, 300 },
            ["6-11yrs"] = new[] { 6, 11, 300 },
            ["12-25yrs"] = new[] { 12, 50, 300 },
            ["25+yrs"] = new[] { 12, 50, 300 },
        };

        // Construct the ValueMappingEstimator making up the ML.NET pipeline.
        var pipeline = mlContext.Transforms.Conversion.MapValue("Features", timeframeMap, "Timeframe");

        // Fit the estimator, then transform the data, adding the Features column.
        var model = pipeline.Fit(data);
        IDataView transformedData = model.Transform(data);

        // Get the resulting data as an IEnumerable.
        IEnumerable<TransformedData> featuresColumn = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: false);

        Console.WriteLine("Timeframe Features");
        foreach (var row in featuresColumn)
        {
            // The array elements are joined with "," (no space), as reflected in the expected output below.
            Console.WriteLine($"{row.Timeframe}\t\t {string.Join(",", row.Features)}");
        }

        // Timeframe    Features
        // 0-4yrs       0,5,300
        // 6-11yrs      6,11,300
        // 12-25yrs     12,50,300
        // 0-5yrs       0,5,300
        // 12-25yrs     12,50,300
        // 25+yrs       12,50,300
    }

    // Input row: a single time-frame label.
    public class DataPoint
    {
        public string Timeframe { get; set; }
    }

    // Output row: the input label plus the int array produced by the mapping.
    public class TransformedData : DataPoint
    {
        public int[] Features { get; set; }
    }
}
}
Loading