|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Linq; |
| 4 | +using Microsoft.ML.Core.Data; |
| 5 | +using Microsoft.ML.Data; |
| 6 | +using Microsoft.ML.Transforms; |
| 7 | + |
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample demonstrating the CopyColumns transform: renaming a column for a learner and
    /// duplicating a column so the copy can be reshaped by a custom mapping.
    /// </summary>
    public class CopyColumns
    {
        /// <summary>
        /// Builds a pipeline that copies "Age" to "Label" and "Parity" to "CustomValue", replaces
        /// "CustomValue" with a 0/1 indicator via a custom mapping, and writes the resulting
        /// Label, Parity, and CustomValue columns to the console.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Preview of the data.
            //
            // Age    Case  Education  induced  parity  pooled.stratum  row_num  ...
            // 26.0   1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
            // 42.0   1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
            // 39.0   1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
            // 34.0   1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
            // 35.0   1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

            // CopyColumns is commonly used to rename columns.
            // For example, if you want to train towards Age, and your learner expects a "Label" column, you can
            // use CopyColumns to rename Age to Label. Technically, the Age column still exists, but it won't be
            // materialized unless you actually need it somewhere (e.g. if you were to save the transformed data
            // without explicitly dropping the column). This is a general property of IDataView's lazy evaluation.
            string labelColumnName = "Label";

            // The pipeline is declared with the general estimator type (instead of an unchecked 'as' cast)
            // so that the longer chain built below can be assigned back to the same variable.
            IEstimator<ITransformer> pipeline = mlContext.Transforms.CopyColumns(labelColumnName, "Age");

            // You also may want to copy a column to perform some hand-featurization using built-in transforms or
            // a CustomMapping transform. For example, we could make an indicator variable if a feature, such as Parity,
            // goes above some threshold. We simply copy the Parity column to a new column, then pass it through a custom function.
            // Here the indicator is 1 when the copied value is strictly greater than 4, and 0 otherwise.
            Action<InputRow, OutputRow> mapping = (input, output) => output.CustomValue = input.CustomValue > 4 ? 1 : 0;
            pipeline = pipeline.Append(mlContext.Transforms.CopyColumns("CustomValue", "Parity"))
                .Append(mlContext.Transforms.CustomMapping(mapping, null));

            // Now we can transform the data and look at the output to confirm the behavior of CopyColumns.
            // Don't forget that this operation doesn't actually evaluate data until we read the data below.
            var transformedData = pipeline.Fit(trainData).Transform(trainData);

            // We can extract the newly created columns as an IEnumerable of SampleInfertDataTransformed, the class we define below.
            var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

            // And finally, we can write out the rows of the dataset, looking at the columns of interest.
            Console.WriteLine("Label, Parity, and CustomValue columns obtained post-transformation.");
            foreach (var row in rowEnumerable)
            {
                Console.WriteLine($"Label: {row.Label} Parity: {row.Parity} CustomValue: {row.CustomValue}");
            }

            // Expected output:
            //  Label, Parity, and CustomValue columns obtained post-transformation.
            //  Label: 26 Parity: 6 CustomValue: 1
            //  Label: 42 Parity: 1 CustomValue: 0
            //  Label: 39 Parity: 6 CustomValue: 1
            //  Label: 34 Parity: 4 CustomValue: 0
            //  Label: 35 Parity: 3 CustomValue: 0
        }

        /// <summary>
        /// Row shape used to read the transformed data back: the three columns printed by <see cref="Example"/>.
        /// </summary>
        private class SampleInfertDataTransformed
        {
            public float Label { get; set; }
            public float Parity { get; set; }
            public float CustomValue { get; set; }
        }

        /// <summary>
        /// Output side of the custom mapping; receives the 0/1 indicator.
        /// </summary>
        private class OutputRow
        {
            public float CustomValue { get; set; }
        }

        /// <summary>
        /// Input side of the custom mapping; carries the value copied from "Parity".
        /// </summary>
        private class InputRow
        {
            public float CustomValue { get; set; }
        }
    }
}
0 commit comments