|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using Microsoft.ML.Data; |
| 4 | +using static Microsoft.ML.Transforms.OneHotEncodingTransformer; |
| 5 | + |
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to one hot encode a column of the "infert" sample dataset
    /// and print the resulting encoded column to the console.
    /// </summary>
    public static class OneHotEncodingTransform
    {
        /// <summary>
        /// Loads the sample data, fits a one hot encoding pipeline on the "Education" column,
        /// transforms the data and writes the encoded column to the console, one row per line.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var ml = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = ml.Data.LoadFromEnumerable(data);

            // Preview of the data.
            //
            // Age    Case  Education  Induced     Parity  PooledStratum  RowNum  ...
            // 26     1     0-5yrs     1           6       3              1       ...
            // 42     1     0-5yrs     1           1       1              2       ...
            // 39     1     0-5yrs     2           6       4              3       ...
            // 34     1     0-5yrs     2           4       2              4       ...
            // 35     1     6-11yrs    1           3       32             5       ...

            // A pipeline for one hot encoding the Education column; the result is written to a new
            // "EducationOneHotEncoded" column using the Bag output kind.
            var pipeline = ml.Transforms.Categorical.OneHotEncoding("EducationOneHotEncoded", "Education", OutputKind.Bag);

            // Fit to data.
            var transformer = pipeline.Fit(trainData);

            // Get the transformed data.
            var transformedData = transformer.Transform(trainData);

            // Getting the data of the newly created column, so we can preview it.
            var encodedColumn = transformedData.GetColumn<float[]>(ml, "EducationOneHotEncoded");

            // A small printing utility: writes a header naming the column, then each row's values
            // separated by spaces, one row per line.
            Action<string, IEnumerable<float[]>> printHelper = (colName, column) =>
            {
                // The header makes use of colName, which was previously accepted but never printed.
                Console.WriteLine($"{colName} column obtained post-transformation.");
                foreach (var row in column)
                {
                    for (var i = 0; i < row.Length; i++)
                    {
                        Console.Write($"{row[i]} ");
                    }

                    Console.WriteLine();
                }
            };

            printHelper("Education", encodedColumn);

            // Education column obtained post-transformation.
            // 1 0 0 0 ...
            // 1 0 0 0 ...
            // 1 0 0 0 ...
            // 1 0 0 0 ...
            // 0 1 0 0 ...
            // ....
        }
    }
}
0 commit comments