Skip to content

Commit daeba69

Browse files
committed
OneHotEncoding sample
1 parent fcf85a2 commit daeba69

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using Microsoft.ML.Data;
4+
using static Microsoft.ML.Transforms.OneHotEncodingTransformer;
5+
6+
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to one-hot encode a column with the
    /// <c>Categorical.OneHotEncoding</c> transform.
    /// </summary>
    public static class OneHotEncodingTransform
    {
        /// <summary>
        /// Loads the sample infert dataset, one-hot encodes its <c>Education</c> column into a new
        /// <c>EducationOneHotEncoded</c> column, and writes every encoded row to the console.
        /// </summary>
        public static void Example()
        {
            // Name of the column produced by the transform; used both to build the pipeline
            // and to read the result back, so the two can never drift apart.
            const string encodedColumnName = "EducationOneHotEncoded";

            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var ml = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = ml.Data.LoadFromEnumerable(data);

            // Preview of the data.
            //
            // Age  Case  Education  Induced  Parity  PooledStratum  RowNum  ...
            // 26   1     0-5yrs     1        6       3              1       ...
            // 42   1     0-5yrs     1        1       1              2       ...
            // 39   1     0-5yrs     2        6       4              3       ...
            // 34   1     0-5yrs     2        4       2              4       ...
            // 35   1     6-11yrs    1        3       32             5       ...

            // A pipeline for one hot encoding the Education column.
            var pipeline = ml.Transforms.Categorical.OneHotEncoding(encodedColumnName, "Education", OutputKind.Bag);

            // Fit to data.
            var transformer = pipeline.Fit(trainData);

            // Get the transformed data.
            var transformedData = transformer.Transform(trainData);

            // Get the data of the newly created column, so we can preview it.
            var encodedColumn = transformedData.GetColumn<float[]>(ml, encodedColumnName);

            // A small printing utility: writes each row's values on one line, separated by spaces.
            // (The earlier version also took a column name, but never used it.)
            Action<IEnumerable<float[]>> printRows = rows =>
            {
                foreach (var row in rows)
                {
                    foreach (var value in row)
                    {
                        Console.Write($"{value} ");
                    }

                    Console.WriteLine();
                }
            };

            printRows(encodedColumn);

            // data column obtained post-transformation.
            // 1 0 0 0 ...
            // 1 0 0 0 ...
            // 1 0 0 0 ...
            // 1 0 0 0 ...
            // 0 1 0 0 ...
            // ....
        }
    }
}

0 commit comments

Comments
 (0)