Skip to content

Commit 3712486

Browse files
authored
Reformat categorical transform samples. (#3588)
* Reformat categorical transform samples. * PR feedback.
1 parent 1a1a62a commit 3712486

File tree

4 files changed

+140
-92
lines changed

4 files changed

+140
-92
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncoding.cs

+38-22
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,67 @@
11
using System;
2-
using System.Collections.Generic;
32
using Microsoft.ML;
43
using Microsoft.ML.Data;
5-
using static Microsoft.ML.Transforms.OneHotEncodingEstimator;
4+
using Microsoft.ML.Transforms;
65

7-
namespace Samples.Dynamic
6+
namespace Samples.Dynamic.Transforms.Categorical
87
{
98
public static class OneHotEncoding
109
{
1110
public static void Example()
1211
{
13-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
14-
// as well as the source of randomness.
12+
// Create a new ML context for ML.NET operations. It can be used for
13+
// exception tracking and logging as well as the source of randomness.
1514
var mlContext = new MLContext();
1615

17-
// Get a small dataset as an IEnumerable.
18-
var samples = new List<DataPoint>()
16+
// Create a small dataset as an IEnumerable.
17+
var samples = new[]
1918
{
20-
new DataPoint(){ Education = "0-5yrs" },
21-
new DataPoint(){ Education = "0-5yrs" },
22-
new DataPoint(){ Education = "6-11yrs" },
23-
new DataPoint(){ Education = "6-11yrs" },
24-
new DataPoint(){ Education = "11-15yrs" },
19+
new DataPoint {Education = "0-5yrs"},
20+
new DataPoint {Education = "0-5yrs"},
21+
new DataPoint {Education = "6-11yrs"},
22+
new DataPoint {Education = "6-11yrs"},
23+
new DataPoint {Education = "11-15yrs"}
2524
};
2625

2726
// Convert training data to IDataView.
28-
var data = mlContext.Data.LoadFromEnumerable(samples);
27+
IDataView data = mlContext.Data.LoadFromEnumerable(samples);
2928

3029
// A pipeline for one hot encoding the Education column.
31-
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("EducationOneHotEncoded", "Education");
30+
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding(
31+
"EducationOneHotEncoded", "Education");
3232

3333
// Fit and transform the data.
34-
var oneHotEncodedData = pipeline.Fit(data).Transform(data);
34+
IDataView oneHotEncodedData = pipeline.Fit(data).Transform(data);
3535

3636
PrintDataColumn(oneHotEncodedData, "EducationOneHotEncoded");
37-
// We have 3 slots, because there are three categories in the 'Education' column.
37+
38+
// We have 3 slots because there are three categories in the
39+
// 'Education' column.
40+
3841
// 1 0 0
3942
// 1 0 0
4043
// 0 1 0
4144
// 0 1 0
4245
// 0 0 1
4346

4447
// A pipeline for one hot encoding the Education column (using keying).
45-
var keyPipeline = mlContext.Transforms.Categorical.OneHotEncoding("EducationOneHotEncoded", "Education", OutputKind.Key);
48+
var keyPipeline = mlContext.Transforms.Categorical.OneHotEncoding(
49+
"EducationOneHotEncoded", "Education",
50+
OneHotEncodingEstimator.OutputKind.Key);
4651

4752
// Fit and transform the data.
4853
oneHotEncodedData = keyPipeline.Fit(data).Transform(data);
4954

50-
var keyEncodedColumn = oneHotEncodedData.GetColumn<uint>("EducationOneHotEncoded");
55+
var keyEncodedColumn =
56+
oneHotEncodedData.GetColumn<uint>("EducationOneHotEncoded");
57+
58+
Console.WriteLine(
59+
"One Hot Encoding of single column 'Education', with key type " +
60+
"output.");
61+
62+
// One Hot Encoding of single column 'Education', with key type output.
5163

52-
Console.WriteLine("One Hot Encoding of single column 'Education', with key type output.");
53-
foreach (var element in keyEncodedColumn)
64+
foreach (uint element in keyEncodedColumn)
5465
Console.WriteLine(element);
5566

5667
// 1
@@ -59,17 +70,22 @@ public static void Example()
5970
// 2
6071
// 3
6172
}
62-
private static void PrintDataColumn(IDataView transformedData, string columnName)
73+
74+
private static void PrintDataColumn(IDataView transformedData,
75+
string columnName)
6376
{
64-
var countSelectColumn = transformedData.GetColumn<float[]>(transformedData.Schema[columnName]);
77+
var countSelectColumn = transformedData.GetColumn<float[]>(
78+
transformedData.Schema[columnName]);
6579

6680
foreach (var row in countSelectColumn)
6781
{
6882
for (var i = 0; i < row.Length; i++)
6983
Console.Write($"{row[i]}\t");
84+
7085
Console.WriteLine();
7186
}
7287
}
88+
7389
private class DataPoint
7490
{
7591
public string Education { get; set; }

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncodingMultiColumn.cs

+33-23
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,55 @@
11
using System;
2-
using System.Collections.Generic;
32
using Microsoft.ML;
43

5-
namespace Samples.Dynamic
4+
namespace Samples.Dynamic.Transforms.Categorical
65
{
76
public static class OneHotEncodingMultiColumn
87
{
98
public static void Example()
109
{
11-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
12-
// as well as the source of randomness.
10+
// Create a new ML context for ML.NET operations. It can be used for
11+
// exception tracking and logging as well as the source of randomness.
1312
var mlContext = new MLContext();
1413

15-
// Get a small dataset as an IEnumerable.
16-
var samples = new List<DataPoint>()
14+
// Create a small dataset as an IEnumerable.
15+
var samples = new[]
1716
{
18-
new DataPoint(){ Education = "0-5yrs", ZipCode = "98005" },
19-
new DataPoint(){ Education = "0-5yrs", ZipCode = "98052" },
20-
new DataPoint(){ Education = "6-11yrs", ZipCode = "98005" },
21-
new DataPoint(){ Education = "6-11yrs", ZipCode = "98052" },
22-
new DataPoint(){ Education = "11-15yrs", ZipCode = "98005" },
17+
new DataPoint {Education = "0-5yrs", ZipCode = "98005"},
18+
new DataPoint {Education = "0-5yrs", ZipCode = "98052"},
19+
new DataPoint {Education = "6-11yrs", ZipCode = "98005"},
20+
new DataPoint {Education = "6-11yrs", ZipCode = "98052"},
21+
new DataPoint {Education = "11-15yrs", ZipCode = "98005"}
2322
};
2423

2524
// Convert training data to IDataView.
26-
var data = mlContext.Data.LoadFromEnumerable(samples);
25+
IDataView data = mlContext.Data.LoadFromEnumerable(samples);
2726

28-
// Multi column example : A pipeline for one hot encoding two columns 'Education' and 'ZipCode'
29-
var multiColumnKeyPipeline = mlContext.Transforms.Categorical.OneHotEncoding(
30-
new InputOutputColumnPair[] {
31-
new InputOutputColumnPair("Education"),
32-
new InputOutputColumnPair("ZipCode"),
33-
});
27+
// Multi column example: A pipeline for one hot encoding two columns
28+
// 'Education' and 'ZipCode'.
29+
var multiColumnKeyPipeline =
30+
mlContext.Transforms.Categorical.OneHotEncoding(
31+
new[]
32+
{
33+
new InputOutputColumnPair("Education"),
34+
new InputOutputColumnPair("ZipCode")
35+
});
3436

3537
// Fit and Transform data.
36-
var transformedData = multiColumnKeyPipeline.Fit(data).Transform(data);
38+
IDataView transformedData =
39+
multiColumnKeyPipeline.Fit(data).Transform(data);
3740

38-
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
41+
var convertedData =
42+
mlContext.Data.CreateEnumerable<TransformedData>(transformedData,
43+
true);
3944

40-
Console.WriteLine("One Hot Encoding of two columns 'Education' and 'ZipCode'.");
41-
foreach (var item in convertedData)
42-
Console.WriteLine("{0}\t\t\t{1}", string.Join(" ", item.Education), string.Join(" ", item.ZipCode));
45+
Console.WriteLine(
46+
"One Hot Encoding of two columns 'Education' and 'ZipCode'.");
47+
48+
// One Hot Encoding of two columns 'Education' and 'ZipCode'.
49+
50+
foreach (TransformedData item in convertedData)
51+
Console.WriteLine("{0}\t\t\t{1}", string.Join(" ", item.Education),
52+
string.Join(" ", item.ZipCode));
4353

4454
// 1 0 0 1 0
4555
// 1 0 0 0 1

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotHashEncoding.cs

+35-26
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,37 @@
11
using System;
2-
using System.Collections.Generic;
32
using Microsoft.ML;
43
using Microsoft.ML.Data;
54
using Microsoft.ML.Transforms;
65

7-
namespace Samples.Dynamic
6+
namespace Samples.Dynamic.Transforms.Categorical
87
{
98
public static class OneHotHashEncoding
109
{
1110
public static void Example()
1211
{
13-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
14-
// as well as the source of randomness.
12+
// Create a new ML context for ML.NET operations. It can be used for
13+
// exception tracking and logging as well as the source of randomness.
1514
var mlContext = new MLContext();
1615

17-
// Get a small dataset as an IEnumerable.
18-
var samples = new List<DataPoint>()
16+
// Create a small dataset as an IEnumerable.
17+
var samples = new[]
1918
{
20-
new DataPoint(){ Education = "0-5yrs" },
21-
new DataPoint(){ Education = "0-5yrs" },
22-
new DataPoint(){ Education = "6-11yrs" },
23-
new DataPoint(){ Education = "6-11yrs" },
24-
new DataPoint(){ Education = "11-15yrs" },
19+
new DataPoint {Education = "0-5yrs"},
20+
new DataPoint {Education = "0-5yrs"},
21+
new DataPoint {Education = "6-11yrs"},
22+
new DataPoint {Education = "6-11yrs"},
23+
new DataPoint {Education = "11-15yrs"}
2524
};
2625

27-
// Convert training data to IDataView.
28-
var data = mlContext.Data.LoadFromEnumerable(samples);
26+
// Convert training data to an IDataView.
27+
IDataView data = mlContext.Data.LoadFromEnumerable(samples);
2928

3029
// A pipeline for one hot hash encoding the 'Education' column.
31-
var pipeline = mlContext.Transforms.Categorical.OneHotHashEncoding("EducationOneHotHashEncoded", "Education", numberOfBits: 3);
30+
var pipeline = mlContext.Transforms.Categorical.OneHotHashEncoding(
31+
"EducationOneHotHashEncoded", "Education", numberOfBits: 3);
3232

3333
// Fit and transform the data.
34-
var hashEncodedData = pipeline.Fit(data).Transform(data);
34+
IDataView hashEncodedData = pipeline.Fit(data).Transform(data);
3535

3636
PrintDataColumn(hashEncodedData, "EducationOneHotHashEncoded");
3737
// We have 8 slots, because we used numberOfBits = 3.
@@ -42,19 +42,26 @@ public static void Example()
4242
// 0 0 0 0 1 0 0 0
4343
// 0 0 0 0 0 0 0 1
4444

45-
// A pipeline for one hot hash encoding the 'Education' column (using keying strategy).
46-
var keyPipeline = mlContext.Transforms.Categorical.OneHotHashEncoding("EducationOneHotHashEncoded", "Education",
47-
outputKind: OneHotEncodingEstimator.OutputKind.Key,
48-
numberOfBits: 3);
45+
// A pipeline for one hot hash encoding the 'Education' column
46+
// (using keying strategy).
47+
var keyPipeline = mlContext.Transforms.Categorical.OneHotHashEncoding(
48+
"EducationOneHotHashEncoded", "Education",
49+
OneHotEncodingEstimator.OutputKind.Key, 3);
4950

5051
// Fit and transform the data.
51-
var hashKeyEncodedData = keyPipeline.Fit(data).Transform(data);
52+
IDataView hashKeyEncodedData = keyPipeline.Fit(data).Transform(data);
5253

53-
// Getting the data of the newly created column, so we can preview it.
54-
var keyEncodedColumn = hashKeyEncodedData.GetColumn<uint>("EducationOneHotHashEncoded");
54+
// Get the data of the newly created column for inspecting.
55+
var keyEncodedColumn =
56+
hashKeyEncodedData.GetColumn<uint>("EducationOneHotHashEncoded");
5557

56-
Console.WriteLine("One Hot Hash Encoding of single column 'Education', with key type output.");
57-
foreach (var element in keyEncodedColumn)
58+
Console.WriteLine(
59+
"One Hot Hash Encoding of single column 'Education', with key " +
60+
"type output.");
61+
62+
// One Hot Hash Encoding of single column 'Education', with key type output.
63+
64+
foreach (uint element in keyEncodedColumn)
5865
Console.WriteLine(element);
5966

6067
// 4
@@ -64,9 +71,11 @@ public static void Example()
6471
// 8
6572
}
6673

67-
private static void PrintDataColumn(IDataView transformedData, string columnName)
74+
private static void PrintDataColumn(IDataView transformedData,
75+
string columnName)
6876
{
69-
var countSelectColumn = transformedData.GetColumn<float[]>(transformedData.Schema[columnName]);
77+
var countSelectColumn = transformedData.GetColumn<float[]>(
78+
transformedData.Schema[columnName]);
7079

7180
foreach (var row in countSelectColumn)
7281
{

docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotHashEncodingMultiColumn.cs

+34-21
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,57 @@
11
using System;
2-
using System.Collections.Generic;
32
using Microsoft.ML;
43

5-
namespace Samples.Dynamic
4+
namespace Samples.Dynamic.Transforms.Categorical
65
{
76
public static class OneHotHashEncodingMultiColumn
87
{
98
public static void Example()
109
{
11-
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
12-
// as well as the source of randomness.
10+
// Create a new ML context for ML.NET operations. It can be used for
11+
// exception tracking and logging as well as the source of randomness.
1312
var mlContext = new MLContext();
1413

1514
// Create a small dataset as an IEnumerable.
16-
var samples = new List<DataPoint>()
15+
var samples = new[]
1716
{
18-
new DataPoint(){ Education = "0-5yrs", ZipCode = "98005" },
19-
new DataPoint(){ Education = "0-5yrs", ZipCode = "98052" },
20-
new DataPoint(){ Education = "6-11yrs", ZipCode = "98005" },
21-
new DataPoint(){ Education = "6-11yrs", ZipCode = "98052" },
22-
new DataPoint(){ Education = "11-15yrs", ZipCode = "98005" },
17+
new DataPoint {Education = "0-5yrs", ZipCode = "98005"},
18+
new DataPoint {Education = "0-5yrs", ZipCode = "98052"},
19+
new DataPoint {Education = "6-11yrs", ZipCode = "98005"},
20+
new DataPoint {Education = "6-11yrs", ZipCode = "98052"},
21+
new DataPoint {Education = "11-15yrs", ZipCode = "98005"}
2322
};
2423

2524
// Convert training data to IDataView.
26-
var data = mlContext.Data.LoadFromEnumerable(samples);
25+
IDataView data = mlContext.Data.LoadFromEnumerable(samples);
2726

28-
// Multi column example : A pipeline for one hot has encoding two columns 'Education' and 'ZipCode'
29-
var multiColumnKeyPipeline = mlContext.Transforms.Categorical.OneHotHashEncoding(
30-
new InputOutputColumnPair[] { new InputOutputColumnPair("Education"), new InputOutputColumnPair("ZipCode") },
31-
numberOfBits: 3);
27+
// Multi column example: A pipeline for one hot hash encoding two
28+
// columns 'Education' and 'ZipCode'.
29+
var multiColumnKeyPipeline =
30+
mlContext.Transforms.Categorical.OneHotHashEncoding(
31+
new[]
32+
{
33+
new InputOutputColumnPair("Education"),
34+
new InputOutputColumnPair("ZipCode")
35+
},
36+
numberOfBits: 3);
3237

3338
// Fit and Transform the data.
34-
var transformedData = multiColumnKeyPipeline.Fit(data).Transform(data);
39+
IDataView transformedData =
40+
multiColumnKeyPipeline.Fit(data).Transform(data);
3541

36-
var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, true);
42+
var convertedData =
43+
mlContext.Data.CreateEnumerable<TransformedData>(transformedData,
44+
true);
45+
46+
Console.WriteLine(
47+
"One Hot Hash Encoding of two columns 'Education' and 'ZipCode'.");
48+
49+
// One Hot Hash Encoding of two columns 'Education' and 'ZipCode'.
50+
51+
foreach (TransformedData item in convertedData)
52+
Console.WriteLine("{0}\t\t\t{1}", string.Join(" ", item.Education),
53+
string.Join(" ", item.ZipCode));
3754

38-
Console.WriteLine("One Hot Hash Encoding of two columns 'Education' and 'ZipCode'.");
39-
foreach (var item in convertedData)
40-
Console.WriteLine("{0}\t\t\t{1}", string.Join(" ", item.Education), string.Join(" ", item.ZipCode));
41-
4255
// We have 8 slots, because we used numberOfBits = 3.
4356

4457
// 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1

0 commit comments

Comments
 (0)