Skip to content

Commit 8904af0

Browse files
authored
Add a sample for copy columns. (#2351)
Adding a sample for copy columns, with links from the catalog entries.
1 parent 29a32cd commit 8904af0

File tree

3 files changed

+101
-1
lines changed

3 files changed

+101
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML.Core.Data;
5+
using Microsoft.ML.Data;
6+
using Microsoft.ML.Transforms;
7+
8+
namespace Microsoft.ML.Samples.Dynamic
9+
{
10+
/// <summary>
/// Sample showing how the CopyColumns transform can be used to rename a column and to
/// duplicate a column so that the copy can be reshaped by a custom mapping.
/// </summary>
public class CopyColumns
{
    /// <summary>
    /// Builds a pipeline that copies "Age" to "Label" and "Parity" to "CustomValue", turns
    /// "CustomValue" into a 0/1 indicator through a custom mapping, and prints the
    /// resulting Label, Parity and CustomValue columns to the console.
    /// </summary>
    public static void Example()
    {
        // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
        // as well as the source of randomness.
        var mlContext = new MLContext();

        // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
        IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> data = SamplesUtils.DatasetUtils.GetInfertData();
        var trainData = mlContext.Data.ReadFromEnumerable(data);

        // Preview of the data.
        //
        // Age    Case  Education  induced  parity  pooled.stratum  row_num  ...
        // 26.0   1.0   0-5yrs     1.0      6.0     3.0             1.0      ...
        // 42.0   1.0   0-5yrs     1.0      1.0     1.0             2.0      ...
        // 39.0   1.0   0-5yrs     2.0      6.0     4.0             3.0      ...
        // 34.0   1.0   0-5yrs     2.0      4.0     2.0             4.0      ...
        // 35.0   1.0   6-11yrs    1.0      3.0     32.0            5.0      ...

        // CopyColumns is commonly used to rename columns.
        // For example, if you want to train towards Age, and your learner expects a "Label" column, you can
        // use CopyColumns to rename Age to Label. Technically, the Age column still exists, but it won't be
        // materialized unless you actually need it somewhere (e.g. if you were to save the transformed data
        // without explicitly dropping the column). This is a general property of IDataView's lazy evaluation.
        string labelColumnName = "Label";

        // The pipeline variable is typed as the general estimator interface so that the estimator chain
        // built below can be assigned back to it. A direct cast is used instead of 'as' so that an
        // invalid conversion fails here rather than silently producing null.
        var pipeline = (IEstimator<ITransformer>)mlContext.Transforms.CopyColumns(labelColumnName, "Age");

        // You also may want to copy a column to perform some hand-featurization using built-in transforms or
        // a CustomMapping transform. For example, we could make an indicator variable if a feature, such as Parity,
        // goes above some threshold. We simply copy the Parity column to a new column, then pass it through a custom function.
        const float parityThreshold = 4;
        Action<InputRow, OutputRow> mapping = (input, output) =>
            output.CustomValue = input.CustomValue > parityThreshold ? 1 : 0;

        pipeline = pipeline
            .Append(mlContext.Transforms.CopyColumns("CustomValue", "Parity"))
            .Append(mlContext.Transforms.CustomMapping(mapping, null));

        // Now we can transform the data and look at the output to confirm the behavior of CopyColumns.
        // Don't forget that this operation doesn't actually evaluate data until we read the data below.
        var transformedData = pipeline.Fit(trainData).Transform(trainData);

        // We can extract the newly created column as an IEnumerable of SampleInfertDataTransformed, the class we define below.
        var rowEnumerable = mlContext.CreateEnumerable<SampleInfertDataTransformed>(transformedData, reuseRowObject: false);

        // And finally, we can write out the rows of the dataset, looking at the columns of interest.
        Console.WriteLine("Label, Parity, and CustomValue columns obtained post-transformation.");
        foreach (var row in rowEnumerable)
        {
            Console.WriteLine($"Label: {row.Label} Parity: {row.Parity} CustomValue: {row.CustomValue}");
        }

        // Expected output:
        //  Label, Parity, and CustomValue columns obtained post-transformation.
        //  Label: 26 Parity: 6 CustomValue: 1
        //  Label: 42 Parity: 1 CustomValue: 0
        //  Label: 39 Parity: 6 CustomValue: 1
        //  Label: 34 Parity: 4 CustomValue: 0
        //  Label: 35 Parity: 3 CustomValue: 0
    }

    /// <summary>
    /// Shape of the rows read back from the transformed data; only the columns printed by the sample.
    /// </summary>
    private class SampleInfertDataTransformed
    {
        public float Label { get; set; }
        public float Parity { get; set; }
        public float CustomValue { get; set; }
    }

    /// <summary>
    /// Output of the custom mapping: the indicator value written to the "CustomValue" column.
    /// </summary>
    private class OutputRow
    {
        public float CustomValue { get; set; }
    }

    /// <summary>
    /// Input of the custom mapping: the "CustomValue" column, which at that point holds the copy of "Parity".
    /// </summary>
    private class InputRow
    {
        public float CustomValue { get; set; }
    }
}
86+
}

src/Microsoft.ML.Data/Transforms/ExtensionsCatalog.cs

+15-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ public static class TransformExtensionsCatalog
1818
/// <param name="catalog">The transform's catalog.</param>
1919
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
2020
/// <param name="inputColumnName">Name of the column to transform.</param>
21+
/// <example>
22+
/// <format type="text/markdown">
23+
/// <![CDATA[
24+
/// [!code-csharp[CopyColumns](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs)]
25+
/// ]]>
26+
/// </format>
27+
/// </example>
2128
public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, string outputColumnName, string inputColumnName)
2229
=> new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName);
2330

@@ -27,6 +34,13 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
2734
/// </summary>
2835
/// <param name="catalog">The transform's catalog.</param>
2936
/// <param name="columns">The pairs of input and output columns.</param>
37+
/// <example>
38+
/// <format type="text/markdown">
39+
/// <![CDATA[
40+
/// [!code-csharp[CopyColumns](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CopyColumns.cs)]
41+
/// ]]>
42+
/// </format>
43+
/// </example>
3044
public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog, params (string outputColumnName, string inputColumnName)[] columns)
3145
=> new ColumnCopyingEstimator(CatalogUtils.GetEnvironment(catalog), columns);
3246

@@ -39,7 +53,7 @@ public static ColumnCopyingEstimator CopyColumns(this TransformsCatalog catalog,
3953
/// <example>
4054
/// <format type="text/markdown">
4155
/// <![CDATA[
42-
/// [!code-csharp[Concat](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ConcatTransform.cs)]
56+
/// [!code-csharp[Concat](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ConcatTransform.cs)]
4357
/// ]]>
4458
/// </format>
4559
/// </example>

0 commit comments

Comments
 (0)