Skip to content

Commit 5746ec9

Browse files
authored
Sample for ConvertType transform estimator (#2781)
* Add a sample for ConvertType
* Remove an unnecessary row
* Address review comments
* Move the sample file so its path mirrors the catalog access path
1 parent a100505 commit 5746ec9

File tree

5 files changed

+72
-8
lines changed

5 files changed

+72
-8
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
using System;
2+
using Microsoft.ML.Data;
3+
4+
namespace Microsoft.ML.Samples.Dynamic
5+
{
6+
/// <summary>
/// Sample for the ConvertType transform estimator: converts the Boolean column
/// 'Survived' into a new Int32 column 'SurvivedInt32' and prints both.
/// </summary>
public static class ConvertType
{
    /// <summary>Shape of one input row: a single Boolean column.</summary>
    private sealed class InputData
    {
        public bool Survived;
    }

    /// <summary>Shape of one row after the transform: the original column plus its Int32 conversion.</summary>
    private sealed class TransformedData
    {
        public bool Survived { get; set; }

        public int SurvivedInt32 { get; set; }
    }

    /// <summary>
    /// Builds a small in-memory data set, fits the ConvertType pipeline on it,
    /// applies the fitted transformer to the same data, and writes each
    /// original/converted pair to the console.
    /// </summary>
    public static void Example()
    {
        var mlContext = new MLContext(seed: 1, conc: 1);

        var rawData = new[]
        {
            new InputData() { Survived = true },
            new InputData() { Survived = false },
            new InputData() { Survived = true },
            new InputData() { Survived = false },
            new InputData() { Survived = false },
        };

        var data = mlContext.Data.LoadFromEnumerable(rawData);

        // Construct the pipeline: output column 'SurvivedInt32' is 'Survived' converted to Int32.
        var pipeline = mlContext.Transforms.Conversion.ConvertType("SurvivedInt32", "Survived", DataKind.Int32);

        // Fit the pipeline, then apply the resulting transformer to the same data.
        var transformer = pipeline.Fit(data);
        var transformedData = transformer.Transform(data);

        // Display original column 'Survived' (boolean) and converted column 'SurvivedInt32' (Int32).
        // Each row is printed immediately inside the loop and never stored, so reusing the row object is fine here.
        var convertedData = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);
        foreach (var item in convertedData)
        {
            // {0,-10} left-aligns the Boolean in a 10-character field.
            Console.WriteLine("A:{0,-10} Aconv:{1}", item.Survived, item.SurvivedInt32);
        }

        // Output
        // A:True       Aconv:1
        // A:False      Aconv:0
        // A:True       Aconv:1
        // A:False      Aconv:0
        // A:False      Aconv:0
    }
}
55+
}

src/Microsoft.ML.Data/Data/Conversion.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1642,10 +1642,10 @@ public void Convert(in TX src, ref SB dst)
16421642
#endregion FromTX
16431643

16441644
#region FromBL
1645-
public void Convert(in BL src, ref I1 dst) => dst = (I1)(object)src;
1646-
public void Convert(in BL src, ref I2 dst) => dst = (I2)(object)src;
1647-
public void Convert(in BL src, ref I4 dst) => dst = (I4)(object)src;
1648-
public void Convert(in BL src, ref I8 dst) => dst = (I8)(object)src;
1645+
public void Convert(in BL src, ref I1 dst) => dst = System.Convert.ToSByte(src);
1646+
public void Convert(in BL src, ref I2 dst) => dst = System.Convert.ToInt16(src);
1647+
public void Convert(in BL src, ref I4 dst) => dst = System.Convert.ToInt32(src);
1648+
public void Convert(in BL src, ref I8 dst) => dst = System.Convert.ToInt64(src);
16491649
public void Convert(in BL src, ref R4 dst) => dst = System.Convert.ToSingle(src);
16501650
public void Convert(in BL src, ref R8 dst) => dst = System.Convert.ToDouble(src);
16511651
public void Convert(in BL src, ref BL dst) => dst = src;

src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs

+6
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms
4747
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
4848
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
4949
/// <param name="outputKind">The expected kind of the output column.</param>
50+
/// <example>
51+
/// <format type="text/markdown">
52+
/// <![CDATA[
53+
/// [!code-csharp[ConvertType](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs)]
54+
/// ]]></format>
55+
/// </example>
5056
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null,
5157
DataKind outputKind = ConvertDefaults.DefaultOutputKind)
5258
=> new TypeConvertingEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName, outputKind);

test/BaselineOutput/Common/Convert/Types.tsv

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#@ col=ConvL:R4:42-43
2525
#@ col=ConvM:R4:44-45
2626
#@ col=ConvN:R4:46-47
27+
#@ col=ConvBI:I4:48-49
2728
#@ }
28-
a b 0 1 -1 1 0 1 0 1 -1 1 -1 1 0 1 -1 1 0 1 1 1 1 1 ? ? 0 1 -1 1 0 1 0 1 -1 1 -1 1 0 1 -1 1 0 1 1 1 1 1
29-
0 1 0 1 -2147483648 2147483647 0 4294967295 0 255 -128 127 -32768 32767 0 65535 -9223372036854775808 9223372036854775807 0 18446744073709551615 -3.40282347E+38 3.40282347E+38 -1.7976931348623157E+308 1.7976931348623157E+308 0 1 0 1 -2.14748365E+09 2.14748365E+09 0 4.2949673E+09 0 255 -128 127 -32768 32767 0 65535 -9.223372E+18 9.223372E+18 0 1.84467441E+19 -3.40282347E+38 3.40282347E+38 -Infinity Infinity
29+
a b 0 1 -1 1 0 1 0 1 -1 1 -1 1 0 1 -1 1 0 1 1 1 1 1 ? ? 0 1 -1 1 0 1 0 1 -1 1 -1 1 0 1 -1 1 0 1 1 1 1 1 0 1
30+
0 1 1 0 -2147483648 2147483647 0 4294967295 0 255 -128 127 -32768 32767 0 65535 -9223372036854775808 9223372036854775807 0 18446744073709551615 -3.40282347E+38 3.40282347E+38 -1.7976931348623157E+308 1.7976931348623157E+308 0 1 1 0 -2.14748365E+09 2.14748365E+09 0 4.2949673E+09 0 255 -128 127 -32768 32767 0 65535 -9.223372E+18 9.223372E+18 0 1.84467441E+19 -3.40282347E+38 3.40282347E+38 -Infinity Infinity 1 0

test/Microsoft.ML.Tests/Transformers/ConvertTests.cs

+4-2
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ public void TestConvertWorkout()
9898
new TestPrimitiveClass()
9999
{
100100
AA = new []{"0", "1"},
101-
AB = new []{false, true},
101+
AB = new []{true, false},
102102
AC = new []{ int.MinValue, int.MaxValue},
103103
AD = new uint[]{ uint.MinValue, uint.MaxValue},
104104
AE = new byte[]{ byte.MinValue, byte.MaxValue},
@@ -125,7 +125,9 @@ public void TestConvertWorkout()
125125
new TypeConvertingEstimator.ColumnOptions("ConvK", DataKind.Single, "AK"),
126126
new TypeConvertingEstimator.ColumnOptions("ConvL", DataKind.Single, "AL"),
127127
new TypeConvertingEstimator.ColumnOptions("ConvM", DataKind.Single, "AM"),
128-
new TypeConvertingEstimator.ColumnOptions("ConvN", DataKind.Single, "AN")}
128+
new TypeConvertingEstimator.ColumnOptions("ConvN", DataKind.Single, "AN"),
129+
new TypeConvertingEstimator.ColumnOptions("ConvBI", DataKind.Int32, "AB") // verify Boolean -> Int32 conversion
130+
}
129131
);
130132
TestEstimatorCore(allTypesPipe, allTypesDataView);
131133

0 commit comments

Comments
 (0)