|
| 1 | +using System; |
| 2 | +using Microsoft.ML.Data; |
| 3 | + |
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to derive a new column from existing ones with a custom mapping,
    /// both as a standalone transformer and as the first step of a training pipeline.
    /// </summary>
    public class CustomMappingSample
    {
        /// <summary>
        /// Loads the infertility sample data, adds an <c>IsUnderThirty</c> column through a custom
        /// mapping, and then trains a binary FastTree model that uses that column as its label.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var data = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = mlContext.Data.ReadFromEnumerable(data);

            // Preview of the data.
            //
            // Age  Case  Education  Induced  Parity  PooledStratum  RowNum  ...
            // 26   1     0-5yrs     1        6       3              1       ...
            // 42   1     0-5yrs     1        1       1              2       ...
            // 39   1     0-5yrs     2        6       4              3       ...
            // 34   1     0-5yrs     2        4       2              4       ...
            // 35   1     6-11yrs    1        3       32             5       ...

            // We define the custom mapping between input and output rows that will be applied by the transformation.
            //
            // The output type deliberately declares ONLY the column being added. Every property of the output
            // type becomes a column produced by the transformation, and a produced column hides an input column
            // with the same name. Mapping into a type that also declares Parity, Induced, etc. without assigning
            // them would therefore replace the real values with defaults and leave the trainer below with
            // constant features.
            Action<SamplesUtils.DatasetUtils.SampleInfertData, CustomMappingOutput> mapping =
                (input, output) => output.IsUnderThirty = input.Age < 30;

            // Custom transformations can be used to transform data directly, or as part of a pipeline.
            // Below we transform data directly.
            var transformer = mlContext.Transforms.CustomMappingTransformer(mapping, null);
            var transformedData = transformer.Transform(trainData);

            // Preview of the data. The input columns pass through untouched; only IsUnderThirty is new.
            //
            // IsUnderThirty  Age  Case  Education  Induced  Parity  PooledStratum  RowNum  ...
            // true           26   1     0-5yrs     1        6       3              1       ...
            // false          42   1     0-5yrs     1        1       1              2       ...
            // false          39   1     0-5yrs     2        6       4              3       ...
            // false          34   1     0-5yrs     2        4       2              4       ...
            // false          35   1     6-11yrs    1        3       32             5       ...

            // Here instead we use it as part of a pipeline of estimators.
            var pipeline = mlContext.Transforms.CustomMapping(mapping, null)
                .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", inputColumnNames: new[] { "Parity", "Induced" }))
                // It is useful to add a caching checkpoint before a trainer that does several passes over the data.
                .AppendCacheCheckpoint(mlContext)
                // We use binary FastTree to predict the label column that was generated by the custom mapping
                // at the first step of the pipeline.
                .Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumn: "IsUnderThirty"));

            // We can train the pipeline and use it to transform data.
            transformedData = pipeline.Fit(trainData).Transform(trainData);
        }

        /// <summary>
        /// Output row of the custom mapping. It declares only the column that the mapping adds to the
        /// columns already present in the input data.
        /// </summary>
        public class CustomMappingOutput
        {
            public bool IsUnderThirty { get; set; }
        }

        // Represents the transformed infertility dataset.
        // Not used as the mapping's output type (see the comment on the mapping in Example);
        // kept so that existing references to this type continue to compile.
        public class SampleInfertDataTransformed
        {
            public int RowNum { get; set; }
            public string Education { get; set; }
            public bool IsUnderThirty { get; set; }
            public float Parity { get; set; }
            public float Induced { get; set; }
            public float Case { get; set; }
            public float Spontaneous { get; set; }
            public float Stratum { get; set; }
            public float PooledStratum { get; set; }
        }
    }
}
0 commit comments