diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs
deleted file mode 100644
index 8fb934bf5c..0000000000
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs
+++ /dev/null
@@ -1,97 +0,0 @@
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using Microsoft.ML;
-using Microsoft.ML.Data;
-
-namespace Samples.Dynamic
-{
-    public static class ProjectionTransforms
-    {
-        public static void Example()
-        {
-            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
-            // as well as the source of randomness.
-            var ml = new MLContext();
-
-            // Get a small dataset as an IEnumerable and convert it to an IDataView.
-            IEnumerable<Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData> data = Microsoft.ML.SamplesUtils.DatasetUtils.GetVectorOfNumbersData();
-            var trainData = ml.Data.LoadFromEnumerable(data);
-
-            // Preview of the data.
-            //
-            // Features
-            // 0 1 2 3 4 5 6 7 8 9
-            // 1 2 3 4 5 6 7 8 9 0
-            // 2 3 4 5 6 7 8 9 0 1
-            // 3 4 5 6 7 8 9 0 1 2
-            // 4 5 6 7 8 9 0 1 2 3
-            // 5 6 7 8 9 0 1 2 3 4
-            // 6 7 8 9 0 1 2 3 4 5
-
-            // A small printing utility.
-            Action<string, IEnumerable<VBuffer<float>>> printHelper = (colName, column) =>
-            {
-                Console.WriteLine($"{colName} column obtained post-transformation.");
-                foreach (var row in column)
-                    Console.WriteLine($"{string.Join(" ", row.DenseValues().Select(x => x.ToString("f3")))} ");
-            };
-
-            // A pipeline to project Features column into Random fourier space.
-            var rffPipeline = ml.Transforms.ApproximatedKernelMap(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4);
-            // The transformed (projected) data.
-            var transformedData = rffPipeline.Fit(trainData).Transform(trainData);
-            // Getting the data of the newly created column, so we can preview it.
-            var randomFourier = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
-
-            printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier);
-
-            // Features column obtained post-transformation.
-            //
-            //0.634 0.628 -0.705 -0.337
-            //0.704 0.683 -0.555 -0.422
-            //0.407 0.542 -0.707 -0.616
-            //0.473 0.331 -0.400 -0.699
-            //0.181 0.361 -0.335 -0.157
-            //0.165 0.117 -0.547 0.014
-
-            // A pipeline to project Features column into L-p normalized vector.
-            var lpNormalizePipeline = ml.Transforms.NormalizeLpNorm(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features),
-                norm: Microsoft.ML.Transforms.LpNormNormalizingEstimatorBase.NormFunction.L1);
-
-            // The transformed (projected) data.
-            transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData);
-            // Getting the data of the newly created column, so we can preview it.
-            var lpNormalize = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
-
-            printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize);
-
-            // Features column obtained post-transformation.
-            //
-            // 0.000 0.022 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200
-            // 0.022 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000
-            // 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 0.022
-            // 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 0.022 0.044
-            // 0.111 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089
-            // 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 0.111
-
-            // A pipeline to project Features column into L-p normalized vector.
-            var gcNormalizePipeline = ml.Transforms.NormalizeGlobalContrast(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean: false);
-            // The transformed (projected) data.
-            transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData);
-            // Getting the data of the newly created column, so we can preview it.
-            var gcNormalize = transformedData.GetColumn<VBuffer<float>>(transformedData.Schema[nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]);
-
-            printHelper(nameof(Microsoft.ML.SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize);
-
-            // Features column obtained post-transformation.
-            //
-            // 0.000 0.059 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533
-            // 0.059 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000
-            // 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 0.059
-            // 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 0.059 0.118
-            // 0.296 0.355 0.415 0.474 0.533 0.000 0.059 0.118 0.178 0.237
-            // 0.355 0.415 0.474 0.533 0.000 0.059 0.118 0.178 0.237 0.296
-        }
-    }
-}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs
new file mode 100644
index 0000000000..ce7152b50f
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs
@@ -0,0 +1,52 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Transforms;
+
+namespace Samples.Dynamic
+{
+    public static class ApproximatedKernelMap
+    {
+        // Transform feature vector to another non-linear space. See https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf.
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[7] { 1, 1, 0, 0, 1, 0, 1} },
+                new DataPoint(){ Features = new float[7] { 0, 0, 1, 0, 0, 1, 1} },
+                new DataPoint(){ Features = new float[7] {-1, 1, 0,-1,-1, 0,-1} },
+                new DataPoint(){ Features = new float[7] { 0,-1, 0, 1, 0,-1,-1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            // The ApproximatedKernelMap transform takes the data and maps it to a random low-dimensional space.
+            var approximation = mlContext.Transforms.ApproximatedKernelMap("Features", rank: 4, generator: new GaussianKernel(gamma: 0.7f), seed: 1);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate data until we read the data below.
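+            // Each 7-dimensional Features row is mapped to a 4-dimensional vector (rank: 4);
+            // dot products between the mapped vectors approximate the Gaussian kernel (gamma: 0.7)
+            // evaluated on the original rows, as described in the paper linked above.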
+            var transformer = approximation.Fit(data);
+            var transformedData = transformer.Transform(data);
+
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            // -0.0119, 0.5867, 0.4942, 0.7041
+            //  0.4720, 0.5639, 0.4346, 0.2671
+            // -0.2243, 0.7071, 0.7053, -0.1681
+            //  0.0846, 0.5836, 0.6575, 0.0581
+        }
+
+        private class DataPoint
+        {
+            [VectorType(7)]
+            public float[] Features { get; set; }
+        }
+
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs
new file mode 100644
index 0000000000..322997ca3b
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+
+namespace Samples.Dynamic
+{
+    class NormalizeGlobalContrast
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 1, 1, 0, 0} },
+                new DataPoint(){ Features = new float[4] { 2, 2, 0, 0} },
+                new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} },
+                new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale: 2, ensureUnitStandardDeviation: true);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate data until we read the data below.
+            var transformer = approximation.Fit(data);
+            var transformedData = transformer.Transform(data);
+
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  2.0000, 2.0000,-2.0000,-2.0000
+            //  2.0000, 2.0000,-2.0000,-2.0000
+            //  2.0000,-2.0000, 2.0000,-2.0000
+            // -2.0000, 2.0000,-2.0000, 2.0000
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+        }
+    }
+}
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs
new file mode 100644
index 0000000000..83883ae49e
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs
@@ -0,0 +1,49 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.ML;
+using Microsoft.ML.Data;
+using Microsoft.ML.Transforms;
+
+namespace Samples.Dynamic
+{
+    class NormalizeLpNorm
+    {
+        public static void Example()
+        {
+            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+            var samples = new List<DataPoint>()
+            {
+                new DataPoint(){ Features = new float[4] { 1, 1, 0, 0} },
+                new DataPoint(){ Features = new float[4] { 2, 2, 0, 0} },
+                new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} },
+                new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} }
+            };
+            // Convert training data to IDataView, the general data type used in ML.NET.
+            var data = mlContext.Data.LoadFromEnumerable(samples);
+            var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true);
+
+            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
+            // This operation doesn't actually evaluate data until we read the data below.
+            var transformer = approximation.Fit(data);
+            var transformedData = transformer.Transform(data);
+
+            var column = transformedData.GetColumn<float[]>("Features").ToArray();
+            foreach (var row in column)
+                Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4"))));
+            // Expected output:
+            //  0.2500, 0.2500, -0.2500, -0.2500
+            //  0.2500, 0.2500, -0.2500, -0.2500
+            //  0.2500, -0.2500, 0.2500, -0.2500
+            // -0.2500, 0.2500, -0.2500, 0.2500
+        }
+
+        private class DataPoint
+        {
+            [VectorType(4)]
+            public float[] Features { get; set; }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs
index 4ffba0a9e2..59ea97bf30 100644
--- a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs
+++ b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs
@@ -203,7 +203,7 @@ internal sealed class Options : IComponentFactory
         /// <summary>
         /// Create a new instance of a LaplacianKernel.
         /// </summary>
-        /// <param name="a">The coefficient in the exponent of the kernel function</param>
+        /// <param name="a">The coefficient in the exponent of the kernel function.</param>
         public LaplacianKernel(float a = 1)
         {
             Contracts.CheckParam(a > 0, nameof(a));
diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs
index e399038833..5e36ea8927 100644
--- a/src/Microsoft.ML.Transforms/KernelCatalog.cs
+++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs
@@ -26,7 +26,7 @@ public static class KernelExpansionCatalog
         ///
         ///
         ///
         ///
         ///
diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
index cfec3f878a..e729567461 100644
--- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
+++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs
@@ -246,10 +246,15 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
         /// <param name="inputColumnName">Name of column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
         /// <param name="norm">Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one.</param>
         /// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing and use the raw input otherwise.</param>
+        /// <remarks>
+        /// This transform performs the following operation on each row X: Y = (X - M(X)) / D(X),
+        /// where M(X) is the scalar mean of all elements in the current row if <paramref name="ensureZeroMean"/> is set to <see langword="true"/>, or 0 otherwise,
+        /// and D(X) is the scalar value of the selected <paramref name="norm"/>.
+        /// </remarks>
         ///
         ///
         ///
         ///
         ///
@@ -276,10 +281,16 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo
         /// <param name="ensureZeroMean">If <see langword="true"/>, subtract mean from each value before normalizing and use the raw input otherwise.</param>
         /// <param name="ensureUnitStandardDeviation">If <see langword="true"/>, resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one.</param>
        /// <param name="scale">Scale features by this value.</param>
+        /// <remarks>
+        /// This transform performs the following operation on each row X: Y = scale * (X - M(X)) / D(X),
+        /// where M(X) is the scalar mean of all elements in the current row if <paramref name="ensureZeroMean"/> is set to <see langword="true"/>, or 0 otherwise;
+        /// D(X) is the standard deviation of the row if <paramref name="ensureUnitStandardDeviation"/> is set to <see langword="true"/>, or
+        /// the L2 norm of the row vector if <paramref name="ensureUnitStandardDeviation"/> is set to <see langword="false"/>; and scale is <paramref name="scale"/>.
+        /// </remarks>
         ///
         ///
         ///
         ///
         ///
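For reference, the arithmetic described in the NormalizeLpNorm remarks above can be checked by hand. The following standalone snippet is not part of the diff (the class name LpNormByHand is invented for illustration); it reproduces the L1-norm case with ensureZeroMean: true on the first sample row and prints the same values as the expected output in NormalizeLpNorm.cs.

using System;
using System.Linq;

class LpNormByHand
{
    static void Main()
    {
        // First row of the NormalizeLpNorm sample above.
        float[] row = { 1, 1, 0, 0 };

        // M(X): the row mean, subtracted because ensureZeroMean is true.
        float mean = row.Average();

        // D(X): the L1 norm of the mean-centered row.
        float l1Norm = row.Select(x => Math.Abs(x - mean)).Sum();

        // Y = (X - M(X)) / D(X)
        var normalized = row.Select(x => (x - mean) / l1Norm);

        Console.WriteLine(string.Join(", ", normalized.Select(x => x.ToString("f4"))));
        // Prints: 0.2500, 0.2500, -0.2500, -0.2500
    }
}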