From 4e39a9201a78aefd10c4647f8d899c7fe1471fbe Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Mon, 8 Apr 2019 10:02:37 -0700 Subject: [PATCH 01/10] Separate documentaion for projection transforms --- .../Transforms/ApproximatedKernelMap.cs | 50 +++++++++++++++++++ .../Transforms/NormalizeGlobalContrast.cs | 50 +++++++++++++++++++ .../Dynamic/Transforms/NormalizeLpNorm.cs | 50 +++++++++++++++++++ .../FourierDistributionSampler.cs | 2 +- src/Microsoft.ML.Transforms/KernelCatalog.cs | 2 +- .../NormalizerCatalog.cs | 4 +- 6 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs new file mode 100644 index 0000000000..66eb704152 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms; + +namespace Samples.Dynamic +{ + public static class ApproximatedKernelMap + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. 
+ var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[7] { 1, 1, 0, 0, 1, 0, 1} }, + new DataPoint(){ Features = new float[7] { 0, 0, 1, 0, 0, 1, 1} }, + new DataPoint(){ Features = new float[7] {-1, 1, 0,-1,-1, 0,-1} }, + new DataPoint(){ Features = new float[7] { 0,-1, 0, 1, 0,-1,-1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // ApproximatedKernel map takes data and maps it's to a random low-dimensional space. + var approximation = mlContext.Transforms.ApproximatedKernelMap("Features", rank: 4, generator: new GaussianKernel(gamma: 0.7f), seed: 1); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. + var tansformer = approximation.Fit(data); + var transformedData = tansformer.Transform(data); + + var column = transformedData.GetColumn("Features").ToArray(); + //-0.0119, 0.5867, 0.4942, 0.7041 + // 0.4720, 0.5639, 0.4346, 0.2671 + //-0.2243, 0.7071, 0.7053, -0.1681 + // 0.0846, 0.5836, 0.6575, 0.0581 + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + } + + private class DataPoint + { + [VectorType(7)] + public float[] Features { get; set; } + } + + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs new file mode 100644 index 0000000000..51d9d8cad9 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; + +namespace Samples.Dynamic +{ + class NormalizeGlobalContrast + { + public static void Example() + { + // Create a new ML 
context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 1, 1, 0, 0} }, + new DataPoint(){ Features = new float[4] { 2, 2, 0, 0} }, + new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} }, + new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. + // Performs the following operaion on a row X: Y = scale *(X - M) / D + // where M is mean and D is either Standard deviation or L2 norm. + var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. 
+ var tansformer = approximation.Fit(data); + var transformedData = tansformer.Transform(data); + + var column = transformedData.GetColumn("Features").ToArray(); + // 2.0000, 2.0000,-2.0000,-2.0000 + // 2.0000, 2.0000,-2.0000,-2.0000 + // 2.0000,-2.0000, 2.0000,-2.0000 + //-2.0000, 2.0000,-2.0000, 2.0000 + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + } + + private class DataPoint + { + [VectorType(7)] + public float[] Features { get; set; } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs new file mode 100644 index 0000000000..2e021ca8b1 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -0,0 +1,50 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms; + +namespace Samples.Dynamic +{ + class NormalizeLpNorm + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var mlContext = new MLContext(); + var samples = new List() + { + new DataPoint(){ Features = new float[4] { 1, 1, 0, 0} }, + new DataPoint(){ Features = new float[4] { 2, 2, 0, 0} }, + new DataPoint(){ Features = new float[4] { 1, 0, 1, 0} }, + new DataPoint(){ Features = new float[4] { 0, 1, 0, 1} } + }; + // Convert training data to IDataView, the general data type used in ML.NET. + var data = mlContext.Data.LoadFromEnumerable(samples); + // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. + // Performs the following operaion on a row X: Y = (X - M) / D where M is mean, and D is selected norm. 
+ var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true); + + // Now we can transform the data and look at the output to confirm the behavior of the estimator. + // This operation doesn't actually evaluate data until we read the data below. + var tansformer = approximation.Fit(data); + var transformedData = tansformer.Transform(data); + + var column = transformedData.GetColumn("Features").ToArray(); + // 0.2500, 0.2500, -0.2500, -0.2500 + // 0.2500, 0.2500, -0.2500, -0.2500 + // 0.2500, -0.2500, 0.2500, -0.2500 + //-0.2500, 0.2500, -0.2500, 0.2500 + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); + } + + private class DataPoint + { + [VectorType(7)] + public float[] Features { get; set; } + } + } +} diff --git a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs index 4ffba0a9e2..59ea97bf30 100644 --- a/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs +++ b/src/Microsoft.ML.Transforms/FourierDistributionSampler.cs @@ -203,7 +203,7 @@ internal sealed class Options : IComponentFactory /// /// Create a new instance of a LaplacianKernel. /// - /// The coefficient in the exponent of the kernel function + /// The coefficient in the exponent of the kernel function. 
public LaplacianKernel(float a = 1) { Contracts.CheckParam(a > 0, nameof(a)); diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index e399038833..040c411c93 100644 --- a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -26,7 +26,7 @@ public static class KernelExpansionCatalog /// /// /// /// /// diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index cfec3f878a..f786dbb401 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -249,7 +249,7 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// /// /// /// /// @@ -279,7 +279,7 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo /// /// /// /// /// From 1701698cd8201d4d8c48afb683db130535eaf9a3 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Mon, 8 Apr 2019 10:02:51 -0700 Subject: [PATCH 02/10] remove old file --- .../Dynamic/ProjectionTransforms.cs | 94 ------------------- 1 file changed, 94 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs deleted file mode 100644 index 67d4680796..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/ProjectionTransforms.cs +++ /dev/null @@ -1,94 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Microsoft.ML.Data; - -namespace Microsoft.ML.Samples.Dynamic -{ - public static class ProjectionTransforms - { - public static void Example() - { - // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, - // as well as the source of randomness. 
- var ml = new MLContext(); - - // Get a small dataset as an IEnumerable and convert it to an IDataView. - IEnumerable data = SamplesUtils.DatasetUtils.GetVectorOfNumbersData(); - var trainData = ml.Data.LoadFromEnumerable(data); - - // Preview of the data. - // - // Features - // 0 1 2 3 4 5 6 7 8 9 - // 1 2 3 4 5 6 7 8 9 0 - // 2 3 4 5 6 7 8 9 0 1 - // 3 4 5 6 7 8 9 0 1 2 - // 4 5 6 7 8 9 0 1 2 3 - // 5 6 7 8 9 0 1 2 3 4 - // 6 7 8 9 0 1 2 3 4 5 - - // A small printing utility. - Action>> printHelper = (colName, column) => - { - Console.WriteLine($"{colName} column obtained post-transformation."); - foreach (var row in column) - Console.WriteLine($"{string.Join(" ",row.DenseValues().Select(x=>x.ToString("f3")))} "); - }; - - // A pipeline to project Features column into Random fourier space. - var rffPipeline = ml.Transforms.ApproximatedKernelMap(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), rank: 4); - // The transformed (projected) data. - var transformedData = rffPipeline.Fit(trainData).Transform(trainData); - // Getting the data of the newly created column, so we can preview it. - var randomFourier = transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), randomFourier); - - // Features column obtained post-transformation. - // - //0.634 0.628 -0.705 -0.337 - //0.704 0.683 -0.555 -0.422 - //0.407 0.542 -0.707 -0.616 - //0.473 0.331 -0.400 -0.699 - //0.181 0.361 -0.335 -0.157 - //0.165 0.117 -0.547 0.014 - - // A pipeline to project Features column into L-p normalized vector. - var lpNormalizePipeline = ml.Transforms.NormalizeLpNorm(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), norm: Transforms.LpNormNormalizingEstimatorBase.NormFunction.L1); - // The transformed (projected) data. 
- transformedData = lpNormalizePipeline.Fit(trainData).Transform(trainData); - // Getting the data of the newly created column, so we can preview it. - var lpNormalize= transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), lpNormalize); - - // Features column obtained post-transformation. - // - // 0.000 0.022 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 - // 0.022 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 - // 0.044 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 0.022 - // 0.067 0.089 0.111 0.133 0.156 0.178 0.200 0.000 0.022 0.044 - // 0.111 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 - // 0.133 0.156 0.178 0.200 0.000 0.022 0.044 0.067 0.089 0.111 - - // A pipeline to project Features column into L-p normalized vector. - var gcNormalizePipeline = ml.Transforms.NormalizeGlobalContrast(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), ensureZeroMean:false); - // The transformed (projected) data. - transformedData = gcNormalizePipeline.Fit(trainData).Transform(trainData); - // Getting the data of the newly created column, so we can preview it. - var gcNormalize = transformedData.GetColumn>(transformedData.Schema[nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features)]); - - printHelper(nameof(SamplesUtils.DatasetUtils.SampleVectorOfNumbersData.Features), gcNormalize); - - // Features column obtained post-transformation. 
- // - // 0.000 0.059 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 - // 0.059 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 - // 0.118 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 0.059 - // 0.178 0.237 0.296 0.355 0.415 0.474 0.533 0.000 0.059 0.118 - // 0.296 0.355 0.415 0.474 0.533 0.000 0.059 0.118 0.178 0.237 - // 0.355 0.415 0.474 0.533 0.000 0.059 0.118 0.178 0.237 0.296 - } - } -} From 8badc774935f1a6fb1c4a639f7aae57575fb3500 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Mon, 8 Apr 2019 10:43:29 -0700 Subject: [PATCH 03/10] Address comments --- .../Dynamic/Transforms/ApproximatedKernelMap.cs | 1 + .../Dynamic/Transforms/NormalizeGlobalContrast.cs | 2 +- .../Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs index 66eb704152..c80df20d9e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs @@ -9,6 +9,7 @@ namespace Samples.Dynamic { public static class ApproximatedKernelMap { + // Transform feature vector to another non-linear space. See https://people.eecs.berkeley.edu/~brecht/papers/07.rah.rec.nips.pdf. public static void Example() { // Create a new ML context, for ML.NET operations. 
It can be used for exception tracking and logging, diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index 51d9d8cad9..4d3eb2a0b0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -24,7 +24,7 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(samples); // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. // Performs the following operaion on a row X: Y = scale *(X - M) / D - // where M is mean and D is either Standard deviation or L2 norm. + // where M is mean vectors and D is value of either Standard deviation or L2 norm. var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true); // Now we can transform the data and look at the output to confirm the behavior of the estimator. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index 2e021ca8b1..de3abb5835 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -24,7 +24,8 @@ public static void Example() // Convert training data to IDataView, the general data type used in ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. - // Performs the following operaion on a row X: Y = (X - M) / D where M is mean, and D is selected norm. + // Performs the following operaion on a row X: Y = (X - M) / D + // where M is mean vector, and D is calculated value of selected `norm` parameter . 
var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true); // Now we can transform the data and look at the output to confirm the behavior of the estimator. From 9b0a42aa14b0b6cd8b3493c7c7e757f10ccba75e Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 09:13:48 -0700 Subject: [PATCH 04/10] address comments --- .../Dynamic/Transforms/ApproximatedKernelMap.cs | 1 + .../Dynamic/Transforms/NormalizeGlobalContrast.cs | 8 +++++--- .../Dynamic/Transforms/NormalizeLpNorm.cs | 8 +++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs index c80df20d9e..f253ee03b7 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs @@ -33,6 +33,7 @@ public static void Example() var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); + // Expected output: //-0.0119, 0.5867, 0.4942, 0.7041 // 0.4720, 0.5639, 0.4346, 0.2671 //-0.2243, 0.7071, 0.7053, -0.1681 diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index 4d3eb2a0b0..b23634450a 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -23,8 +23,9 @@ public static void Example() // Convert training data to IDataView, the general data type used in ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. 
- // Performs the following operaion on a row X: Y = scale *(X - M) / D - // where M is mean vectors and D is value of either Standard deviation or L2 norm. + // Performs the following operaion on a row X: Y = scale *(X - M(X)) / D(X) + // where M(X) is scalar value of mean for current row, + // and D(X) is scalar value of either Standard deviation or L2 norm. var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true); // Now we can transform the data and look at the output to confirm the behavior of the estimator. @@ -33,6 +34,7 @@ public static void Example() var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); + // Expected output: // 2.0000, 2.0000,-2.0000,-2.0000 // 2.0000, 2.0000,-2.0000,-2.0000 // 2.0000,-2.0000, 2.0000,-2.0000 @@ -43,7 +45,7 @@ public static void Example() private class DataPoint { - [VectorType(7)] + [VectorType(4)] public float[] Features { get; set; } } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index de3abb5835..4dfca9a123 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -24,8 +24,9 @@ public static void Example() // Convert training data to IDataView, the general data type used in ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. - // Performs the following operaion on a row X: Y = (X - M) / D - // where M is mean vector, and D is calculated value of selected `norm` parameter . + // Performs the following operaion on a row X: Y = (X - M(X)) / D(X) + // where M(X) is scalar value of mean for current row, + // and D(X) is scalar value of selected `norm` parameter . 
var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true); // Now we can transform the data and look at the output to confirm the behavior of the estimator. @@ -34,6 +35,7 @@ public static void Example() var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); + // Expected output: // 0.2500, 0.2500, -0.2500, -0.2500 // 0.2500, 0.2500, -0.2500, -0.2500 // 0.2500, -0.2500, 0.2500, -0.2500 @@ -44,7 +46,7 @@ public static void Example() private class DataPoint { - [VectorType(7)] + [VectorType(4)] public float[] Features { get; set; } } } From 71e83a08c0600e1d199bd696acf070ff565138df Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 12:08:39 -0700 Subject: [PATCH 05/10] Switch order of comments and console.writeline --- .../Dynamic/Transforms/ApproximatedKernelMap.cs | 4 ++-- .../Dynamic/Transforms/NormalizeGlobalContrast.cs | 4 ++-- .../Dynamic/Transforms/NormalizeLpNorm.cs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs index f253ee03b7..e651dcd1b9 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs @@ -33,13 +33,13 @@ public static void Example() var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: //-0.0119, 0.5867, 0.4942, 0.7041 // 0.4720, 0.5639, 0.4346, 0.2671 //-0.2243, 0.7071, 0.7053, -0.1681 // 0.0846, 0.5836, 0.6575, 0.0581 - foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => 
x.ToString("f4")))); } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index b23634450a..e882e7ef8d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -34,13 +34,13 @@ public static void Example() var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: // 2.0000, 2.0000,-2.0000,-2.0000 // 2.0000, 2.0000,-2.0000,-2.0000 // 2.0000,-2.0000, 2.0000,-2.0000 //-2.0000, 2.0000,-2.0000, 2.0000 - foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index 4dfca9a123..8c8fd21594 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -35,13 +35,13 @@ public static void Example() var transformedData = tansformer.Transform(data); var column = transformedData.GetColumn("Features").ToArray(); + foreach (var row in column) + Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: // 0.2500, 0.2500, -0.2500, -0.2500 // 0.2500, 0.2500, -0.2500, -0.2500 // 0.2500, -0.2500, 0.2500, -0.2500 //-0.2500, 0.2500, -0.2500, 0.2500 - foreach (var row in column) - Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); } private class DataPoint From 70bb363baeaf733c60c831725c0ff46cb9452f44 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 
Apr 2019 15:54:24 -0700 Subject: [PATCH 06/10] shift output by one --- .../Dynamic/Transforms/ApproximatedKernelMap.cs | 8 ++++---- .../Dynamic/Transforms/NormalizeGlobalContrast.cs | 8 ++++---- .../Dynamic/Transforms/NormalizeLpNorm.cs | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs index e651dcd1b9..ce7152b50f 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/ApproximatedKernelMap.cs @@ -36,10 +36,10 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - //-0.0119, 0.5867, 0.4942, 0.7041 - // 0.4720, 0.5639, 0.4346, 0.2671 - //-0.2243, 0.7071, 0.7053, -0.1681 - // 0.0846, 0.5836, 0.6575, 0.0581 + // -0.0119, 0.5867, 0.4942, 0.7041 + // 0.4720, 0.5639, 0.4346, 0.2671 + // -0.2243, 0.7071, 0.7053, -0.1681 + // 0.0846, 0.5836, 0.6575, 0.0581 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index e882e7ef8d..c921bc95d0 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -37,10 +37,10 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 2.0000, 2.0000,-2.0000,-2.0000 - // 2.0000, 2.0000,-2.0000,-2.0000 - // 2.0000,-2.0000, 2.0000,-2.0000 - //-2.0000, 2.0000,-2.0000, 2.0000 + // 2.0000, 2.0000,-2.0000,-2.0000 + // 2.0000, 2.0000,-2.0000,-2.0000 + // 2.0000,-2.0000, 2.0000,-2.0000 + // -2.0000, 2.0000,-2.0000, 
2.0000 } private class DataPoint diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index 8c8fd21594..d3489594d2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -38,10 +38,10 @@ public static void Example() foreach (var row in column) Console.WriteLine(string.Join(", ", row.Select(x => x.ToString("f4")))); // Expected output: - // 0.2500, 0.2500, -0.2500, -0.2500 - // 0.2500, 0.2500, -0.2500, -0.2500 - // 0.2500, -0.2500, 0.2500, -0.2500 - //-0.2500, 0.2500, -0.2500, 0.2500 + // 0.2500, 0.2500, -0.2500, -0.2500 + // 0.2500, 0.2500, -0.2500, -0.2500 + // 0.2500, -0.2500, 0.2500, -0.2500 + // -0.2500, 0.2500, -0.2500, 0.2500 } private class DataPoint From 8d2904521bb6e6f54ba8c9f79a0004a3ea4ee388 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 16:22:10 -0700 Subject: [PATCH 07/10] ensure zero mean --- .../Dynamic/Transforms/NormalizeGlobalContrast.cs | 2 +- .../Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index c921bc95d0..cb72336482 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -24,7 +24,7 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(samples); // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. 
// Performs the following operaion on a row X: Y = scale *(X - M(X)) / D(X) - // where M(X) is scalar value of mean for current row, + // where M(X) is scalar value of mean for current row if ensureZeroMean = true or 0 othewise // and D(X) is scalar value of either Standard deviation or L2 norm. var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true); diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index d3489594d2..39296536aa 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -25,7 +25,7 @@ public static void Example() var data = mlContext.Data.LoadFromEnumerable(samples); // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. // Performs the following operaion on a row X: Y = (X - M(X)) / D(X) - // where M(X) is scalar value of mean for current row, + // where M(X) is scalar value of mean for current row if ensureZeroMean = true or 0 othewise // and D(X) is scalar value of selected `norm` parameter . 
var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true); From dfa63081a349bffd7699c73804f9009ae12cc675 Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Tue, 9 Apr 2019 16:22:42 -0700 Subject: [PATCH 08/10] ApproximatedKernelMap --- src/Microsoft.ML.Transforms/KernelCatalog.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.Transforms/KernelCatalog.cs b/src/Microsoft.ML.Transforms/KernelCatalog.cs index 040c411c93..5e36ea8927 100644 --- a/src/Microsoft.ML.Transforms/KernelCatalog.cs +++ b/src/Microsoft.ML.Transforms/KernelCatalog.cs @@ -26,7 +26,7 @@ public static class KernelExpansionCatalog /// /// /// /// /// From f53218c8c0fd99e066fdf6bff2543fbe55cc307a Mon Sep 17 00:00:00 2001 From: Ivan Matantsev Date: Fri, 12 Apr 2019 09:33:25 -0700 Subject: [PATCH 09/10] Let's see can I make people happier? --- .../Dynamic/Transforms/NormalizeGlobalContrast.cs | 4 ---- .../Dynamic/Transforms/NormalizeLpNorm.cs | 4 ---- src/Microsoft.ML.Transforms/NormalizerCatalog.cs | 11 +++++++++++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs index cb72336482..322997ca3b 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeGlobalContrast.cs @@ -22,10 +22,6 @@ public static void Example() }; // Convert training data to IDataView, the general data type used in ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. 
- // Performs the following operaion on a row X: Y = scale *(X - M(X)) / D(X) - // where M(X) is scalar value of mean for current row if ensureZeroMean = true or 0 othewise - // and D(X) is scalar value of either Standard deviation or L2 norm. var approximation = mlContext.Transforms.NormalizeGlobalContrast("Features", ensureZeroMean: false, scale:2, ensureUnitStandardDeviation:true); // Now we can transform the data and look at the output to confirm the behavior of the estimator. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs index 39296536aa..83883ae49e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/NormalizeLpNorm.cs @@ -23,10 +23,6 @@ public static void Example() }; // Convert training data to IDataView, the general data type used in ML.NET. var data = mlContext.Data.LoadFromEnumerable(samples); - // NormalizeLpNorm normalize rows individually by rescaling them to unit norm. - // Performs the following operaion on a row X: Y = (X - M(X)) / D(X) - // where M(X) is scalar value of mean for current row if ensureZeroMean = true or 0 othewise - // and D(X) is scalar value of selected `norm` parameter . var approximation = mlContext.Transforms.NormalizeLpNorm("Features", norm: LpNormNormalizingEstimatorBase.NormFunction.L1, ensureZeroMean: true); // Now we can transform the data and look at the output to confirm the behavior of the estimator. diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index ce5d4f6b63..9c69ec69a0 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -246,6 +246,11 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// Name of column to transform. 
If set to , the value of the will be used as source. /// Type of norm to use to normalize each sample. The indicated norm of the resulted vector will be normalized to one. /// If , subtract mean from each value before normalizing and use the raw input otherwise. + /// + /// This transform performs the following operation on a each row X: Y = (X - M(X)) / D(X) + /// where M(X) is scalar value of mean for current row if set to or 0 othewise + /// and D(X) is scalar value of selected . + /// /// /// /// If , subtract mean from each value before normalizing and use the raw input otherwise. /// If , resulted vector's standard deviation would be one. Otherwise, resulted vector's L2-norm would be one. /// Scale features by this value. + /// + /// This transform performs the following operation on a row X: Y = scale * (X - M(X)) / D(X) + /// where M(X) is scalar value of mean for current row if set to or 0 othewise + /// D(X) is scalar value of standard deviation for row if set to or + /// L2 norm value for this row if it set to and scale is . + /// /// /// /// Date: Fri, 12 Apr 2019 10:14:50 -0700 Subject: [PATCH 10/10] update for Wschin comments --- src/Microsoft.ML.Transforms/NormalizerCatalog.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs index 9c69ec69a0..e729567461 100644 --- a/src/Microsoft.ML.Transforms/NormalizerCatalog.cs +++ b/src/Microsoft.ML.Transforms/NormalizerCatalog.cs @@ -248,7 +248,7 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog, /// If , subtract mean from each value before normalizing and use the raw input otherwise. 
/// /// This transform performs the following operation on a each row X: Y = (X - M(X)) / D(X) - /// where M(X) is scalar value of mean for current row if set to or 0 othewise + /// where M(X) is scalar value of mean for all elements in the current row if set to or 0 otherwise /// and D(X) is scalar value of selected . /// /// @@ -283,9 +283,9 @@ internal static LpNormNormalizingEstimator NormalizeLpNorm(this TransformsCatalo /// Scale features by this value. /// /// This transform performs the following operation on a row X: Y = scale * (X - M(X)) / D(X) - /// where M(X) is scalar value of mean for current row if set to or 0 othewise + /// where M(X) is scalar value of mean for all elements in the current row if set to or 0 otherwise /// D(X) is scalar value of standard deviation for row if set to or - /// L2 norm value for this row if it set to and scale is . + /// L2 norm of this row vector if set to and scale is . /// /// ///