From b79c747992f22aafe1ca83e7de5f2abaa95b9fb5 Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Thu, 28 Mar 2019 14:43:20 -0700
Subject: [PATCH 1/2] Created sample for text normalizing API.

---
 .../Dynamic/Transforms/Text/NormalizeText.cs  | 57 +++++++++++++++++++
 .../Text/TextCatalog.cs                       | 14 +++++
 2 files changed, 71 insertions(+)
 create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
new file mode 100644
index 0000000000..7f18469f41
--- /dev/null
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
@@ -0,0 +1,57 @@
+﻿using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Microsoft.ML.Samples.Dynamic
+{
+    public static class NormalizeText
+    {
+        public static void Example()
+        {
+            // Create a new ML context for ML.NET operations. It can be used for exception tracking and logging,
+            // as well as the source of randomness.
+            var mlContext = new MLContext();
+
+            // Create an empty data sample list. The 'NormalizeText' API does not require training data as
+            // the estimator ('TextNormalizingEstimator') created by 'NormalizeText' API is not a trainable estimator.
+            // The empty list is only needed to pass input schema to the pipeline.
+            var samples = new List<TextData>();
+
+            // Convert sample list to an empty IDataView.
+            var dataview = mlContext.Data.LoadFromEnumerable(samples);
+
+            // A pipeline for normalizing text.
+            var normTextPipeline = mlContext.Transforms.Text.NormalizeText("NormalizedText", "Text",
+                Transforms.Text.TextNormalizingEstimator.CaseMode.Lower,
+                keepDiacritics: false,
+                keepPunctuations: false,
+                keepNumbers: false);
+
+            // Fit to data.
+            var normTextTransformer = normTextPipeline.Fit(dataview);
+
+            // Create the prediction engine to get the normalized text from the input text/string.
+            var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(normTextTransformer);
+
+            // Normalize a single example by calling the prediction API.
+            var data = new TextData() { Text = "ML.NET's NormalizeText API changes the case of the TEXT and removes/keeps diâcrîtîcs, punctuations, and/or numbers (123)." };
+            var prediction = predictionEngine.Predict(data);
+
+            // Print the normalized text.
+            Console.WriteLine($"Normalized Text: {prediction.NormalizedText}");
+
+            // Expected output (lower-cased; diacritics, punctuation, and numbers removed):
+            //   Normalized Text: mlnets normalizetext api changes the case of the text and removeskeeps diacritics punctuations andor numbers
+        }
+
+        public class TextData
+        {
+            public string Text { get; set; }
+        }
+
+        public class TransformedTextData : TextData
+        {
+            public string NormalizedText { get; set; }
+        }
+    }
+}
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index e2baf1578e..605b465af1 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -57,6 +57,13 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text
         /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
         /// <param name="useMarkerCharacters">Whether to prepend a marker character, <see langword="0x02"/>, to the beginning,
         /// and append another marker character, <see langword="0x03"/>, to the end of the output vector of characters.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[TokenizeIntoCharacters](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharacters.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
             string inputColumnName = null,
@@ -93,6 +100,13 @@ internal static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(thi
         /// <param name="keepDiacritics">Whether to keep diacritical marks or remove them.</param>
         /// <param name="keepPunctuations">Whether to keep punctuation marks or remove them.</param>
         /// <param name="keepNumbers">Whether to keep numbers or remove them.</param>
+        /// <example>
+        /// <format type="text/markdown">
+        /// <![CDATA[
+        /// [!code-csharp[NormalizeText](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs)]
+        /// ]]>
+        /// </format>
+        /// </example>
         public static TextNormalizingEstimator NormalizeText(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
             string inputColumnName = null,

From f4be9702597e4634a72096b9bb2a6713f40cfedf Mon Sep 17 00:00:00 2001
From: Zeeshan Ahmed <zeahmed@microsoft.com>
Date: Thu, 28 Mar 2019 14:53:36 -0700
Subject: [PATCH 2/2] Rename sample variables and remove misplaced example tag.

---
 .../Dynamic/Transforms/Text/NormalizeText.cs               | 6 +++---
 src/Microsoft.ML.Transforms/Text/TextCatalog.cs            | 7 -------
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
index 7f18469f41..920ea4353c 100644
--- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
+++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/NormalizeText.cs
@@ -15,10 +15,10 @@ public static void Example()
             // Create an empty data sample list. The 'NormalizeText' API does not require training data as
             // the estimator ('TextNormalizingEstimator') created by 'NormalizeText' API is not a trainable estimator.
             // The empty list is only needed to pass input schema to the pipeline.
-            var samples = new List<TextData>();
+            var emptySamples = new List<TextData>();
 
             // Convert sample list to an empty IDataView.
-            var dataview = mlContext.Data.LoadFromEnumerable(samples);
+            var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples);
 
             // A pipeline for normalizing text.
             var normTextPipeline = mlContext.Transforms.Text.NormalizeText("NormalizedText", "Text",
@@ -28,7 +28,7 @@ public static void Example()
                 keepNumbers: false);
 
             // Fit to data.
-            var normTextTransformer = normTextPipeline.Fit(dataview);
+            var normTextTransformer = normTextPipeline.Fit(emptyDataView);
 
             // Create the prediction engine to get the normalized text from the input text/string.
             var predictionEngine = mlContext.Model.CreatePredictionEngine<TextData, TransformedTextData>(normTextTransformer);
diff --git a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
index 605b465af1..391b8d4f91 100644
--- a/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
+++ b/src/Microsoft.ML.Transforms/Text/TextCatalog.cs
@@ -57,13 +57,6 @@ public static TextFeaturizingEstimator FeaturizeText(this TransformsCatalog.Text
         /// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
         /// <param name="useMarkerCharacters">Whether to prepend a marker character, <see langword="0x02"/>, to the beginning,
         /// and append another marker character, <see langword="0x03"/>, to the end of the output vector of characters.</param>
-        /// <example>
-        /// <format type="text/markdown">
-        /// <![CDATA[
-        /// [!code-csharp[TokenizeIntoCharacters](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Text/TokenizeIntoCharacters.cs)]
-        /// ]]>
-        /// </format>
-        /// </example>
         public static TokenizingByCharactersEstimator TokenizeIntoCharactersAsKeys(this TransformsCatalog.TextTransforms catalog,
             string outputColumnName,
             string inputColumnName = null,