
Added dynamic API snippets to cookbook #1538


Merged · 16 commits · Nov 7, 2018

335 changes: 330 additions & 5 deletions in docs/code/MlNetCookBook.md
@@ -148,12 +148,12 @@ Label Workclass education marital-status

This is how you can read this data:
```diff
-// Create a new environment for ML.NET operations. It can be used for exception tracking and logging,
-// as well as the source of randomness.
-var env = new LocalEnvironment();
+// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
+// as a catalog of available operations and as the source of randomness.
+var mlContext = new MLContext();

 // Create the reader: define the data columns and where to find them in the text file.
-var reader = TextLoader.CreateReader(env, ctx => (
+var reader = mlContext.Data.TextReader(ctx => (
     // A boolean column depicting the 'target label'.
     IsOver50K: ctx.LoadBool(14),
     // Three text columns.
```

**@sfilipi** (Member, Nov 6, 2018), on `mlContext`: the examples of this file are actually tests in CookbookExamples.cs. I believe we want to keep them in sync. #Closed

**@shmoradims** (Author, Nov 6, 2018): We want the cookbook to show the preferred way to load data. It would be confusing for users to see both `mlContext.Data.TextReader` and `TextLoader.CreateReader`. I'd rather change the test instead.

@@ -303,6 +303,52 @@ private class InspectedRow
```csharp
}
```

You can also use the dynamic API to create the equivalent of the previous pipeline.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Create the reader: define the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
    Column = new[] {
        // A boolean column depicting the 'label'.
        new TextLoader.Column("IsOver50k", DataKind.BL, 0),
        // Three text columns.
        new TextLoader.Column("Workclass", DataKind.TX, 1),
        new TextLoader.Column("Education", DataKind.TX, 2),
        new TextLoader.Column("MaritalStatus", DataKind.TX, 3)
    },
    // First line of the file is a header, not a data row.
    HasHeader = true
});

// Start creating our processing pipeline. For now, let's just concatenate all the text columns
// together into one.
var dynamicPipeline = mlContext.Transforms.Concatenate("AllFeatures", "Education", "MaritalStatus");

// Let's verify that the data has been read correctly.
// First, we read the data file.
var data = reader.Read(dataPath);

// Fit our data pipeline and transform data with it.
var transformedData = dynamicPipeline.Fit(data).Transform(data);

// 'transformedData' is a 'promise' of data. Let's actually read it.
var someRows = transformedData
    // Convert to an enumerable of user-defined type.
    .AsEnumerable<InspectedRow>(mlContext, reuseRowObject: false)
    // Take a couple values as an array.
    .Take(4).ToArray();

// Extract the 'AllFeatures' column.
// This will give the entire dataset: make sure to only take several rows
// in case the dataset is huge. This is similar to the static API, except
// you have to specify the column name and type.
var featureColumns = transformedData.GetColumn<string[]>(mlContext, "AllFeatures")
    .Take(20).ToArray();
```

**@Zruty0** (Contributor, Nov 6, 2018), on `var mlContext = new MLContext();`: Please also add these snippets as tests to CookbookSamples: this way we ensure that they compile, and are updated when the API gets updated. #Resolved

**@shmoradims** (Author): yes, already created a new set of tests for the dynamic API.

**@sfilipi** (Member, Nov 6, 2018), on `TextLoader`: `mlContext.Data.TextReader` #Closed

**@shmoradims** (Author): updated them all.

**@JRAlexander** (Nov 6, 2018), on the column definitions: Shouldn't these be DataKind.Text and DataKind.Boolean instead? #Resolved

**@shmoradims** (Author): We have some equivalent names. We tend to prefer the short two-letter version:

    TX = 11,
    TXT = TX,
    Text = TX,

    BL = 12,
    Bool = BL,

**@JRAlexander** (Nov 6, 2018): Is that because not every type has an equivalent name? #Resolved

**@shmoradims** (Author): [John, are you using CodeFlow? If not, your comments will come in as different threads.] I clarified with Pete. We are planning to rename DataKind to be closer to .NET types. We'll need to update all of these once that change happens.

**@sfilipi** (Member, Nov 6, 2018), on `string`: does this work? I believe I had to do `ReadOnlyMemory` #Closed

**@shmoradims** (Author): surprisingly yes. I added actual tests for it.
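The enum aliasing shmoradims describes (Text is just another name for TX) can be reproduced with any plain C# enum. The following is a hypothetical miniature, not the actual ML.NET DataKind definition:

```csharp
using System;

// Hypothetical mini-version of the DataKind aliasing described above:
// alias members share one underlying value, so they are interchangeable.
enum Kind : byte
{
    TX = 11,
    TXT = TX,
    Text = TX,

    BL = 12,
    Bool = BL
}

class Program
{
    static void Main()
    {
        // Alias members compare equal because they are the same constant.
        Console.WriteLine(Kind.Text == Kind.TX);  // True
        Console.WriteLine(Kind.Bool == Kind.BL);  // True
        Console.WriteLine((byte)Kind.Text);       // 11
    }
}
```

This also explains the planned rename mentioned in the thread: adding or renaming an alias is non-breaking as long as the underlying values stay the same.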
## How do I train a regression model?

Generally, in order to train any model in ML.NET, you will go through three steps:
@@ -359,6 +405,45 @@ var learningPipeline = reader.MakeNewEstimator()
var model = learningPipeline.Fit(trainData);
```

You can also use the dynamic API to create the equivalent of the previous pipeline.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
    Column = new[] {
        // We read the first 11 values as a single float vector.
        new TextLoader.Column("FeatureVector", DataKind.R4, 0, 10),

        // Separately, read the target variable.
        new TextLoader.Column("Target", DataKind.R4, 11),
    },
    // First line of the file is a header, not a data row.
    HasHeader = true,
    // Default separator is tab, but we need a semicolon.
    Separator = ";"
});

// Now read the file (remember though, readers are lazy, so the actual reading will happen when the data is accessed).
var trainData = reader.Read(trainDataPath);

// Step two: define the learning pipeline.

// We 'start' the pipeline with the output of the reader.
var dynamicPipeline =
    // First 'normalize' the data (rescale to be
    // between -1 and 1 for all examples), and then train the model.
    mlContext.Transforms.Normalize("FeatureVector")
    // Add the SDCA regression trainer.
    .Append(mlContext.Regression.Trainers.StochasticDualCoordinateAscent(label: "Target", features: "FeatureVector"));

// Step three. Train the pipeline.
var model = dynamicPipeline.Fit(trainData);
```

**@Zruty0** (Contributor, Nov 6, 2018), on ", and then train the model.": not needed #Resolved

**@shmoradims** (Author): removed from both static and dynamic snippets.

**@sfilipi** (Member, Nov 6, 2018), on "Train the": Fit the training data to the pipeline. Did you want to do transform? #Closed

**@shmoradims** (Author): Fixed the text. Not doing any transforms, just writing the equivalent of the static version.
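The comment above notes that readers are lazy: `reader.Read(...)` returns immediately and rows are only materialized when the data is accessed. The same deferred-execution behavior can be demonstrated with a plain C# iterator; a minimal sketch, independent of ML.NET:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

class Program
{
    static int rowsRead = 0;

    // A lazy 'reader': the iterator body does not run until it is enumerated.
    static IEnumerable<int> ReadRows()
    {
        for (int i = 0; i < 1000; i++)
        {
            rowsRead++;
            yield return i;
        }
    }

    static void Main()
    {
        var data = ReadRows();              // nothing has been read yet
        Console.WriteLine(rowsRead);        // 0

        var first = data.Take(3).ToArray(); // enumeration triggers reading
        Console.WriteLine(rowsRead);        // 3: only the rows we asked for
        Console.WriteLine(string.Join(",", first)); // 0,1,2
    }
}
```

This is why the cookbook repeatedly pairs `GetColumn` with `.Take(n)`: without it, enumeration would pull the whole dataset.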
## How do I verify the model quality?

This is the first question that arises after you train the model: how good is it, actually?
@@ -447,6 +532,46 @@ var learningPipeline = reader.MakeNewEstimator()
var model = learningPipeline.Fit(trainData).AsDynamic;
```

You can also use the dynamic API to create the equivalent of the previous pipeline.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
    Column = new[] {
        new TextLoader.Column("SepalLength", DataKind.R4, 0),
        new TextLoader.Column("SepalWidth", DataKind.R4, 1),
        new TextLoader.Column("PetalLength", DataKind.R4, 2),
        new TextLoader.Column("PetalWidth", DataKind.R4, 3),
        // Label: kind of iris.
        new TextLoader.Column("Label", DataKind.TX, 4),
    },
    // Default separator is tab, but the dataset has comma.
    Separator = ","
});

// Retrieve the training data.
var trainData = reader.Read(irisDataPath);

// Build the training pipeline.
var dynamicPipeline =
    // Concatenate all the features together into one column 'Features'.
    mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
    // Note that the label is text, so it needs to be converted to a key.
    .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
    // Use the multi-class SDCA model to predict the label using features.
    .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent())
    // Apply the inverse conversion from 'PredictedLabel' key back to string value.
    .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

// Train the model.
var model = dynamicPipeline.Fit(trainData);
```

**@Zruty0** (Contributor, Nov 6, 2018), on `ValueToKeyMappingEstimator`: `mlContext.Transforms.Categorical.MapValueToKey` #Resolved

**@shmoradims** (Author): changed.

**@sfilipi** (Member, Nov 6, 2018), on "key": column #Closed

**@sfilipi** (Member, Nov 6, 2018), on "Train the model.": technically you'd have to Transform if you say "train the model". #Resolved

**@shmoradims** (Author): keeping it the same as Pete's words from the static pipeline.
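To make the value-to-key conversion and its inverse concrete: conceptually, distinct text labels are mapped to contiguous integer keys, and the inverse map turns a predicted key back into its label. A minimal, hypothetical dictionary-based sketch, not ML.NET's actual implementation:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

class Program
{
    // Hypothetical sketch: map distinct text labels to contiguous integer keys.
    static Dictionary<string, int> BuildValueToKey(IEnumerable<string> labels) =>
        labels.Distinct()
              .Select((value, index) => (value, key: index + 1)) // keys are 1-based here
              .ToDictionary(p => p.value, p => p.key);

    static void Main()
    {
        var labels = new[] { "setosa", "versicolor", "setosa", "virginica" };

        var valueToKey = BuildValueToKey(labels);
        var keyToValue = valueToKey.ToDictionary(p => p.Value, p => p.Key);

        // Forward mapping: text labels become keys the trainer can consume.
        var keys = labels.Select(l => valueToKey[l]).ToArray();
        Console.WriteLine(string.Join(",", keys)); // 1,2,1,3

        // Inverse mapping: a predicted key is turned back into its label,
        // which is what MapKeyToValue does for 'PredictedLabel' above.
        Console.WriteLine(keyToValue[3]); // virginica
    }
}
```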

Now, in order to use [schema comprehension](SchemaComprehension.md) for prediction, we define a pair of classes like the following:
```csharp
private class IrisInput
Expand Down Expand Up @@ -538,7 +663,7 @@ var trainData = mlContext.CreateStreamingDataView(churnData);
// We apply our FastTree binary classifier to predict the 'HasChurned' label.

var dynamicLearningPipeline = mlContext.Transforms.Categorical.OneHotEncoding("DemographicCategory")
-    .Append(new ColumnConcatenatingEstimator(mlContext, "Features", "DemographicCategory", "LastVisits"))
+    .Append(mlContext.Transforms.Concatenate("Features", "DemographicCategory", "LastVisits"))
.Append(mlContext.BinaryClassification.Trainers.FastTree("HasChurned", "Features", numTrees: 20));

var dynamicModel = dynamicLearningPipeline.Fit(trainData);
@@ -701,6 +826,42 @@ var normalizedData = pipeline.Fit(trainData).Transform(trainData);
var meanVarValues = normalizedData.GetColumn(r => r.MeanVarNormalized).ToArray();
```

You can achieve the same results using the dynamic API.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
Column = new[] {
// The four features of the Iris dataset will be grouped together as one Features column.
new TextLoader.Column("Features", DataKind.R4, 0, 3),
// Label: kind of iris.
new TextLoader.Column("Label", DataKind.TX, 4),
},
// Default separator is tab, but the dataset has comma.
Separator = ","
});

// Read the training data.
var trainData = reader.Read(dataPath);

// Apply all kinds of standard ML.NET normalization to the raw features.
var pipeline =
mlContext.Transforms.Normalize(
new NormalizingEstimator.MinMaxColumn("Features", "MinMaxNormalized", fixZero: true),
new NormalizingEstimator.MeanVarColumn("Features", "MeanVarNormalized", fixZero: true),
new NormalizingEstimator.BinningColumn("Features", "BinNormalized", numBins: 256));

// Let's train our pipeline of normalizers, and then apply it to the same data.
var normalizedData = pipeline.Fit(trainData).Transform(trainData);

// Inspect one column of the resulting dataset.
var meanVarValues = normalizedData.GetColumn<float[]>(mlContext, "MeanVarNormalized").ToArray();
```
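Conceptually, the normalizers above apply simple per-column arithmetic. A minimal sketch, assuming min-max with `fixZero` divides by the maximum absolute value (so zero stays zero), and mean-variance subtracts the mean and divides by the standard deviation; this is an illustration, not ML.NET's implementation:

```csharp
using System;
using System.Linq;

class Program
{
    // Assumed semantics for min-max with fixZero: divide by max absolute value,
    // so zero inputs stay zero and outputs land in [-1, 1].
    static float[] MinMaxFixZero(float[] values)
    {
        float maxAbs = values.Max(v => Math.Abs(v));
        return values.Select(v => v / maxAbs).ToArray();
    }

    // Mean-variance: subtract the mean, divide by the standard deviation.
    static float[] MeanVar(float[] values)
    {
        float mean = values.Average();
        float std = (float)Math.Sqrt(values.Select(v => (v - mean) * (v - mean)).Average());
        return values.Select(v => (v - mean) / std).ToArray();
    }

    static void Main()
    {
        var raw = new float[] { 1f, 2f, 4f };
        Console.WriteLine(string.Join(",", MinMaxFixZero(raw))); // 0.25,0.5,1
        Console.WriteLine(MeanVar(raw).Average());               // ~0: centered
    }
}
```

In the real pipeline these statistics are learned during `Fit` from the training data and then reapplied unchanged by `Transform`.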

## How do I train my model on categorical data?

Generally speaking, *all ML.NET learners expect the features as a float vector*. So, if some of your data is not natively a float, you will need to convert to floats.
@@ -782,6 +943,63 @@ var fullLearningPipeline = learningPipeline
var model = fullLearningPipeline.Fit(data);
```

You can achieve the same results using the dynamic API.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
    Column = new[] {
        new TextLoader.Column("Label", DataKind.BL, 0),
        // We will load all the categorical features into one vector column of size 8.
        new TextLoader.Column("CategoricalFeatures", DataKind.TX, 1, 8),
        // Similarly, load all numerical features into one vector of size 6.
        new TextLoader.Column("NumericalFeatures", DataKind.R4, 9, 14),
        // Let's also separately load the 'Workclass' column.
        new TextLoader.Column("Workclass", DataKind.TX, 1),
    },
    HasHeader = true
});

// Read the data.
var data = reader.Read(dataPath);

// Inspect the categorical columns to check that they are correctly read.
var catColumns = data.GetColumn<string[]>(mlContext, "CategoricalFeatures").Take(10).ToArray();

// Build several alternative featurization pipelines.
var dynamicPipeline =
    // Convert each categorical feature into one-hot encoding independently.
    mlContext.Transforms.Categorical.OneHotEncoding("CategoricalFeatures", "CategoricalOneHot")
    // Convert all categorical features into indices, and build a 'word bag' of these.
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("CategoricalFeatures", "CategoricalBag", CategoricalTransform.OutputKind.Bag))
    // One-hot encode the workclass column, then drop all the categories that have fewer than 10 instances in the train set.
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("Workclass", "WorkclassOneHot"))
    .Append(new CountFeatureSelector(mlContext, "WorkclassOneHot", "WorkclassOneHotTrimmed", count: 10));

// Let's train our pipeline, and then apply it to the same data.
var transformedData = dynamicPipeline.Fit(data).Transform(data);

// Inspect some columns of the resulting dataset.
var categoricalBags = transformedData.GetColumn<float[]>(mlContext, "CategoricalBag").Take(10).ToArray();
var workclasses = transformedData.GetColumn<float[]>(mlContext, "WorkclassOneHotTrimmed").Take(10).ToArray();

// Of course, if we want to train the model, we will need to compose a single float vector of all the features.
// Here's how we could do this:

var fullLearningPipeline = dynamicPipeline
    // Concatenate two of the 3 categorical pipelines, and the numeric features.
    .Append(mlContext.Transforms.Concatenate("Features", "NumericalFeatures", "CategoricalBag", "WorkclassOneHotTrimmed"))
    // Now we're ready to train. We chose our FastTree trainer for this classification task.
    .Append(mlContext.BinaryClassification.Trainers.FastTree(numTrees: 50));

// Train the model.
var model = fullLearningPipeline.Fit(data);
```

**@sfilipi** (Member, Nov 6, 2018), on "categorical columns": by looking at the first 10 records. #Closed

**@sfilipi** (Member, Nov 6, 2018), on `.Take(10)`: should we remove those #Closed

**@Zruty0** (Contributor): I think it's good: otherwise we'll materialize the entire column of data, which may be large.

**@sfilipi** (Member, Nov 6, 2018), on `.Fit(data)`: same here, Transform. #Resolved

**@shmoradims** (Author): Resolved offline.
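The difference between the 'indicator' (one-hot) and 'bag' output kinds above can be shown in miniature: one-hot marks a single vocabulary slot per value, while the bag variant sums the one-hot vectors of every slot in a vector column. A hypothetical sketch, not ML.NET's implementation:

```csharp
using System;
using System.Linq;

class Program
{
    // Hypothetical sketch of one-hot ('indicator') encoding over a fixed vocabulary.
    static float[] OneHot(string value, string[] vocabulary) =>
        vocabulary.Select(v => v == value ? 1f : 0f).ToArray();

    // 'Bag' output: sum the one-hot vectors of every slot in a vector column.
    static float[] Bag(string[] column, string[] vocabulary) =>
        column.Select(v => OneHot(v, vocabulary))
              .Aggregate(new float[vocabulary.Length],
                         (acc, vec) => acc.Zip(vec, (a, b) => a + b).ToArray());

    static void Main()
    {
        var vocab = new[] { "Private", "Federal-gov", "Self-emp" };

        // Indicator: exactly one slot is set per value.
        Console.WriteLine(string.Join(",", OneHot("Federal-gov", vocab))); // 0,1,0

        // Bag: counts accumulate across the slots of the vector column.
        Console.WriteLine(string.Join(",", Bag(new[] { "Private", "Private", "Self-emp" }, vocab))); // 2,0,1
    }
}
```

In the real transform, the vocabulary is learned from the training data during `Fit`.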

## How do I train my model on textual data?

Generally speaking, *all ML.NET learners expect the features as a float vector*. So, if some of your data is not natively a float, you will need to convert to floats.
@@ -852,6 +1070,60 @@ var embeddings = transformedData.GetColumn(x => x.Embeddings).Take(10).ToArray()
var unigrams = transformedData.GetColumn(x => x.BagOfWords).Take(10).ToArray();
```

You can achieve the same results using the dynamic API.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Define the reader: specify the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
    Column = new[] {
        new TextLoader.Column("IsToxic", DataKind.BL, 0),
        new TextLoader.Column("Message", DataKind.TX, 1),
    },
    HasHeader = true
});

// Read the data.
var data = reader.Read(dataPath);

// Inspect the message texts that are read from the file.
var messageTexts = data.GetColumn<string>(mlContext, "Message").Take(20).ToArray();

// Apply various kinds of text operations supported by ML.NET.
var dynamicPipeline =
    // One-stop shop to run the full text featurization.
    mlContext.Transforms.Text.FeaturizeText("Message", "TextFeatures")

    // Normalize the message for later transforms.
    .Append(mlContext.Transforms.Text.NormalizeText("Message", "NormalizedMessage"))

    // NLP pipeline 1: bag of words.
    .Append(new WordBagEstimator(mlContext, "NormalizedMessage", "BagOfWords"))

    // NLP pipeline 2: bag of bigrams, using hashes instead of dictionary indices.
    .Append(new WordHashBagEstimator(mlContext, "NormalizedMessage", "BagOfBigrams",
        ngramLength: 2, allLengths: false))

    // NLP pipeline 3: bag of tri-character sequences with TF-IDF weighting.
    .Append(mlContext.Transforms.Text.TokenizeCharacters("Message", "MessageChars"))
    .Append(new WordBagEstimator(mlContext, "MessageChars", "BagOfTrichar",
        ngramLength: 3, weighting: NgramTransform.WeightingCriteria.TfIdf))

    // NLP pipeline 4: word embeddings.
    .Append(mlContext.Transforms.Text.ExtractWordEmbeddings("NormalizedMessage", "Embeddings",
        WordEmbeddingsTransform.PretrainedModelKind.GloVeTwitter25D));

// Let's train our pipeline, and then apply it to the same data.
// Note that even on a small dataset of 70KB the pipeline above can take up to a minute to completely train.
var transformedData = dynamicPipeline.Fit(data).Transform(data);

// Inspect some columns of the resulting dataset.
var embeddings = transformedData.GetColumn<float[]>(mlContext, "Embeddings").Take(10).ToArray();
var unigrams = transformedData.GetColumn<float[]>(mlContext, "BagOfWords").Take(10).ToArray();
```

**@sfilipi** (Member, Nov 6, 2018), on `WordBagEstimator`: is this not part of the Text catalog? #Closed

**Contributor**: +1

**@shmoradims** (Author): not in the catalog yet. Senja will add it soon.

**@sfilipi** (Member, Nov 6, 2018), on `WordHashBagEstimator`: I think this is part of the text catalog. #Closed

**@shmoradims** (Author): not in the catalog yet. Senja will add it soon.
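To make "bag of tri-character sequences" concrete, here is a minimal sketch of extracting character-trigram counts from a message; TF-IDF weighting would then rescale each count by how rare the trigram is across all messages. This is an illustration only, not ML.NET's implementation:

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

class Program
{
    // Count tri-character sequences in a message.
    static Dictionary<string, int> CharTrigrams(string text)
    {
        var counts = new Dictionary<string, int>();
        for (int i = 0; i + 3 <= text.Length; i++)
        {
            string gram = text.Substring(i, 3);
            counts[gram] = counts.TryGetValue(gram, out int c) ? c + 1 : 1;
        }
        return counts;
    }

    static void Main()
    {
        var grams = CharTrigrams("banana");
        Console.WriteLine(grams["ana"]);  // 2: b[ana]na and ban[ana]
        Console.WriteLine(grams.Count);   // 3 distinct trigrams: ban, ana, nan
    }
}
```

Character n-grams are robust to misspellings, which is why they complement the word-level pipelines above.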
## How do I train using cross-validation?

[Cross-validation](https://en.wikipedia.org/wiki/Cross-validation_(statistics)) is a useful technique for ML applications. It helps estimate the variance of the model quality from one run to another and also eliminates the need to extract a separate test set for evaluation.
@@ -916,6 +1188,59 @@ var microAccuracies = cvResults.Select(r => r.metrics.AccuracyMicro);
Console.WriteLine(microAccuracies.Average());
```

You can achieve the same results using the dynamic API.
```csharp
// Create a new context for ML.NET operations. It can be used for exception tracking and logging,
// as a catalog of available operations and as the source of randomness.
var mlContext = new MLContext();

// Step one: read the data as an IDataView.
// First, we define the reader: specify the data columns and where to find them in the text file.
var reader = new TextLoader(mlContext, new TextLoader.Arguments
{
    Column = new[] {
        new TextLoader.Column("SepalLength", DataKind.R4, 0),
        new TextLoader.Column("SepalWidth", DataKind.R4, 1),
        new TextLoader.Column("PetalLength", DataKind.R4, 2),
        new TextLoader.Column("PetalWidth", DataKind.R4, 3),
        // Label: kind of iris.
        new TextLoader.Column("Label", DataKind.TX, 4),
    },
    // Default separator is tab, but the dataset has comma.
    Separator = ","
});

// Read the data.
var data = reader.Read(dataPath);

// Build the training pipeline.
var dynamicPipeline =
    // Concatenate all the features together into one column 'Features'.
    mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
    // Note that the label is text, so it needs to be converted to a key.
    .Append(new ValueToKeyMappingEstimator(mlContext, "Label"), TransformerScope.TrainTest)
    // Use the multi-class SDCA model to predict the label using features.
    .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

// Split the data 90:10 into train and test sets, train and evaluate.
var (trainData, testData) = mlContext.MulticlassClassification.TrainTestSplit(data, testFraction: 0.1);

// Train the model.
var model = dynamicPipeline.Fit(trainData);
// Compute quality metrics on the test set.
var metrics = mlContext.MulticlassClassification.Evaluate(model.Transform(testData));
Console.WriteLine(metrics.AccuracyMicro);

// Now run the 5-fold cross-validation experiment, using the same pipeline.
var cvResults = mlContext.MulticlassClassification.CrossValidate(data, dynamicPipeline, numFolds: 5);

// The results object is an array of 5 elements. For each of the 5 folds, we have metrics, model and scored test data.
// Let's compute the average micro-accuracy.
var microAccuracies = cvResults.Select(r => r.metrics.AccuracyMicro);
Console.WriteLine(microAccuracies.Average());
```

**@sfilipi** (Member, Nov 6, 2018), on `.Append(m`: does it need KeyToVal at the end? #Resolved

**@shmoradims** (Author): the static pipeline version above doesn't have the KeyToVal for this example, so I'm not adding it. This one is looking at averaged accuracies, so the string value of the label is not as important. The example in "How do I use the model to make one prediction" adds the ToKey, which looks at the transformed data.

**Member**: If they are looking for an example of how to do multiclass, and they use this, the users will have trouble making sense of their predictions. I think we have had several bugs along those lines.
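The 5-fold experiment above can be pictured as partitioning row indices into folds, each fold serving once as the test set while the rest trains the model. A minimal sketch using a simple modulo assignment, not ML.NET's actual splitting logic:

```csharp
using System;
using System.Linq;

class Program
{
    // Rows whose (index mod k) equals the fold number form that fold's test set.
    static int[] TestFold(int n, int k, int fold) =>
        Enumerable.Range(0, n).Where(i => i % k == fold).ToArray();

    static void Main()
    {
        int n = 10, k = 5;
        for (int fold = 0; fold < k; fold++)
        {
            var test = TestFold(n, k, fold);
            var train = Enumerable.Range(0, n).Except(test).ToArray();
            Console.WriteLine($"fold {fold}: test=[{string.Join(",", test)}] trainSize={train.Length}");
        }
        // Every row appears in exactly one test fold, so averaging the k
        // metric values evaluates the pipeline over the whole dataset.
    }
}
```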
## Can I mix and match static and dynamic pipelines?

Yes, we can have both of them in our codebase. The static pipelines are just a statically-typed way to build dynamic pipelines.