|
| 1 | +using Microsoft.ML.Data; |
| 2 | +using Microsoft.ML.Runtime.Api; |
| 3 | +using Microsoft.ML.Runtime.Data; |
| 4 | +using System; |
| 5 | +using System.Collections.Generic; |
| 6 | + |
namespace Microsoft.ML.Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to featurize a text column with the Latent Dirichlet Allocation (LDA) transform.
    /// </summary>
    public class LdaTransformExample
    {
        /// <summary>
        /// Builds a word-bag + LDA pipeline over the sample topics data, fits and applies it to that
        /// same data, and writes the resulting per-row feature vectors to the console.
        /// </summary>
        public static void LdaTransform()
        {
            // Name of the output column that receives the LDA feature vectors.
            const string ldaColumn = "LdaFeatures";

            // The MLContext is the entry point for ML.NET operations. It can be used for exception
            // tracking and logging, and it is also the source of randomness.
            var mlContext = new MLContext();

            // Load a small in-memory dataset and expose it as a data view.
            IEnumerable<SamplesUtils.DatasetUtils.SampleTopicsData> topics = SamplesUtils.DatasetUtils.GetTopicsData();
            var trainingView = mlContext.CreateStreamingDataView(topics);

            // Preview of the "Review" column of the topics data.
            // Each row holds the keys associated with a particular body of text.
            //
            // Review
            // "animals birds cats dogs fish horse"
            // "horse birds house fish duck cats"
            // "car truck driver bus pickup"
            // "car truck driver bus pickup horse"

            // Featurization pipeline for the "Review" column: word bags first, then LDA on top of them.
            var wordBags = mlContext.Transforms.Text.ProduceWordBags("Review");
            var ldaEstimator = mlContext.Transforms.Text.LatentDirichletAllocation("Review", ldaColumn, numTopic: 3);
            var pipeline = wordBags.Append(ldaEstimator);

            // Fit the pipeline on the data, then run that same data through the fitted model.
            var transformedView = pipeline.Fit(trainingView).Transform(trainingView);

            // Read back the LDA output column and print it.
            var ldaVectors = transformedView.GetColumn<VBuffer<float>>(mlContext, ldaColumn);
            PrintColumn(ldaColumn, ldaVectors);

            // LdaFeatures column obtained post-transformation.
            // The pipeline was configured with numTopic: 3, so each row of text is featurized as a
            // vector of three floats. Sample output recorded with this example:
            //
            // 0.1818182 0.4545455 0.3636364
            // 0.3636364 0.1818182 0.4545455
            // 0.2222222 0.2222222 0.5555556
            // 0.2727273 0.09090909 0.6363636

            // Writes a header line, then one console line per row (each value followed by a space),
            // then a separator line.
            void PrintColumn(string columnName, IEnumerable<VBuffer<float>> column)
            {
                Console.WriteLine($"{columnName} column obtained post-transformation.");

                foreach (var row in column)
                {
                    foreach (var value in row.GetValues())
                    {
                        Console.Write($"{value} ");
                    }

                    Console.WriteLine();
                }

                Console.WriteLine("===================================================");
            }
        }
    }
}
0 commit comments