|
| 1 | +using System; |
| 2 | +using System.IO; |
| 3 | +using System.Linq; |
| 4 | +using Microsoft.ML.Data; |
| 5 | +using Microsoft.ML.Transforms.TensorFlow; |
| 6 | + |
namespace Microsoft.ML.Samples.Dynamic.TensorFlow
{
    class TextClassification
    {
        /// <summary>
        /// Length of the fixed-size integer feature vector handed to the TensorFlow model.
        /// Token-id vectors are resized to exactly this many elements before scoring.
        /// </summary>
        /// <remarks>
        /// The identifier spelling ("Lenth") is kept as-is because it is a public constant
        /// that callers may already reference.
        /// </remarks>
        public const int MaxSentenceLenth = 600;

        /// <summary>
        /// Example use of the TensorFlow sentiment classification model.
        /// Downloads the model, prints the type and shape of its input and output tensors,
        /// scores a single movie review and writes the result to the console.
        /// </summary>
        public static void ScoringWithTextClassificationModelSample()
        {
            string modelLocation = SamplesUtils.DatasetUtils.DownloadTensorFlowSentimentModel();

            var mlContext = new MLContext();

            // A single in-memory review that is used both to fit the pipeline and to predict.
            var data = new[] { new IMDBSentiment() {
                Sentiment_Text = "this film was just brilliant casting location scenery story direction " +
                "everyone's really suited the part they played and you could just imagine being there robert " +
                "is an amazing actor and now the same being director father came from the same scottish " +
                "island as myself so i loved the fact there was a real connection with this film the witty " +
                "remarks throughout the film were great it was just brilliant so much that i bought the " +
                "film as soon as it was released for and would recommend it to everyone to watch and the " +
                "fly fishing was amazing really cried at the end it was so sad and you know what they say " +
                "if you cry at a film it must have been good and this definitely was also to the two " +
                "little boy's that played the of norman and paul they were just brilliant children are " +
                "often left out of the list i think because the stars that play them all grown up are " +
                "such a big profile for the whole film but these children are amazing and should be praised " +
                "for what they have done don't you think the whole story was so lovely because it was true " +
                "and was someone's life after all that was shared with us all" } };
            var dataView = mlContext.Data.ReadFromEnumerable(data);

            // This is the dictionary used to convert words into integer indexes.
            // It is a comma-separated file: column 0 holds the word, column 1 holds its id.
            var lookupMap = mlContext.Data.ReadFromTextFile(Path.Combine(modelLocation, "imdb_word_index.csv"),
                columns: new[]
                {
                    new TextLoader.Column("Words", DataKind.TX, 0),
                    new TextLoader.Column("Ids", DataKind.I4, 1),
                },
                separatorChar: ','
            );

            // Load the TensorFlow model once.
            //      - Use it for querying the schema for input and output in the model.
            //      - Use it for prediction in the pipeline.
            var modelInfo = TensorFlowUtils.LoadTensorFlowModel(mlContext, modelLocation);
            var schema = modelInfo.GetModelSchema();
            var featuresType = (VectorType)schema["Features"].Type;
            Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Features", featuresType.ItemType.RawType, featuresType.Dimensions[0]);
            var predictionType = (VectorType)schema["Prediction/Softmax"].Type;
            Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Prediction/Softmax", predictionType.ItemType.RawType, predictionType.Dimensions[0]);

            // The model expects the input feature vector to be a fixed length vector.
            // In this sample, CustomMappingEstimator is used to resize the variable length vector
            // to a fixed length vector.
            // The following ML.NET pipeline
            //      1. tokenizes the string into words,
            //      2. maps each word to an integer which is an index in the dictionary ('lookupMap'),
            //      3. resizes the integer vector to a fixed length vector using CustomMappingEstimator ('resizeFeatures'),
            //      4. passes the data to TensorFlow for scoring,
            //      5. retrieves the 'Prediction' from TensorFlow and puts it into the ML.NET pipeline.

            Action<IMDBSentiment, IntermediateFeatures> resizeFeatures = (input, output) =>
            {
                output.Sentiment_Text = input.Sentiment_Text;

                // Array.Resize truncates a longer array, or pads a shorter one with zeros,
                // so that the result always has exactly MaxSentenceLenth elements.
                var tokenIds = input.VariableLenghtFeatures;
                Array.Resize(ref tokenIds, MaxSentenceLenth);
                output.Features = tokenIds;
            };

            var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text")
                .Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new[] { ("VariableLenghtFeatures", "TokenizedWords") }))
                .Append(mlContext.Transforms.CustomMapping(resizeFeatures, "Resize"))
                .Append(mlContext.Transforms.ScoreTensorFlowModel(modelInfo, new[] { "Prediction/Softmax" }, new[] { "Features" }))
                .Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))
                .Fit(dataView)
                .CreatePredictionEngine<IMDBSentiment, OutputScores>(mlContext);

            // Predict with TensorFlow pipeline.
            var prediction = engine.Predict(data[0]);

            // Index 1 of the prediction vector is reported as the score of the positive class.
            float positiveScore = prediction.Prediction[1];

            Console.WriteLine("Number of classes: {0}", prediction.Prediction.Length);
            Console.WriteLine("Is sentiment/review positive? {0}", positiveScore > 0.5 ? "Yes." : "No.");

            // Format with the invariant culture so that the decimal separator is always '.',
            // which keeps the printed value identical to the expected output on every locale.
            Console.WriteLine("Prediction Confidence: {0}", positiveScore.ToString("0.00", System.Globalization.CultureInfo.InvariantCulture));

            /////////////////////////////////// Expected output ///////////////////////////////////
            //
            // Name: Features, Type: System.Int32, Shape: (-1, 600)
            // Name: Prediction/Softmax, Type: System.Single, Shape: (-1, 2)
            //
            // Number of classes: 2
            // Is sentiment/review positive? Yes.
            // Prediction Confidence: 0.65
        }


        /// <summary>
        /// Class to hold original sentiment data.
        /// </summary>
        public class IMDBSentiment
        {
            /// <summary>
            /// Raw review text; this is the column that the pipeline tokenizes.
            /// </summary>
            public string Sentiment_Text { get; set; }

            /// <summary>
            /// This is a variable length vector designated by VectorType(0) attribute.
            /// Variable length vectors are produced by applying operations such as 'TokenizeWords' on strings
            /// resulting in vectors of tokens of variable lengths.
            /// </summary>
            /// <remarks>
            /// The property name doubles as a pipeline column name, so its spelling must not change.
            /// </remarks>
            [VectorType(0)]
            public int[] VariableLenghtFeatures { get; set; }
        }

        /// <summary>
        /// Class to hold intermediate data. Mostly used by CustomMapping Estimator.
        /// </summary>
        public class IntermediateFeatures
        {
            /// <summary>
            /// Review text copied over unchanged from the input row.
            /// </summary>
            public string Sentiment_Text { get; set; }

            /// <summary>
            /// Fixed length vector of <see cref="MaxSentenceLenth"/> word ids that is fed to the
            /// "Features" input of the TensorFlow model.
            /// </summary>
            [VectorType(MaxSentenceLenth)]
            public int[] Features { get; set; }
        }

        /// <summary>
        /// Class to contain the output values from the transformation.
        /// </summary>
        class OutputScores
        {
            /// <summary>
            /// Two-element score vector copied from the model's "Prediction/Softmax" output.
            /// </summary>
            [VectorType(2)]
            public float[] Prediction { get; set; }
        }

    }
}
0 commit comments