Skip to content

Commit e08a329

Browse files
authored
Created TF text classification sample and moved TF samples to its own directory. (#2429)
1 parent 0523041 commit e08a329

File tree

7 files changed

+183
-11
lines changed

7 files changed

+183
-11
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlowTransform.cs renamed to docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
using System.Linq;
33
using Microsoft.ML.Data;
44

5-
namespace Microsoft.ML.Samples.Dynamic
5+
namespace Microsoft.ML.Samples.Dynamic.TensorFlow
66
{
7-
class TensorFlowTransformExample
7+
class ImageClassification
88
{
99
/// <summary>
10-
/// Example use of the TensorFlowEstimator in a ML.NET pipeline.
10+
/// Example use of the TensorFlow image model in a ML.NET pipeline.
1111
/// </summary>
12-
public static void TensorFlowScoringSample()
12+
public static void ScoringWithImageClassificationModelSample()
1313
{
1414
// Download the ResNet 101 model from the location below.
1515
// https://storage.googleapis.com/download.tensorflow.org/models/tflite_11_05_08/resnet_v2_101.tgz
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
using System;
2+
using System.IO;
3+
using System.Linq;
4+
using Microsoft.ML.Data;
5+
using Microsoft.ML.Transforms.TensorFlow;
6+
7+
namespace Microsoft.ML.Samples.Dynamic.TensorFlow
8+
{
9+
class TextClassification
10+
{
11+
public const int MaxSentenceLenth = 600;
12+
/// <summary>
13+
/// Example use of the TensorFlow sentiment classification model.
14+
/// </summary>
15+
public static void ScoringWithTextClassificationModelSample()
16+
{
17+
string modelLocation = SamplesUtils.DatasetUtils.DownloadTensorFlowSentimentModel();
18+
19+
var mlContext = new MLContext();
20+
var data = new[] { new IMDBSentiment() {
21+
Sentiment_Text = "this film was just brilliant casting location scenery story direction " +
22+
"everyone's really suited the part they played and you could just imagine being there robert " +
23+
"is an amazing actor and now the same being director father came from the same scottish " +
24+
"island as myself so i loved the fact there was a real connection with this film the witty " +
25+
"remarks throughout the film were great it was just brilliant so much that i bought the " +
26+
"film as soon as it was released for and would recommend it to everyone to watch and the " +
27+
"fly fishing was amazing really cried at the end it was so sad and you know what they say " +
28+
"if you cry at a film it must have been good and this definitely was also to the two " +
29+
"little boy's that played the of norman and paul they were just brilliant children are " +
30+
"often left out of the list i think because the stars that play them all grown up are " +
31+
"such a big profile for the whole film but these children are amazing and should be praised " +
32+
"for what they have done don't you think the whole story was so lovely because it was true " +
33+
"and was someone's life after all that was shared with us all" } };
34+
var dataView = mlContext.Data.ReadFromEnumerable(data);
35+
36+
// This is the dictionary to convert words into the integer indexes.
37+
var lookupMap = mlContext.Data.ReadFromTextFile(Path.Combine(modelLocation, "imdb_word_index.csv"),
38+
columns: new[]
39+
{
40+
new TextLoader.Column("Words", DataKind.TX, 0),
41+
new TextLoader.Column("Ids", DataKind.I4, 1),
42+
},
43+
separatorChar: ','
44+
);
45+
46+
// Load the TensorFlow model once.
47+
// - Use it for querying the schema for input and output in the model
48+
// - Use it for prediction in the pipeline.
49+
var modelInfo = TensorFlowUtils.LoadTensorFlowModel(mlContext, modelLocation);
50+
var schema = modelInfo.GetModelSchema();
51+
var featuresType = (VectorType)schema["Features"].Type;
52+
Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Features", featuresType.ItemType.RawType, featuresType.Dimensions[0]);
53+
var predictionType = (VectorType)schema["Prediction/Softmax"].Type;
54+
Console.WriteLine("Name: {0}, Type: {1}, Shape: (-1, {2})", "Prediction/Softmax", predictionType.ItemType.RawType, predictionType.Dimensions[0]);
55+
56+
// The model expects the input feature vector to be a fixed length vector.
57+
// In this sample, CustomMappingEstimator is used to resize variable length vector to fixed length vector.
58+
// The following ML.NET pipeline
59+
// 1. tokenizes the string into words,
60+
// 2. maps each word to an integer which is an index in the dictionary ('lookupMap'),
61+
// 3. Resizes the integer vector to a fixed length vector using CustomMappingEstimator ('ResizeFeaturesAction')
62+
// 4. Passes the data to TensorFlow for scoring.
63+
// 5. Retrieves the 'Prediction' from TensorFlow and puts it into the ML.NET pipeline.
64+
65+
Action<IMDBSentiment, IntermediateFeatures> ResizeFeaturesAction = (i, j) =>
66+
{
67+
j.Sentiment_Text = i.Sentiment_Text;
68+
var features = i.VariableLenghtFeatures;
69+
Array.Resize(ref features, MaxSentenceLenth);
70+
j.Features = features;
71+
};
72+
73+
var engine = mlContext.Transforms.Text.TokenizeWords("TokenizedWords", "Sentiment_Text")
74+
.Append(mlContext.Transforms.Conversion.ValueMap(lookupMap, "Words", "Ids", new[] { ("VariableLenghtFeatures", "TokenizedWords") }))
75+
.Append(mlContext.Transforms.CustomMapping(ResizeFeaturesAction, "Resize"))
76+
.Append(mlContext.Transforms.ScoreTensorFlowModel(modelInfo, new[] { "Prediction/Softmax" }, new[] { "Features" }))
77+
.Append(mlContext.Transforms.CopyColumns(("Prediction", "Prediction/Softmax")))
78+
.Fit(dataView)
79+
.CreatePredictionEngine<IMDBSentiment, OutputScores>(mlContext);
80+
81+
// Predict with TensorFlow pipeline.
82+
var prediction = engine.Predict(data[0]);
83+
84+
Console.WriteLine("Number of classes: {0}", prediction.Prediction.Length);
85+
Console.WriteLine("Is sentiment/review positive? {0}", prediction.Prediction[1] > 0.5 ? "Yes." : "No.");
86+
Console.WriteLine("Prediction Confidence: {0}", prediction.Prediction[1].ToString("0.00"));
87+
88+
/////////////////////////////////// Expected output ///////////////////////////////////
89+
//
90+
// Name: Features, Type: System.Int32, Shape: (-1, 600)
91+
// Name: Prediction/Softmax, Type: System.Single, Shape: (-1, 2)
92+
//
93+
// Number of classes: 2
94+
// Is sentiment/review positive? Yes.
95+
// Prediction Confidence: 0.65
96+
}
97+
98+
99+
/// <summary>
100+
/// Class to hold original sentiment data.
101+
/// </summary>
102+
public class IMDBSentiment
103+
{
104+
public string Sentiment_Text { get; set; }
105+
106+
/// <summary>
107+
/// This is a variable length vector designated by VectorType(0) attribute.
108+
/// Variable length vectors are produced by applying operations such as 'TokenizeWords' on strings
109+
/// resulting in vectors of tokens of variable lengths.
110+
/// </summary>
111+
[VectorType(0)]
112+
public int[] VariableLenghtFeatures { get; set; }
113+
}
114+
115+
/// <summary>
116+
/// Class to hold intermediate data. Mostly used by CustomMapping Estimator
117+
/// </summary>
118+
public class IntermediateFeatures
119+
{
120+
public string Sentiment_Text { get; set; }
121+
122+
[VectorType(MaxSentenceLenth)]
123+
public int[] Features { get; set; }
124+
}
125+
126+
/// <summary>
127+
/// Class to contain the output values from the transformation.
128+
/// </summary>
129+
class OutputScores
130+
{
131+
[VectorType(2)]
132+
public float[] Prediction { get; set; }
133+
}
134+
135+
}
136+
}

docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
<NativeAssemblyReference Include="FastTreeNative" />
2424
<NativeAssemblyReference Include="MatrixFactorizationNative" />
2525
<NativeAssemblyReference Include="LdaNative" />
26-
<PackageReference Include="Microsoft.ML.TensorFlow.Redist" Version="0.7.0" />
26+
<PackageReference Include="Microsoft.ML.TensorFlow.Redist" Version="0.10.0" />
2727

2828
<ProjectReference Include="..\..\..\src\Microsoft.ML.Analyzer\Microsoft.ML.Analyzer.csproj">
2929
<ReferenceOutputAssembly>false</ReferenceOutputAssembly>

src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs

+29
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,35 @@ public static string DownloadImages()
107107
return $"{path}{Path.DirectorySeparatorChar}images.tsv";
108108
}
109109

110+
/// <summary>
/// Downloads sentiment_model from the dotnet/machinelearning-testdata repo.
/// </summary>
/// <remarks>
/// The model is downloaded from
/// https://github.com/dotnet/machinelearning-testdata/blob/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model
/// The model is in 'SavedModel' format. For further explanation of how the `sentiment_model` was created,
/// see https://github.com/dotnet/machinelearning-testdata/blob/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model/README.md
/// </remarks>
/// <returns>
/// The relative path of the local directory ("sentiment_model") that the model files are downloaded into.
/// </returns>
public static string DownloadTensorFlowSentimentModel()
{
    // URL segments are always separated by '/', so remote locations are built by plain
    // concatenation. Path.Combine is a file-system API and would insert the platform
    // directory separator ('\' on Windows) between "variables" and the file name.
    const string remotePath = "https://github.com/dotnet/machinelearning-testdata/raw/master/Microsoft.ML.TensorFlow.TestModels/sentiment_model/";
    const string remoteVarPath = remotePath + "variables/";

    string path = "sentiment_model";
    string varPath = Path.Combine(path, "variables");

    // Directory.CreateDirectory creates any missing parent directories and does nothing
    // when the directory already exists, so one call covers both 'path' and 'varPath'.
    Directory.CreateDirectory(varPath);

    Download(remotePath + "saved_model.pb", Path.Combine(path, "saved_model.pb"));
    Download(remotePath + "imdb_word_index.csv", Path.Combine(path, "imdb_word_index.csv"));
    Download(remoteVarPath + "variables.data-00000-of-00001", Path.Combine(varPath, "variables.data-00000-of-00001"));
    Download(remoteVarPath + "variables.index", Path.Combine(varPath, "variables.index"));

    return path;
}
138+
110139
private static string Download(string baseGitPath, string dataFile)
111140
{
112141
using (WebClient client = new WebClient())

src/Microsoft.ML.TensorFlow/TensorFlowModelInfo.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ internal TensorFlowModelInfo(IHostEnvironment env, TFSession session, string mod
4343
/// <summary>
4444
/// Get <see cref="Schema"/> for complete model. Every node in the TensorFlow model will be included in the <see cref="Schema"/> object.
4545
/// </summary>
46-
internal Schema GetModelSchema()
46+
public Schema GetModelSchema()
4747
{
4848
return TensorFlowUtils.GetModelSchema(_env, Session.Graph);
4949
}

src/Microsoft.ML.TensorFlow/TensorflowCatalog.cs

+10-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ namespace Microsoft.ML
1212
public static class TensorflowCatalog
1313
{
1414
/// <summary>
15-
/// Scores a dataset using a pre-traiend TensorFlow model located in <paramref name="modelLocation"/>.
15+
/// Scores a dataset using a pre-trained <a href="https://www.tensorflow.org/">TensorFlow</a> model located in <paramref name="modelLocation"/>.
1616
/// </summary>
1717
/// <param name="catalog">The transform's catalog.</param>
1818
/// <param name="modelLocation">Location of the TensorFlow model.</param>
@@ -21,7 +21,7 @@ public static class TensorflowCatalog
2121
/// <example>
2222
/// <format type="text/markdown">
2323
/// <![CDATA[
24-
/// [!code-csharp[ScoreTensorFlowModel](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlowTransform.cs)]
24+
/// [!code-csharp[ScoreTensorFlowModel](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/ImageClassification.cs)]
2525
/// ]]>
2626
/// </format>
2727
/// </example>
@@ -32,12 +32,19 @@ public static TensorFlowEstimator ScoreTensorFlowModel(this TransformsCatalog ca
3232
=> new TensorFlowEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnNames, inputColumnNames, modelLocation);
3333

3434
/// <summary>
35-
/// Scores a dataset using a pre-traiend TensorFlow model specified via <paramref name="tensorFlowModel"/>.
35+
/// Scores a dataset using a pre-trained <a href="https://www.tensorflow.org/">TensorFlow</a> model specified via <paramref name="tensorFlowModel"/>.
3636
/// </summary>
3737
/// <param name="catalog">The transform's catalog.</param>
3838
/// <param name="tensorFlowModel">The pre-trained TensorFlow model.</param>
3939
/// <param name="inputColumnNames"> The names of the model inputs.</param>
4040
/// <param name="outputColumnNames">The names of the requested model outputs.</param>
41+
/// <example>
42+
/// <format type="text/markdown">
43+
/// <![CDATA[
44+
/// [!code-csharp[ScoreTensorFlowModel](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/TensorFlow/TextClassification.cs)]
45+
/// ]]>
46+
/// </format>
47+
/// </example>
4148
public static TensorFlowEstimator ScoreTensorFlowModel(this TransformsCatalog catalog,
4249
TensorFlowModelInfo tensorFlowModel,
4350
string[] outputColumnNames,

src/Microsoft.ML.TensorFlow/doc.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
<list type="number">
99
<item>
1010
<description>
11-
Scoring with pretrained TensorFlow model: In this mode, the transform extracts hidden layers&apos; values from a pre-trained Tensorflow model and uses outputs as features in ML.Net pipeline.
11+
Scoring with pretrained <a href="https://www.tensorflow.org/">TensorFlow</a> model: In this mode, the transform extracts hidden layers&apos; values from a pre-trained TensorFlow model and uses outputs as features in ML.NET pipeline.
1212
</description>
1313
</item>
1414
<item>
1515
<description>
16-
Retraining of TensorFlow model: In this mode, the transform retrains a TensorFlow model using the user data passed through ML.Net pipeline. Once the model is trained, it's outputs can be used as features for scoring.
16+
Retraining of <a href="https://www.tensorflow.org/">TensorFlow</a> model: In this mode, the transform retrains a TensorFlow model using the user data passed through ML.NET pipeline. Once the model is trained, its outputs can be used as features for scoring.
1717
</description>
1818
</item>
1919
</list>

0 commit comments

Comments
 (0)