Skip to content

Commit b8fed8b

Browse files
authored
Merge pull request #36 from dsyme/fs3b
Add F# samples
2 parents ed3c424 + f1cd867 commit b8fed8b

18 files changed

+1097
-3
lines changed

.vsts-dotnet-ci.yml

+8
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,14 @@ phases:
77
inputs:
88
projects: '.\samples\csharp\getting-started\GettingStarted.sln'
99

10+
- phase: FSharpGettingStarted
11+
queue: Hosted VS2017
12+
steps:
13+
- task: DotNetCoreCLI@2
14+
displayName: Build F# GettingStarted
15+
inputs:
16+
projects: '.\samples\fsharp\getting-started\GettingStarted.sln'
17+
1018
- phase: BinaryClasification_Titanic
1119
queue: Hosted VS2017
1220
steps:

README.md

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
[ML.NET](https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet) is a cross-platform open-source machine learning framework that makes machine learning accessible to .NET developers.
66

77
ML.NET samples are divided into the following categories:
8-
* **Getting started** - basic "hello world" samples for each ML task.
8+
* **Getting started (C#)** - basic "hello world" samples for each ML task, in C#
9+
* **Getting started (F#)** - basic "hello world" samples for each ML task, in F#
910
* **Examples** - examples of how you can use various ML.NET components (learners, transforms, ...).
10-
* **End-to-end apps** - real world examples of web, desktop, mobile, and other applications infused with ML solutions via [ML.NET APIs](https://docs.microsoft.com/dotnet/api/?view=ml-dotnet).
11+
* **End-to-end (C#)** - real world examples of web, desktop, mobile, and other applications infused with ML solutions via [ML.NET APIs](https://docs.microsoft.com/dotnet/api/?view=ml-dotnet).
1112

1213
All samples in this repo are using the latest released [Microsoft.ML](https://www.nuget.org/packages/Microsoft.ML/) NuGet package. If you would like to see the examples referencing the source code, check out [scenario tests](https://github.com/dotnet/machinelearning/tree/master/test/Microsoft.ML.Tests/Scenarios) in [ML.NET repository](https://github.com/dotnet/machinelearning).
1314

samples/Directory.Build.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<Project>
22

33
<PropertyGroup>
4-
<MicrosoftMLVersion>0.3.0</MicrosoftMLVersion>
4+
<MicrosoftMLVersion>0.4.0</MicrosoftMLVersion>
55
</PropertyGroup>
66

77
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>netcoreapp2.0</TargetFramework>
6+
</PropertyGroup>
7+
8+
<ItemGroup>
9+
<Compile Include="Program.fs" />
10+
<Folder Include="datasets\" />
11+
<None Include="..\..\..\..\datasets\sentiment-imdb-train.txt" Link="datasets\sentiment-imdb-train.txt" />
12+
<None Include="..\..\..\..\datasets\sentiment-yelp-test.txt" Link="datasets\sentiment-yelp-test.txt" />
13+
</ItemGroup>
14+
15+
<ItemGroup>
16+
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
17+
</ItemGroup>
18+
19+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
module BinaryClassification_SentimentAnalysis
2+
3+
open System
4+
open System.IO
5+
open Microsoft.ML
6+
open Microsoft.ML.Data
7+
open Microsoft.ML.Models
8+
open Microsoft.ML.Runtime.Api
9+
open Microsoft.ML.Trainers
10+
open Microsoft.ML.Transforms
11+
12+
type SentimentData() =
13+
[<Column("0")>]
14+
member val SentimentText: string = "" with get, set
15+
16+
[<Column("1", name="Label")>]
17+
member val Sentiment : double = 0.0 with get, set
18+
19+
type SentimentPrediction() =
20+
[<ColumnName("PredictedLabel")>]
21+
member val Sentiment : bool = false with get, set
22+
23+
let sentiments =
24+
[| SentimentData(SentimentText = "Contoso's 11 is a wonderful experience", Sentiment = 1.0)
25+
SentimentData(SentimentText = "The acting in this movie is very bad", Sentiment = 0.0)
26+
SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.", Sentiment = 1.0) |]
27+
28+
let AppPath = Path.Combine(__SOURCE_DIRECTORY__, "../../../..")
29+
let TrainDataPath = Path.Combine(AppPath, "datasets", "sentiment-imdb-train.txt")
30+
let TestDataPath = Path.Combine(AppPath, "datasets", "sentiment-yelp-test.txt")
31+
let modelPath = Path.Combine(AppPath, "SentimentModel.zip")
32+
33+
let TrainAsync() =
34+
// LearningPipeline holds all steps of the learning process: data, transforms, learners.
35+
let pipeline = LearningPipeline()
36+
37+
// The TextLoader loads a dataset. The schema of the dataset is specified by passing a class containing
38+
// all the column names and their types.
39+
pipeline.Add(TextLoader(TrainDataPath).CreateFrom<SentimentData>())
40+
41+
// TextFeaturizer is a transform that will be used to featurize an input column to format and clean the data.
42+
pipeline.Add(TextFeaturizer("Features", "SentimentText"))
43+
44+
// FastTreeBinaryClassifier is an algorithm that will be used to train the model.
45+
// It has three hyperparameters for tuning decision tree performance.
46+
pipeline.Add(FastTreeBinaryClassifier(NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2))
47+
48+
Console.WriteLine("=============== Training model ===============")
49+
// The pipeline is trained on the dataset that has been loaded and transformed.
50+
let model = pipeline.Train<SentimentData, SentimentPrediction>()
51+
52+
// Saving the model as a .zip file.
53+
model.WriteAsync(modelPath) |> Async.AwaitTask |> Async.RunSynchronously
54+
55+
Console.WriteLine("=============== End training ===============")
56+
Console.WriteLine(sprintf "The model is saved to %s" modelPath)
57+
58+
model
59+
60+
let Evaluate(model: PredictionModel<SentimentData, SentimentPrediction> ) =
61+
// To evaluate how well the model predicts values, the model is run against a new set
62+
// of data (test data) that was not involved in training.
63+
let testData = TextLoader(TestDataPath).CreateFrom<SentimentData>()
64+
65+
// BinaryClassificationEvaluator performs evaluation for Binary Classification type of ML problems.
66+
let evaluator = BinaryClassificationEvaluator()
67+
68+
Console.WriteLine("=============== Evaluating model ===============")
69+
70+
let metrics = evaluator.Evaluate(model, testData)
71+
// BinaryClassificationMetrics contains the overall metrics computed by binary classification evaluators
72+
// The Accuracy metric gets the accuracy of a classifier which is the proportion
73+
// of correct predictions in the test set.
74+
75+
// The Auc metric gets the area under the ROC curve.
76+
// The area under the ROC curve is equal to the probability that the classifier ranks
77+
// a randomly chosen positive instance higher than a randomly chosen negative one
78+
// (assuming 'positive' ranks higher than 'negative').
79+
80+
// The F1Score metric gets the classifier's F1 score.
81+
// The F1 score is the harmonic mean of precision and recall:
82+
// 2 * precision * recall / (precision + recall).
83+
84+
Console.WriteLine(sprintf "Accuracy: %0.2f" metrics.Accuracy)
85+
Console.WriteLine(sprintf "Auc: %0.2f" metrics.Auc)
86+
Console.WriteLine(sprintf "F1Score: %0.2f" metrics.F1Score)
87+
Console.WriteLine("=============== End evaluating ===============")
88+
Console.WriteLine()
89+
90+
// STEP 1: Create a model
91+
let model = TrainAsync()
92+
93+
// STEP 2: Test accuracy
94+
Evaluate(model)
95+
96+
// STEP 3: Make a prediction
97+
let predictions = model.Predict(sentiments)
98+
99+
for (sentiment, prediction) in Seq.zip sentiments predictions do
100+
Console.WriteLine( sprintf "Sentiment: %s | Prediction: %s sentiment" sentiment.SentimentText (if prediction.Sentiment then "Positive" else "Negative"))
101+
102+
Console.ReadLine() |> ignore
103+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Sentiment Analysis for User Reviews
2+
In this introductory sample, you'll see how to use [ML.NET](https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet) to predict a sentiment (positive or negative) for customer reviews. In the world of machine learning, this type of prediction is known as **binary classification**.
3+
4+
## Problem
5+
This problem is centered around predicting if a customer's review has positive or negative sentiment. We will use IMDB and Yelp comments that were processed by humans and each comment has been assigned a label:
6+
* 0 - negative
7+
* 1 - positive
8+
9+
Using those datasets we will build a model that will analyze a string and predict a sentiment value of 0 or 1.
10+
11+
## ML task - Binary classification
12+
The generalized problem of **binary classification** is to classify items into one of two classes (classifying items into more than two classes is called **multiclass classification**). Some examples of binary classification:
13+
14+
* predict if an insurance claim is valid or not.
15+
* predict if a plane will be delayed or will arrive on time.
16+
* predict if a face ID (photo) belongs to the owner of a device.
17+
18+
The common feature for all those examples is that the parameter we want to predict can take only one of two values. In other words, this value is represented by a `boolean` type.
19+
20+
## Solution
21+
To solve this problem, first we will build an ML model. Then we will train the model on existing data, evaluate how good it is, and lastly we'll consume the model to predict a sentiment for new reviews.
22+
23+
![Build -> Train -> Evaluate -> Consume](https://github.com/dotnet/machinelearning-samples/raw/master/samples/getting-started/shared_content/modelpipeline.png)
24+
25+
### 1. Build model
26+
27+
Building a model includes: loading data (`sentiment-imdb-train.txt` with `TextLoader`), transforming the data so it can be used effectively by an ML algorithm (with `TextFeaturizer`), and choosing a learning algorithm (`FastTreeBinaryClassifier`). All of those steps are stored in a `LearningPipeline`:
28+
```fsharp
29+
// LearningPipeline holds all steps of the learning process: data, transforms, learners.
30+
let pipeline = LearningPipeline()
31+
// The TextLoader loads a dataset. The schema of the dataset is specified by passing a class containing
32+
// all the column names and their types.
33+
pipeline.Add(TextLoader(TrainDataPath).CreateFrom<SentimentData>())
34+
// TextFeaturizer is a transform that will be used to featurize an input column to format and clean the data.
35+
pipeline.Add(TextFeaturizer("Features", "SentimentText"))
36+
// FastTreeBinaryClassifier is an algorithm that will be used to train the model.
37+
// It has three hyperparameters for tuning decision tree performance.
38+
pipeline.Add(FastTreeBinaryClassifier(NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2))
39+
```
40+
### 2. Train model
41+
Training the model is a process of running the chosen algorithm on training data (with known sentiment values) to tune the parameters of the model. It is implemented in the `Train()` API. To perform training we just call the method and provide the types for our data object `SentimentData` and prediction object `SentimentPrediction`.
42+
```fsharp
43+
let model = pipeline.Train<SentimentData, SentimentPrediction>()
44+
```
45+
### 3. Evaluate model
46+
We need this step to determine how accurately our model operates on new data. To do so, the model from the previous step is run against another dataset that was not used in training (`sentiment-yelp-test.txt`). This dataset also contains known sentiments. `BinaryClassificationEvaluator` calculates the difference between the known sentiments and the values predicted by the model in various metrics.
47+
```fsharp
48+
let testData = TextLoader(TestDataPath).CreateFrom<SentimentData>()
49+
50+
let evaluator = BinaryClassificationEvaluator()
51+
let metrics = evaluator.Evaluate(model, testData)
52+
```
53+
>*To learn more on how to understand the metrics, check out the Machine Learning glossary from the [ML.NET Guide](https://docs.microsoft.com/en-us/dotnet/machine-learning/) or use any available materials on data science and machine learning*.
54+
55+
If you are not satisfied with the quality of the model, there are a variety of ways to improve it, which will be covered in the *examples* category.
56+
57+
>*Keep in mind that for this sample the quality is lower than it could be because the datasets were reduced in size for performance purposes. You can use bigger labeled sentiment datasets available online to significantly improve the quality.*
58+
59+
### 4. Consume model
60+
After the model is trained, we can use the `Predict()` API to predict the sentiment for new reviews.
61+
62+
```fsharp
63+
let predictions = model.Predict(sentiments)
64+
```
65+
Where `sentiments` contains new user reviews that we want to analyze.
66+
67+
```fsharp
68+
let sentiments =
69+
[| SentimentData(SentimentText = "Contoso's 11 is a wonderful experience", Sentiment = 1.0)
70+
SentimentData(SentimentText = "The acting in this movie is very bad", Sentiment = 0.0)
71+
SentimentData(SentimentText = "Joe versus the Volcano Coffee Company is a great film.", Sentiment = 1.0) |]
72+
```
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<Project Sdk="Microsoft.NET.Sdk">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>netcoreapp2.0</TargetFramework>
6+
</PropertyGroup>
7+
8+
<ItemGroup>
9+
<Compile Include="Program.fs" />
10+
<Folder Include="datasets\" />
11+
<None Include="..\..\..\..\datasets\iris-full.txt" Link="datasets\iris-full.txt" />
12+
</ItemGroup>
13+
14+
<ItemGroup>
15+
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
16+
</ItemGroup>
17+
18+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
module Clustering_Iris
2+
3+
open System
4+
open System.IO
5+
open Microsoft.ML
6+
open Microsoft.ML.Runtime.Api
7+
open Microsoft.ML.Data
8+
open Microsoft.ML.Trainers
9+
open Microsoft.ML.Transforms
10+
11+
let AppPath = Path.Combine(__SOURCE_DIRECTORY__, "../../../..")
12+
let DataPath = Path.Combine(AppPath, "datasets", "iris-full.txt")
13+
let ModelPath = Path.Combine(AppPath, "IrisClustersModel.zip")
14+
15+
type IrisData() =
16+
[<Column("0")>]
17+
member val Label = 0.0 with get,set
18+
19+
[<Column("1")>]
20+
member val SepalLength = 0.0 with get, set
21+
22+
[<Column("2")>]
23+
member val SepalWidth = 0.0 with get, set
24+
25+
[<Column("3")>]
26+
member val PetalLength = 0.0 with get, set
27+
28+
[<Column("4")>]
29+
member val PetalWidth = 0.0 with get, set
30+
31+
type ClusterPrediction() =
32+
[<ColumnName("PredictedLabel")>]
33+
member val SelectedClusterId = 0 with get, set
34+
35+
[<ColumnName("Score")>]
36+
member val Distance : float[] = null with get, set
37+
38+
let Train() =
39+
// LearningPipeline holds all steps of the learning process: data, transforms, learners.
40+
let pipeline = LearningPipeline()
41+
// The TextLoader loads a dataset. The schema of the dataset is specified by passing a class containing
42+
// all the column names and their types.
43+
pipeline.Add(TextLoader(DataPath).CreateFrom<IrisData>(useHeader=true))
44+
// ColumnConcatenator concatenates all columns into Features column
45+
pipeline.Add(ColumnConcatenator("Features",
46+
"SepalLength",
47+
"SepalWidth",
48+
"PetalLength",
49+
"PetalWidth"))
50+
// KMeansPlusPlusClusterer is an algorithm that will be used to build clusters. We set the number of clusters to 3.
51+
pipeline.Add(KMeansPlusPlusClusterer(K = 3))
52+
53+
Console.WriteLine("=============== Training model ===============")
54+
let model = pipeline.Train<IrisData, ClusterPrediction>()
55+
Console.WriteLine("=============== End training ===============")
56+
57+
// Saving the model as a .zip file.
58+
model.WriteAsync(ModelPath) |> Async.AwaitTask |> Async.RunSynchronously
59+
Console.WriteLine("The model is saved to {0}", ModelPath)
60+
61+
model
62+
63+
module TestIrisData =
64+
let Setosa1 = IrisData(SepalLength = 5.1, SepalWidth = 3.3, PetalLength = 1.6, PetalWidth = 0.2)
65+
let Setosa2 = IrisData(SepalLength = 0.2, SepalWidth = 5.1, PetalLength = 3.5, PetalWidth = 1.4)
66+
let Virginica1 = IrisData(SepalLength = 6.4, SepalWidth = 3.1, PetalLength = 5.5, PetalWidth = 2.2)
67+
let Virginica2 = IrisData(SepalLength = 2.5, SepalWidth = 6.3, PetalLength = 3.3, PetalWidth = 6.0)
68+
let Versicolor1 = IrisData(SepalLength = 6.4, SepalWidth = 3.1, PetalLength = 4.5, PetalWidth = 1.5)
69+
let Versicolor2 = IrisData(SepalLength = 7.0, SepalWidth = 3.2, PetalLength = 4.7, PetalWidth = 1.4)
70+
71+
// STEP 1: Create a model
72+
let model = Train()
73+
74+
// STEP 2: Make a prediction
75+
Console.WriteLine()
76+
let prediction1 = model.Predict(TestIrisData.Setosa1)
77+
let prediction2 = model.Predict(TestIrisData.Setosa2)
78+
Console.WriteLine(sprintf "Clusters assigned for setosa flowers:")
79+
Console.WriteLine(sprintf " {%d}" prediction1.SelectedClusterId)
80+
Console.WriteLine(sprintf " {%d}" prediction2.SelectedClusterId)
81+
82+
let prediction3 = model.Predict(TestIrisData.Virginica1)
83+
let prediction4 = model.Predict(TestIrisData.Virginica2)
84+
Console.WriteLine(sprintf "Clusters assigned for virginica flowers:")
85+
Console.WriteLine(sprintf " {%d}" prediction3.SelectedClusterId)
86+
Console.WriteLine(sprintf " {%d}" prediction4.SelectedClusterId)
87+
88+
let prediction5 = model.Predict(TestIrisData.Versicolor1)
89+
let prediction6 = model.Predict(TestIrisData.Versicolor2)
90+
Console.WriteLine(sprintf "Clusters assigned for versicolor flowers:")
91+
Console.WriteLine(sprintf " {%d}" prediction5.SelectedClusterId)
92+
Console.WriteLine(sprintf " {%d}" prediction6.SelectedClusterId)
93+
Console.ReadLine() |> ignore
94+

0 commit comments

Comments
 (0)