Skip to content

Commit 44d470f

Browse files
committed
dotnet#202: migrate Clustering_Iris F# sample to v0.9
1 parent 54ed672 commit 44d470f

File tree

5 files changed

+90
-73
lines changed

5 files changed

+90
-73
lines changed

samples/fsharp/getting-started/Clustering_Iris/IrisClustering/IrisClusteringConsoleApp/Clustering_Iris.fsproj

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
</PropertyGroup>
77

88
<ItemGroup>
9-
<Compile Include="..\..\..\..\common\ConsoleHelper.fs" Link="Common\ConsoleHelper.fs" />
10-
<Compile Include="..\..\..\..\common\ModelBuilder.fs" Link="Common\ModelBuilder.fs" />
11-
<Compile Include="..\..\..\..\common\ModelScorer.fs" Link="Common\ModelScorer.fs" />
12-
<Compile Include="..\..\..\..\common\Pipeline.fs" Link="Common\Pipeline.fs" />
9+
<Compile Include="..\..\..\..\common_v0.9\ConsoleHelper.fs" Link="Common\ConsoleHelper.fs" />
10+
<Compile Include="..\..\..\..\common_v0.9\ModelBuilder.fs" Link="Common\ModelBuilder.fs" />
11+
<Compile Include="..\..\..\..\common_v0.9\ModelScorer.fs" Link="Common\ModelScorer.fs" />
12+
<Compile Include="..\..\..\..\common_v0.9\Pipeline.fs" Link="Common\Pipeline.fs" />
1313
</ItemGroup>
1414

1515
<ItemGroup>
@@ -18,7 +18,7 @@
1818
</ItemGroup>
1919

2020
<ItemGroup>
21-
<PackageReference Include="Microsoft.ML" Version="0.7.0" />
21+
<PackageReference Include="Microsoft.ML" Version="$(MicrosoftMLVersion)" />
2222
</ItemGroup>
2323

2424
<ItemGroup>

samples/fsharp/getting-started/Clustering_Iris/IrisClustering/IrisClusteringConsoleApp/DataStructures/DataStructures.fs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
namespace Clustering_Iris.DataStructures
22

33
module DataStructures =
4-
open Microsoft.ML.Runtime.Api
4+
open Microsoft.ML.Data
55

66
/// Describes Iris flower. Used as an input to prediction function.
77
[<CLIMutable>]

samples/fsharp/getting-started/Clustering_Iris/IrisClustering/IrisClusteringConsoleApp/Program.fs

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
open System
44
open System.IO
55
open Microsoft.ML
6-
open Microsoft.ML.Runtime.Data
6+
open Microsoft.ML.Data
77
open Clustering_Iris.DataStructures
88
open DataStructures
99

@@ -14,12 +14,19 @@ let dataPath = sprintf @"%s/iris-full.txt" baseDatasetsLocation
1414
let baseModelsPath = @"../../../../MLModels"
1515
let modelPath = sprintf @"%s/IrisModel.zip" baseModelsPath
1616

17-
let dataLoader (mlContext : MLContext) =
18-
mlContext.Data.TextReader(
19-
TextLoader.Arguments(
20-
Separator = "\t",
21-
HasHeader = true,
22-
Column =
17+
18+
[<EntryPoint>]
19+
let main argv =
20+
21+
//Create the MLContext to share across components for deterministic results
22+
let mlContext = MLContext(seed = Nullable 1) //Seed set to any number so you have a deterministic environment
23+
24+
// STEP 1: Common data loading configuration
25+
let textLoader =
26+
mlContext.Data.CreateTextReader(
27+
hasHeader = true,
28+
separatorChar = '\t',
29+
columns =
2330
[|
2431
TextLoader.Column("Label", Nullable DataKind.R4, 0)
2532
TextLoader.Column("SepalLength", Nullable DataKind.R4, 1)
@@ -28,35 +35,22 @@ let dataLoader (mlContext : MLContext) =
2835
TextLoader.Column("PetalWidth", Nullable DataKind.R4, 4)
2936
|]
3037
)
31-
)
32-
33-
let read (dataPath : string) (dataLoader : TextLoader) =
34-
dataLoader.Read dataPath
35-
36-
37-
[<EntryPoint>]
38-
let main argv =
39-
40-
let mlContext = MLContext(seed = Nullable 1)
41-
42-
//STEP 1: Common data loading
43-
let fullData =
44-
dataLoader mlContext
45-
|> read dataPath
4638

47-
let struct (trainingDataView, testingDataView) = mlContext.Clustering.TrainTestSplit(fullData, testFraction = 0.2)
39+
let fullData = textLoader.Read dataPath
40+
41+
//Split dataset in two parts: TrainingDataset (80%) and TestDataset (20%)
42+
let struct(trainingDataView, testingDataView) = mlContext.Clustering.TrainTestSplit(fullData, testFraction = 0.2)
4843

4944
//STEP 2: Process data transformations in pipeline
50-
let dataProcessPipeline =
51-
mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
45+
let dataProcessPipeline = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
5246

5347
// (Optional) Peek data in training DataView after applying the ProcessPipeline's transformations
5448
Common.ConsoleHelper.peekDataViewInConsole<IrisData> mlContext trainingDataView dataProcessPipeline 10 |> ignore
5549
Common.ConsoleHelper.peekVectorColumnDataInConsole mlContext "Features" trainingDataView dataProcessPipeline 10 |> ignore
5650

51+
// STEP 3: Create and train the model
5752
let trainer = mlContext.Clustering.Trainers.KMeans(features = "Features", clustersCount = 3)
5853

59-
// STEP 3: Create and train the model
6054
let modelBuilder =
6155
Common.ModelBuilder.create mlContext dataProcessPipeline
6256
|> Common.ModelBuilder.addTrainer trainer
Lines changed: 55 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# Clustering Iris flowers (F#)
1+
# Clustering Iris Data
22

33
| ML.NET version | API type | Status | App Type | Data type | Scenario | ML Task | Algorithms |
44
|----------------|-------------------|-------------------------------|-------------|-----------|---------------------|---------------------------|-----------------------------|
5-
| v0.7 | Dynamic API | README.md needs update | Console app | .txt file | Clustering Iris flowers | Clustering | K-means++ |
5+
| v0.9 | Dynamic API | Up-to-date | Console app | .txt file | Clustering Iris flowers | Clustering | K-means++ |
66

77
In this introductory sample, you'll see how to use [ML.NET](https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet) to divide iris flowers into different groups that correspond to different types of iris. In the world of machine learning, this task is known as **clustering**.
88

@@ -30,59 +30,73 @@ To solve this problem, first we will build and train an ML model. Then we will u
3030

3131
### 1. Build model
3232

33-
Building a model includes: uploading data (`iris-full.txt` with `TextLoader`), transforming the data so it can be used effectively by an ML algorithm (with `ConcatEstimator`), and choosing a learning algorithm (`KMeansPlusPlusTrainer`). All of those steps are stored in a `EstimatorChain`:
33+
Building a model includes: loading data (`iris-full.txt` with `TextLoader`), transforming the data so it can be used effectively by an ML algorithm (with `Concatenate`), and choosing a learning algorithm (`KMeans`). All of those steps are stored in `trainingPipeline`:
34+
3435
```fsharp
35-
// LearningPipeline holds all steps of the learning process: data, transforms, learners.
36+
// STEP 1: Common data loading configuration
37+
let textLoader =
38+
mlContext.Data.CreateTextReader(
39+
hasHeader = true,
40+
separatorChar = '\t',
41+
columns =
42+
[|
43+
TextLoader.Column("Label", Nullable DataKind.R4, 0)
44+
TextLoader.Column("SepalLength", Nullable DataKind.R4, 1)
45+
TextLoader.Column("SepalWidth", Nullable DataKind.R4, 2)
46+
TextLoader.Column("PetalLength", Nullable DataKind.R4, 3)
47+
TextLoader.Column("PetalWidth", Nullable DataKind.R4, 4)
48+
|]
49+
)
50+
51+
let fullData = textLoader.Read dataPath
3652
37-
//1. Create ML.NET context/environment
38-
use env = new LocalEnvironment()
39-
40-
//2. Create DataReader with data schema mapped to file's columns
41-
let reader =
42-
TextLoader(
43-
env,
44-
TextLoader.Arguments(
45-
Separator = "tab",
46-
HasHeader = true,
47-
Column =
48-
[|
49-
TextLoader.Column("Label", Nullable DataKind.R4, 0)
50-
TextLoader.Column("SepalLength", Nullable DataKind.R4, 1)
51-
TextLoader.Column("SepalWidth", Nullable DataKind.R4, 2)
52-
TextLoader.Column("PetalLength", Nullable DataKind.R4, 3)
53-
TextLoader.Column("PetalWidth", Nullable DataKind.R4, 4)
54-
|]
55-
)
56-
)
57-
58-
//Load training data
59-
let trainingDataView = MultiFileSource(DataPath) |> reader.Read
53+
//Split dataset in two parts: TrainingDataset (80%) and TestDataset (20%)
54+
let struct(trainingDataView, testingDataView) = mlContext.Clustering.TrainTestSplit(fullData, testFraction = 0.2)
55+
56+
//STEP 2: Process data transformations in pipeline
57+
let dataProcessPipeline = mlContext.Transforms.Concatenate("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
58+
59+
// (Optional) Peek data in training DataView after applying the ProcessPipeline's transformations
60+
Common.ConsoleHelper.peekDataViewInConsole<IrisData> mlContext trainingDataView dataProcessPipeline 10 |> ignore
61+
Common.ConsoleHelper.peekVectorColumnDataInConsole mlContext "Features" trainingDataView dataProcessPipeline 10 |> ignore
62+
63+
// STEP 3: Create and train the model
64+
let trainer = mlContext.Clustering.Trainers.KMeans(features = "Features", clustersCount = 3)
65+
66+
let modelBuilder =
67+
Common.ModelBuilder.create mlContext dataProcessPipeline
68+
|> Common.ModelBuilder.addTrainer trainer
69+
70+
let trainedModel =
71+
modelBuilder
72+
|> Common.ModelBuilder.train trainingDataView
6073
```
74+
6175
### 2. Train model
62-
Training the model is a process of running the chosen algorithm on the given data. It is implemented in the `Fit()` method from the Estimator object. To perform training we just call the method and provide our data.
63-
```fsharp
64-
let model =
65-
env
66-
|> Pipeline.concatEstimator "Features" [| "SepalLength"; "SepalWidth"; "PetalLength"; "PetalWidth" |]
67-
|> Pipeline.append (KMeansPlusPlusTrainer(env, "Features", clustersCount = 3))
68-
|> Pipeline.fit trainingDataView
76+
Training the model is a process of running the chosen algorithm on the given data. To perform training you need to call the `Fit()` method.
6977

78+
```fsharp
79+
let trainedModel =
80+
modelBuilder
81+
|> Common.ModelBuilder.train trainingDataView
7082
```
7183
### 3. Consume model
7284
After the model is built and trained, we can use the `Predict()` API to predict the cluster for an iris flower and calculate the distance from given flower parameters to each cluster (each centroid of a cluster).
7385

7486
```fsharp
75-
let sampleIrisData =
76-
{
87+
let sampleIrisData =
88+
{
7789
SepalLength = 3.3f
7890
SepalWidth = 1.6f
7991
PetalLength = 0.2f
80-
PetalWidth = 5.1f
92+
PetalWidth = 5.1f
8193
}
8294
83-
let predictionFunc = loadedModel.MakePredictionFunction<IrisData, IrisPrediction> env
84-
let prediction = predictionFunc.Predict sampleIrisData
95+
//Create the clusters: Create data files and plot a chart
96+
let prediction =
97+
Common.ModelScorer.create mlContext
98+
|> Common.ModelScorer.loadModelFromZipFile modelPath
99+
|> Common.ModelScorer.predictSingle sampleIrisData
85100
86-
printfn "Clusters assigned for setosa flowers: %d" prediction.SelectedClusterId
87-
```
101+
printfn "Cluster assigned for setosa flowers: %d" prediction.SelectedClusterId
88102
```

samples/fsharp/v0.9-All-Samples.sln

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "TaxiFarePrediction.Solution
1919
EndProject
2020
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "TaxiFarePrediction", "getting-started\Regression_TaxiFarePrediction\TaxiFarePrediction\TaxiFarePredictionConsoleApp\TaxiFarePrediction.fsproj", "{2865D3B8-753F-4B61-B452-D5A3D032F9BB}"
2121
EndProject
22+
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "IrisClustering.Solution", "IrisClustering.Solution", "{68E5A791-705C-464D-A2AC-A30F0C452A54}"
23+
EndProject
24+
Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Clustering_Iris", "getting-started\Clustering_Iris\IrisClustering\IrisClusteringConsoleApp\Clustering_Iris.fsproj", "{23F5ADD0-DCF0-494B-9AE7-4754EC9A23C9}"
25+
EndProject
2226
Global
2327
GlobalSection(SolutionConfigurationPlatforms) = preSolution
2428
Debug|Any CPU = Debug|Any CPU
@@ -41,6 +45,10 @@ Global
4145
{2865D3B8-753F-4B61-B452-D5A3D032F9BB}.Debug|Any CPU.Build.0 = Debug|Any CPU
4246
{2865D3B8-753F-4B61-B452-D5A3D032F9BB}.Release|Any CPU.ActiveCfg = Release|Any CPU
4347
{2865D3B8-753F-4B61-B452-D5A3D032F9BB}.Release|Any CPU.Build.0 = Release|Any CPU
48+
{23F5ADD0-DCF0-494B-9AE7-4754EC9A23C9}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
49+
{23F5ADD0-DCF0-494B-9AE7-4754EC9A23C9}.Debug|Any CPU.Build.0 = Debug|Any CPU
50+
{23F5ADD0-DCF0-494B-9AE7-4754EC9A23C9}.Release|Any CPU.ActiveCfg = Release|Any CPU
51+
{23F5ADD0-DCF0-494B-9AE7-4754EC9A23C9}.Release|Any CPU.Build.0 = Release|Any CPU
4452
EndGlobalSection
4553
GlobalSection(SolutionProperties) = preSolution
4654
HideSolutionNode = FALSE
@@ -50,6 +58,7 @@ Global
5058
{AC2541F6-ADDF-4B9A-8216-04900767881E} = {D04EC2CC-4F1B-41B2-AFC6-E406FEA0412E}
5159
{CF0F35E4-4A8B-4E9A-A284-B791E404D334} = {ABE9B87E-D778-4F97-B38D-62B4A186A6E5}
5260
{2865D3B8-753F-4B61-B452-D5A3D032F9BB} = {740E8A4B-A0C2-4FE2-8561-6F46EA10BDCA}
61+
{23F5ADD0-DCF0-494B-9AE7-4754EC9A23C9} = {68E5A791-705C-464D-A2AC-A30F0C452A54}
5362
EndGlobalSection
5463
GlobalSection(ExtensibilityGlobals) = postSolution
5564
SolutionGuid = {47D66D3A-D6C7-45A5-8C11-8723039BC142}

0 commit comments

Comments
 (0)