Skip to content

Commit f2d749a

Browse files
author
Rogan Carr
committed
Breaking the PFI examples into different files in a subfolder.
1 parent 8655730 commit f2d749a

File tree

4 files changed

+250
-231
lines changed

4 files changed

+250
-231
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/PermutationFeatureImportance.cs

Lines changed: 0 additions & 231 deletions
This file was deleted.
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
using Microsoft.ML.Runtime.Data;
2+
using Microsoft.ML.Runtime.Learners;
3+
using Microsoft.ML.Trainers.HalLearners;
4+
using System;
5+
using System.Linq;
6+
7+
namespace Microsoft.ML.Samples.Dynamic.PermutationFeatureImportance
8+
{
9+
/// <summary>
/// Helpers shared by the permutation feature importance (PFI) samples: loading the housing
/// dataset as an <see cref="IDataView"/> and copying linear-model weights into arrays.
/// </summary>
public class PfiHelper
{
    // Name of the regression label column. It must match the property name on
    // ContinuousInputRow so the custom mapping can bind to the column.
    private const string MedianHomeValueColumn = "MedianHomeValue";

    // Threshold used to binarize the label. The sample treats this value as the average
    // median home value. It is declared as a float so the comparison below happens in
    // single precision: comparing a float against the double literal 22.6 widens the float,
    // and because the float nearest to 22.6 is slightly larger than the double 22.6, a home
    // value of exactly 22.6 would otherwise be classified as above the threshold.
    private const float AverageMedianHomeValue = 22.6f;

    /// <summary>
    /// Downloads the housing regression dataset and reads it into an <see cref="IDataView"/>.
    /// </summary>
    /// <param name="mlContext">The context used to create the text reader and the transforms.</param>
    /// <param name="labelName">
    /// On return, the name of the label column: "MedianHomeValue" for regression, or
    /// "AboveAverage" when <paramref name="binaryPrediction"/> is true.
    /// </param>
    /// <param name="featureNames">
    /// On return, the names of every column in the resulting schema except the label column.
    /// </param>
    /// <param name="binaryPrediction">
    /// When true, a boolean "AboveAverage" label is derived from the median home value and the
    /// original "MedianHomeValue" column is dropped from the returned data.
    /// </param>
    /// <returns>The loaded (and, for binary prediction, transformed) data.</returns>
    public static IDataView GetHousingRegressionIDataView(MLContext mlContext, out string labelName, out string[] featureNames, bool binaryPrediction = false)
    {
        // Download the dataset from github.com/dotnet/machinelearning.
        // This will create a housing.txt file in the filesystem.
        // You can open this file to see the data.
        string dataFile = SamplesUtils.DatasetUtils.DownloadHousingRegressionDataset();

        // Define the reader: specify the data columns and where to find them in the text file.
        // Each row holds 12 numerical columns (indices 0 through 11): the label followed by
        // 11 features. No separator is passed, so the reader's default separator applies.
        var reader = mlContext.Data.CreateTextReader(
            columns: new[]
            {
                // Read the first column (indexed by 0) in the data file as an R4 (float).
                new TextLoader.Column(MedianHomeValueColumn, DataKind.R4, 0),
                new TextLoader.Column("CrimesPerCapita", DataKind.R4, 1),
                new TextLoader.Column("PercentResidental", DataKind.R4, 2),
                new TextLoader.Column("PercentNonRetail", DataKind.R4, 3),
                new TextLoader.Column("CharlesRiver", DataKind.R4, 4),
                new TextLoader.Column("NitricOxides", DataKind.R4, 5),
                new TextLoader.Column("RoomsPerDwelling", DataKind.R4, 6),
                new TextLoader.Column("PercentPre40s", DataKind.R4, 7),
                new TextLoader.Column("EmploymentDistance", DataKind.R4, 8),
                new TextLoader.Column("HighwayDistance", DataKind.R4, 9),
                new TextLoader.Column("TaxRate", DataKind.R4, 10),
                new TextLoader.Column("TeacherRatio", DataKind.R4, 11),
            },
            hasHeader: true
        );

        // Read the data.
        IDataView data = reader.Read(dataFile);

        // A local copy of the label name is required because an out parameter cannot be
        // captured by the lambda that filters the feature names below.
        string label = binaryPrediction
            ? nameof(BinaryOutputRow.AboveAverage)
            : MedianHomeValueColumn;

        if (binaryPrediction)
        {
            // Append the boolean label, then remove the continuous value it was derived from
            // so that it is not reported as a feature.
            // NOTE(review): the second argument is passed as null exactly as before; it is
            // presumably the contract name of the custom mapping - confirm against the API.
            data = mlContext.Transforms.CustomMappingTransformer(GreaterThanAverage, null).Transform(data);
            data = mlContext.Transforms.DropColumns(MedianHomeValueColumn).Fit(data).Transform(data);
        }

        labelName = label;
        featureNames = data.Schema.AsEnumerable()
            .Select(column => column.Name)   // Get the column names
            .Where(name => name != label)    // Drop the label
            .ToArray();

        return data;
    }

    // Input row for the custom mapping: the single column the mapping consumes.
    private class ContinuousInputRow
    {
        public float MedianHomeValue { get; set; }
    }

    // Output row for the custom mapping: the single column the mapping produces.
    private class BinaryOutputRow
    {
        public bool AboveAverage { get; set; }
    }

    // Custom mapping that flags a row as above average when its median home value is
    // strictly greater than the threshold.
    private static readonly Action<ContinuousInputRow, BinaryOutputRow> GreaterThanAverage =
        (input, output) => output.AboveAverage = input.MedianHomeValue > AverageMedianHomeValue;

    /// <summary>
    /// Copies the weights of an OLS linear regression model into a new array.
    /// </summary>
    /// <param name="linearModel">The model parameters whose weights are read.</param>
    /// <returns>A new array containing the model's weights.</returns>
    public static float[] GetLinearModelWeights(OlsLinearRegressionModelParameters linearModel)
    {
        return linearModel.Weights.ToArray();
    }

    /// <summary>
    /// Copies the weights of a linear binary classification model into a new array.
    /// </summary>
    /// <param name="linearModel">The model parameters whose weights are read.</param>
    /// <returns>A new array containing the model's weights.</returns>
    public static float[] GetLinearModelWeights(LinearBinaryModelParameters linearModel)
    {
        return linearModel.Weights.ToArray();
    }
}
88+
}

0 commit comments

Comments
 (0)