diff --git a/test/Microsoft.ML.Tests/Scenarios/Scenario_HousePricePrediction.cs b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs similarity index 57% rename from test/Microsoft.ML.Tests/Scenarios/Scenario_HousePricePrediction.cs rename to test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs index 2b5fffa446..6111a07cda 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario_HousePricePrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/HousePricePredictionTests.cs @@ -2,14 +2,17 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using Microsoft.ML.Models; using Microsoft.ML.Runtime.Api; using Microsoft.ML.TestFramework; +using Microsoft.ML.Trainers; +using Microsoft.ML.Transforms; using Xunit; using Xunit.Abstractions; namespace Microsoft.ML.Scenarios { - public partial class Top5Scenarios : BaseTestClass + public partial class ScenariosTests : BaseTestClass { /* A real-estate firm Contoso wants to add a house price prediction to their ASP.NET/Xamarin application. @@ -49,6 +52,64 @@ public async void PredictHousePriceModelTest() Assert.InRange(prediction.Price, 260_000, 330_000); } + [Fact(Skip = "Missing data set. 
See https://github.com/dotnet/machinelearning/issues/3")] + public void TrainAndPredictHousePriceModelTest() + { + string dataPath = GetDataPath("kc_house_data.csv"); + + var pipeline = new LearningPipeline(); + + pipeline.Add(new TextLoader<HousePriceData>(dataPath, useHeader: true, separator: ",")); + + pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures", + "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")); + + pipeline.Add(new ColumnConcatenator(outputColumn: "CategoryFeatures", + "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode")); + + pipeline.Add(new CategoricalOneHotVectorizer("CategoryFeatures")); + pipeline.Add(new ColumnConcatenator(outputColumn: "Features", + "NumericalFeatures", "CategoryFeatures")); + pipeline.Add(new StochasticDualCoordinateAscentRegressor()); + + PredictionModel<HousePriceData, HousePricePrediction> model = pipeline.Train<HousePriceData, HousePricePrediction>(); + + HousePricePrediction prediction = model.Predict(new HousePriceData() + { + Bedrooms = 3, + Bathrooms = 2, + SqftLiving = 1710, + SqftLot = 4697, + Floors = 1.5f, + Waterfront = 0, + View = 0, + Condition = 5, + Grade = 6, + SqftAbove = 1710, + SqftBasement = 0, + YearBuilt = 1941, + YearRenovated = 0, + Zipcode = 98002, + Lat = 47.3048f, + Long = -122.218f, + SqftLiving15 = 1030, + SqftLot15 = 4705 + }); + + Assert.InRange(prediction.Price, 260_000, 330_000); + + string testDataPath = GetDataPath("kc_house_test.csv"); + var testData = new TextLoader<HousePriceData>(testDataPath, useHeader: true, separator: ","); + + var evaluator = new RegressionEvaluator(); + RegressionMetrics metrics = evaluator.Evaluate(model, testData); + Assert.InRange(metrics.L1, 85_000, 89_000); + Assert.InRange(metrics.L2, 17_000_000_000, 19_000_000_000); + Assert.InRange(metrics.Rms, 130_500, 135_000); + Assert.InRange(metrics.LossFn, 17_000_000_000, 19_000_000_000); + Assert.Equal(.8, metrics.RSquared, 1); + } + public class HousePriceData { [Column(ordinal: "0")] @@ 
-121,7 +182,7 @@ public class HousePricePrediction public float Price; } - public Top5Scenarios(ITestOutputHelper output) : base(output) + public ScenariosTests(ITestOutputHelper output) : base(output) { } } diff --git a/test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs similarity index 99% rename from test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs rename to test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs index d897303e30..30c497ccc5 100644 --- a/test/Microsoft.ML.Tests/Scenarios/TrainAndPredictIrisModelTest.cs +++ b/test/Microsoft.ML.Tests/Scenarios/IrisPlantClassificationTests.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.Scenarios { - public partial class Top5Scenarios + public partial class ScenariosTests { [Fact] public void TrainAndPredictIrisModelTest() diff --git a/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs b/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs deleted file mode 100644 index de7d2f6a00..0000000000 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario_TrainPredictionModel.cs +++ /dev/null @@ -1,73 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.ML.Models; -using Microsoft.ML.Trainers; -using Microsoft.ML.Transforms; -using Xunit; - -namespace Microsoft.ML.Scenarios -{ - public partial class Top5Scenarios - { - [Fact(Skip = "Missing data set. 
See https://github.com/dotnet/machinelearning/issues/3")] - public void TrainAndPredictHousePriceModelTest() - { - string dataPath = GetDataPath("kc_house_data.csv"); - - var pipeline = new LearningPipeline(); - - pipeline.Add(new TextLoader<HousePriceData>(dataPath, useHeader: true, separator: ",")); - - pipeline.Add(new ColumnConcatenator(outputColumn: "NumericalFeatures", - "SqftLiving", "SqftLot", "SqftAbove", "SqftBasement", "Lat", "Long", "SqftLiving15", "SqftLot15")); - - pipeline.Add(new ColumnConcatenator(outputColumn: "CategoryFeatures", - "Bedrooms", "Bathrooms", "Floors", "Waterfront", "View", "Condition", "Grade", "YearBuilt", "YearRenovated", "Zipcode")); - - pipeline.Add(new CategoricalOneHotVectorizer("CategoryFeatures")); - pipeline.Add(new ColumnConcatenator(outputColumn: "Features", - "NumericalFeatures", "CategoryFeatures")); - pipeline.Add(new StochasticDualCoordinateAscentRegressor()); - - PredictionModel<HousePriceData, HousePricePrediction> model = pipeline.Train<HousePriceData, HousePricePrediction>(); - - HousePricePrediction prediction = model.Predict(new HousePriceData() - { - Bedrooms = 3, - Bathrooms = 2, - SqftLiving = 1710, - SqftLot = 4697, - Floors = 1.5f, - Waterfront = 0, - View = 0, - Condition = 5, - Grade = 6, - SqftAbove = 1710, - SqftBasement = 0, - YearBuilt = 1941, - YearRenovated = 0, - Zipcode = 98002, - Lat = 47.3048f, - Long = -122.218f, - SqftLiving15 = 1030, - SqftLot15 = 4705 - }); - - Assert.InRange(prediction.Price, 260_000, 330_000); - - string testDataPath = GetDataPath("kc_house_test.csv"); - var testData = new TextLoader<HousePriceData>(testDataPath, useHeader: true, separator: ","); - - var evaluator = new RegressionEvaluator(); - RegressionMetrics metrics = evaluator.Evaluate(model, testData); - Assert.InRange(metrics.L1, 85_000, 89_000); - Assert.InRange(metrics.L2, 17_000_000_000, 19_000_000_000); - Assert.InRange(metrics.Rms, 130_500, 135_000); - Assert.InRange(metrics.LossFn, 17_000_000_000, 19_000_000_000); - Assert.Equal(.8, metrics.RSquared, 1); - } - } -} - diff --git 
a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs similarity index 99% rename from test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs rename to test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs index a0591d34b9..131b8c02db 100644 --- a/test/Microsoft.ML.Tests/Scenarios/Scenario3_SentimentPrediction.cs +++ b/test/Microsoft.ML.Tests/Scenarios/SentimentPredictionTests.cs @@ -13,7 +13,7 @@ namespace Microsoft.ML.Scenarios { - public partial class Top5Scenarios + public partial class ScenariosTests { public const string SentimentDataPath = "wikipedia-detox-250-line-data.tsv"; public const string SentimentTestPath = "wikipedia-detox-250-line-test.tsv";