-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Ranking Sample - Hotel Search Results (changed to Bing Search Engine result ranking) #533
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
nicolehaugen
merged 12 commits into
dotnet:features/ranking-sample
from
nicolehaugen:features/ranking-sample
Jun 29, 2019
Merged
Changes from 10 commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
bf4f7e4
Created ranking sample
nicolehaugen 6cbfc27
removed todo
nicolehaugen 9ff9c37
Fixed wording in ReadMe
nicolehaugen 996621a
Fixed typos
nicolehaugen abe226f
Modified RankingMetric code
nicolehaugen bffd76c
Incorporated Justin's feedback
nicolehaugen e153336
Fixed minor inconsistencies
nicolehaugen addf95a
Converted to new dataset
nicolehaugen 2c88f38
Changed code to download dataset since its zip is too large
nicolehaugen 71fe0e6
fixed using statement
nicolehaugen 61b694f
Removed unneeded license info for dataset
nicolehaugen 5890089
Renamed solution and minor changes
nicolehaugen File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
samples/csharp/getting-started/Ranking_PersonalizedSort/PersonalizedRanking.sln
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
| ||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 15 | ||
VisualStudioVersion = 15.0.28307.705 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PersonalizedRanking", "PersonalizedRanking\PersonalizedRanking.csproj", "{F71F24D8-F174-461F-B375-508EFB827A33}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{F71F24D8-F174-461F-B375-508EFB827A33}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{F71F24D8-F174-461F-B375-508EFB827A33}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{F71F24D8-F174-461F-B375-508EFB827A33}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{F71F24D8-F174-461F-B375-508EFB827A33}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {92FA42B0-28BF-4531-B744-F3125DAAC91A} | ||
EndGlobalSection | ||
EndGlobal |
62 changes: 62 additions & 0 deletions
62
...harp/getting-started/Ranking_PersonalizedSort/PersonalizedRanking/Common/ConsoleHelper.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
using Microsoft.ML; | ||
using Microsoft.ML.Data; | ||
using PersonalizedRanking.DataStructures; | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
|
||
namespace PersonalizedRanking.Common | ||
{ | ||
/// <summary>
/// Console output helpers for the ranking sample: prints ranking-quality metrics
/// (DCG and NDCG) and the raw scores behind a predicted ranking.
/// </summary>
public class ConsoleHelper
{
    /// <summary>
    /// Evaluates the accuracy of the model's predicted rankings and prints the Discounted Cumulative Gain
    /// and Normalized Discounted Cumulative Gain for the search queries.
    /// </summary>
    /// <param name="mlContext">The ML.NET context whose ranking catalog performs the evaluation.</param>
    /// <param name="predictions">The scored data to evaluate.</param>
    public static void EvaluateMetrics(MLContext mlContext, IDataView predictions)
    {
        // Evaluate the metrics for the data using NDCG; by default, metrics for up to 3 search results
        // in the query are reported (e.g. NDCG@3).
        RankingMetrics metrics = mlContext.Ranking.Evaluate(predictions);

        PrintMetrics(metrics);
    }

    /// <summary>
    /// Performs evaluation with the truncation level set to up to 10 search results within a query,
    /// then prints the resulting DCG and NDCG values.
    /// This is a temporary workaround for this issue: https://github.com/dotnet/machinelearning/issues/2728.
    /// </summary>
    /// <param name="mlContext">The ML.NET context passed to the evaluator's constructor.</param>
    /// <param name="scoredData">The scored data to evaluate; read through the "Label", "GroupId" and "Score" columns.</param>
    /// <param name="truncationLevel">The DCG truncation level; must be between 1 and 10 inclusive.</param>
    /// <exception cref="InvalidOperationException">
    /// Thrown when <paramref name="truncationLevel"/> is outside 1..10, or when the evaluator type or one of
    /// the members accessed through reflection cannot be found among the loaded assemblies.
    /// </exception>
    public static void EvaluateMetrics(MLContext mlContext, IDataView scoredData, int truncationLevel)
    {
        if (truncationLevel < 1 || truncationLevel > 10)
        {
            throw new InvalidOperationException("Currently metrics are only supported for 1 to 10 truncation levels.");
        }

        // Uses reflection to set the truncation level before calling evaluate.
        // Only assemblies already loaded into the current AppDomain are searched.
        var mlAssembly = AppDomain.CurrentDomain.GetAssemblies().First(a => a.FullName.Contains("Microsoft.ML.Data"));
        var rankEvalType = mlAssembly.DefinedTypes.First(t => t.Name.Contains("RankingEvaluator"));

        var evalArgsType = rankEvalType.GetNestedType("Arguments");
        if (evalArgsType == null)
        {
            throw new InvalidOperationException("Could not find the nested 'Arguments' type on the ranking evaluator.");
        }

        var evalArgs = Activator.CreateInstance(evalArgsType);

        var dcgLevel = evalArgsType.GetField("DcgTruncationLevel");
        if (dcgLevel == null)
        {
            throw new InvalidOperationException("Could not find the 'DcgTruncationLevel' field on the ranking evaluator arguments.");
        }

        dcgLevel.SetValue(evalArgs, truncationLevel);

        var ctor = rankEvalType.GetConstructors().First();
        var evaluator = ctor.Invoke(new object[] { mlContext, evalArgs });

        var evaluateMethod = rankEvalType.GetMethod("Evaluate");
        if (evaluateMethod == null)
        {
            throw new InvalidOperationException("Could not find the 'Evaluate' method on the ranking evaluator.");
        }

        RankingMetrics metrics = (RankingMetrics)evaluateMethod.Invoke(evaluator, new object[] { scoredData, "Label", "GroupId", "Score" });

        PrintMetrics(metrics);
    }

    /// <summary>
    /// Prints out the individual scores used to determine the relative ranking.
    /// </summary>
    /// <param name="predictions">The predictions whose group id and score are written, one per line.</param>
    public static void PrintScores(IEnumerable<SearchResultPrediction> predictions)
    {
        foreach (var prediction in predictions)
        {
            Console.WriteLine($"GroupId: {prediction.GroupId}, Score: {prediction.Score}");
        }
    }

    // Writes the DCG line followed by the NDCG line (the latter with a trailing blank line).
    private static void PrintMetrics(RankingMetrics metrics)
    {
        Console.WriteLine($"DCG: {FormatGains(metrics.DiscountedCumulativeGains)}");

        Console.WriteLine($"NDCG: {FormatGains(metrics.NormalizedDiscountedCumulativeGains)}\n");
    }

    // Renders gains as "@1:x.xxxx, @2:x.xxxx, ..." where the number after '@' is the 1-based position.
    private static string FormatGains<T>(IEnumerable<T> gains)
    {
        return string.Join(", ", gains.Select((gain, index) => $"@{index + 1}:{gain:F4}"));
    }
}
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renaming:
I might recommend a title for this sample: "MSLR-WEB10K Ranking" or more simply, "Web Ranking".
I don't think there is personalization in this dataset. Personalization in the ranking area generally means that each user gets individualized search results. In this dataset, there is no information about the user. This information would generally include { topics of interest of the user, demographics of the user, current location of the user, etc }.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#resolved