From 7338a47a134f406a23c337f63cdbeb55f11035a7 Mon Sep 17 00:00:00 2001 From: Roman Pavlov Date: Fri, 9 May 2014 02:02:22 +0300 Subject: [PATCH 1/2] mapped some tokenfilters that are not present --- .../TokenFilter/CommonGramsTokenFilter.cs | 43 + .../DelimitedPayloadTokenFilter.cs | 30 + .../TokenFilter/HunspellTokenFilter.cs | 49 + .../TokenFilter/KeepWordsTokenFilter.cs | 36 + .../TokenFilter/KeywordRepeatTokenFilter.cs | 13 + .../TokenFilter/LimitTokenCountTokenFilter.cs | 28 + .../TokenFilter/PatternCaptureTokenFilter.cs | 25 + .../TokenFilter/StemmerOverrideTokenFilter.cs | 30 + .../TokenFilter/UppercaseTokenFilter.cs | 17 + src/Nest/Nest.csproj | 1629 +++++++++-------- 10 files changed, 1090 insertions(+), 810 deletions(-) create mode 100644 src/Nest/Domain/Analysis/TokenFilter/CommonGramsTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/DelimitedPayloadTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/HunspellTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/KeepWordsTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/KeywordRepeatTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/LimitTokenCountTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/PatternCaptureTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/StemmerOverrideTokenFilter.cs create mode 100644 src/Nest/Domain/Analysis/TokenFilter/UppercaseTokenFilter.cs diff --git a/src/Nest/Domain/Analysis/TokenFilter/CommonGramsTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/CommonGramsTokenFilter.cs new file mode 100644 index 00000000000..6e2a298d47a --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/CommonGramsTokenFilter.cs @@ -0,0 +1,43 @@ +using System.Collections.Generic; +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// Token filter that generates bigrams for frequently occurring terms. Single terms are still indexed. 
+ /// Note, common_words or common_words_path field is required. + /// + public class CommonGramsTokenFilter : TokenFilterBase + { + public CommonGramsTokenFilter() + : base("common_grams") + { + + } + + /// + /// A list of common words to use. + /// + [JsonProperty("common_words")] + public IEnumerable<string> CommonWords { get; set; } + + /// + /// A path (either relative to config location, or absolute) to a list of common words. + /// + [JsonProperty("common_words_path")] + public string CommonWordsPath { get; set; } + + /// + /// If true, common words matching will be case insensitive. + /// + [JsonProperty("ignore_case")] + public bool? IgnoreCase { get; set; } + + /// + /// Generates bigrams then removes common words and single terms followed by a common word. + /// + [JsonProperty("query_mode")] + public bool? QueryMode { get; set; } + + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/DelimitedPayloadTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/DelimitedPayloadTokenFilter.cs new file mode 100644 index 00000000000..6546e793f10 --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/DelimitedPayloadTokenFilter.cs @@ -0,0 +1,30 @@ +using System.Collections.Generic; +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// Splits tokens into tokens and payload whenever a delimiter character is found. + /// + public class DelimitedPayloadTokenFilter : TokenFilterBase + { + public DelimitedPayloadTokenFilter() + : base("delimited_payload_filter") + { + + } + + /// + /// Character used for splitting the tokens. Nullable, like the other optional settings, so an unset value is not emitted as the NUL character. + /// + [JsonProperty("delimiter")] + public char? Delimiter { get; set; } + + /// + /// The type of the payload. int for integer, float for float and identity for characters. 
+ /// + [JsonProperty("encoding")] + public string Encoding { get; set; } + + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/HunspellTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/HunspellTokenFilter.cs new file mode 100644 index 00000000000..dc8e52063d1 --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/HunspellTokenFilter.cs @@ -0,0 +1,49 @@ +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// Basic support for hunspell stemming. + /// Hunspell dictionaries will be picked up from a dedicated hunspell directory on the filesystem. + /// + public class HunspellTokenFilter : TokenFilterBase + { + public HunspellTokenFilter() + : base("hunspell") + { + + } + + /// + /// If true, dictionary matching will be case insensitive. + /// + [JsonProperty("ignore_case")] + public bool? IgnoreCase { get; set; } + + /// + /// A locale for this filter. If this is unset, the lang or language are used instead - so one of these has to be set. + /// + [JsonProperty("locale")] + public string Locale { get; set; } + + /// + /// The name of a dictionary. + /// + [JsonProperty("dictionary")] + public string Dictionary { get; set; } + + /// + /// If only unique terms should be returned, this needs to be set to true. + /// + [JsonProperty("dedup")] + public bool? Dedup { get; set; } + + /// + /// If only the longest term should be returned, set this to true. + /// + [JsonProperty("longest_only")] + public bool? 
LongestOnly { get; set; } + + + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/KeepWordsTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/KeepWordsTokenFilter.cs new file mode 100644 index 00000000000..60271f2a0c0 --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/KeepWordsTokenFilter.cs @@ -0,0 +1,36 @@ +using System.Collections.Generic; +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// A token filter of type keep that only keeps tokens with text contained in a predefined set of words. + /// + public class KeepWordsTokenFilter : TokenFilterBase + { + public KeepWordsTokenFilter() + : base("keep") + { + + } + + /// + /// A list of words to keep. + /// + [JsonProperty("keep_words")] + public IEnumerable<string> KeepWords { get; set; } + + /// + /// A path to a words file. + /// + [JsonProperty("keep_words_path")] + public string KeepWordsPath { get; set; } + + /// + /// A boolean indicating whether to lower case the words. + /// + [JsonProperty("keep_words_case")] + public bool? KeepWordsCase { get; set; } + + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/KeywordRepeatTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/KeywordRepeatTokenFilter.cs new file mode 100644 index 00000000000..fa0f2d1dc76 --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/KeywordRepeatTokenFilter.cs @@ -0,0 +1,13 @@ +namespace Nest +{ + /// + /// The keyword_repeat token filter emits each incoming token twice, once as a keyword and once as a non-keyword, to allow an unstemmed version of a term to be indexed side by side with the stemmed version of the term. 
+ /// + public class KeywordRepeatTokenFilter : TokenFilterBase + { + public KeywordRepeatTokenFilter() + : base("keyword_repeat") + { + } + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/LimitTokenCountTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/LimitTokenCountTokenFilter.cs new file mode 100644 index 00000000000..bff10eb86ee --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/LimitTokenCountTokenFilter.cs @@ -0,0 +1,28 @@ +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// Limits the number of tokens that are indexed per document and field. + /// + public class LimitTokenCountTokenFilter : TokenFilterBase + { + public LimitTokenCountTokenFilter() + : base("limit") + { + + } + + /// + /// The maximum number of tokens that should be indexed per document and field. + /// + [JsonProperty("max_token_count")] + public int? MaxTokenCount { get; set; } + + /// + /// If set to true, the filter exhausts the stream even if max_token_count tokens have been consumed already. + /// + [JsonProperty("consume_all_tokens")] + public bool? ConsumeAllTokens { get; set; } + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/PatternCaptureTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/PatternCaptureTokenFilter.cs new file mode 100644 index 00000000000..6f099c69606 --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/PatternCaptureTokenFilter.cs @@ -0,0 +1,25 @@ +using System.Collections.Generic; +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// The pattern_capture token filter, unlike the pattern tokenizer, emits a token for every capture group in the regular expression. 
+ /// + public class PatternCaptureTokenFilter : TokenFilterBase + { + public PatternCaptureTokenFilter() + : base("pattern_capture") + { + } + + [JsonProperty("patterns")] + public IEnumerable<string> Patterns { get; set; } + + /// + /// If preserve_original is set to true, the original token is emitted as well. + /// + [JsonProperty("preserve_original")] + public bool? PreserveOriginal { get; set; } + } +} diff --git a/src/Nest/Domain/Analysis/TokenFilter/StemmerOverrideTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/StemmerOverrideTokenFilter.cs new file mode 100644 index 00000000000..8c3039eeb3f --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/StemmerOverrideTokenFilter.cs @@ -0,0 +1,30 @@ +using System.Collections.Generic; +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// Overrides stemming algorithms, by applying a custom mapping, then protecting these terms from being modified by stemmers. Must be placed before any stemming filters. + /// + public class StemmerOverrideTokenFilter : TokenFilterBase + { + public StemmerOverrideTokenFilter() + : base("stemmer_override") + { + + } + + /// + /// A list of mapping rules to use. + /// + [JsonProperty("rules")] + public IEnumerable<string> Rules { get; set; } + + /// + /// A path (either relative to config location, or absolute) to a list of mappings. + /// + [JsonProperty("rules_path")] + public string RulesPath { get; set; } + + } +} \ No newline at end of file diff --git a/src/Nest/Domain/Analysis/TokenFilter/UppercaseTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/UppercaseTokenFilter.cs new file mode 100644 index 00000000000..296e6c526f0 --- /dev/null +++ b/src/Nest/Domain/Analysis/TokenFilter/UppercaseTokenFilter.cs @@ -0,0 +1,17 @@ +using Newtonsoft.Json; + +namespace Nest +{ + /// + /// A token filter of type uppercase that normalizes token text to upper case. 
+ /// + public class UppercaseTokenFilter : TokenFilterBase + { + public UppercaseTokenFilter() + : base("uppercase") + { + + } + + } +} \ No newline at end of file diff --git a/src/Nest/Nest.csproj b/src/Nest/Nest.csproj index d33cdddd995..348ccaf0fab 100644 --- a/src/Nest/Nest.csproj +++ b/src/Nest/Nest.csproj @@ -1,818 +1,827 @@ - - - - Debug - AnyCPU - 9.0.21022 - 2.0 - {072BA7DA-7B60-407D-8B6E-95E3186BE70C} - Library - Properties - Nest - Nest - 512 - - - 3.5 - - publish\ - true - Disk - false - Foreground - 7 - Days - false - false - true - 0 - 1.0.0.%2a - false - false - true - ..\ - true - - - True - full - False - bin\Debug\ - DEBUG;TRACE - prompt - 4 - BasicCorrectnessRules.ruleset - AnyCPU - bin\Debug\Nest.XML - true - 1591,1572,1571,1573,1587,1570 - - - pdbonly - True - bin\Release\ - TRACE - prompt - 4 - AllRules.ruleset - bin\Release\Nest.XML - - - true - bin\Debug - Generator\ - DEBUG;TRACE - bin\Debug\Nest.XML - full - AnyCPU - prompt - BasicCorrectnessRules.ruleset - - - true - - - ..\..\build\keys\keypair.snk - - - - - False - ..\..\dep\Newtonsoft.Json.6.0.1\lib\net40\Newtonsoft.Json.dll - - - - 3.5 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Code - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Code - - - Code - - - - - - Code - - - Code - - - - Code - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - False - .NET Framework 3.5 SP1 Client Profile - false - - - False - .NET Framework 3.5 SP1 - true - - - False - Windows Installer 3.1 - true - - - - - {e97ccf40-0ba6-43fe-9f2d-58d454134088} - Elasticsearch.Net - - - - - - - - - - - + + + + Debug + AnyCPU + 9.0.21022 + 2.0 + {072BA7DA-7B60-407D-8B6E-95E3186BE70C} + Library + Properties + Nest + Nest + 512 + + + 3.5 + + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + ..\ + true + + + True + full + False + bin\Debug\ + DEBUG;TRACE + prompt + 4 + BasicCorrectnessRules.ruleset + AnyCPU + bin\Debug\Nest.XML + true + 1591,1572,1571,1573,1587,1570 + + + pdbonly + True + bin\Release\ + TRACE + prompt + 4 + AllRules.ruleset + bin\Release\Nest.XML + + + true + bin\Debug - Generator\ + DEBUG;TRACE + bin\Debug\Nest.XML + full + AnyCPU + prompt + BasicCorrectnessRules.ruleset + + + true + + + ..\..\build\keys\keypair.snk + + + + + False + ..\..\dep\Newtonsoft.Json.6.0.1\lib\net40\Newtonsoft.Json.dll + + + + 3.5 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Code + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Code + + + Code + + + + + + Code + + + Code + + + + Code + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + False + .NET Framework 3.5 SP1 Client Profile + false + + + False + .NET Framework 3.5 SP1 + true + + + False + Windows Installer 3.1 + true + + + + + {e97ccf40-0ba6-43fe-9f2d-58d454134088} + Elasticsearch.Net + + + + + + + + + + + - +--> + + --> \ No newline at end of file From f0180ea63a781341c0a996b10da51b426622db26 Mon Sep 17 00:00:00 2001 From: Roman Pavlov Date: Fri, 9 May 2014 02:04:14 +0300 Subject: [PATCH 2/2] Added support for language on LowercaseTokenFilter --- .../Domain/Analysis/TokenFilter/LowercaseTokenFilter.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Nest/Domain/Analysis/TokenFilter/LowercaseTokenFilter.cs b/src/Nest/Domain/Analysis/TokenFilter/LowercaseTokenFilter.cs index 80422951597..9a1ad012a1a 100644 --- a/src/Nest/Domain/Analysis/TokenFilter/LowercaseTokenFilter.cs +++ b/src/Nest/Domain/Analysis/TokenFilter/LowercaseTokenFilter.cs @@ -12,7 +12,9 @@ public LowercaseTokenFilter() : base("lowercase") { - } - 
+ } + + [JsonProperty("language")] + public string Language { get; set; } } } \ No newline at end of file