Skip to content

Commit c9848a1

Browse files
authored
Add ignore_keywords flag to word delimiter graph (#5121)
This introduces a new property on the word delimiter graph token filter that configures whether tokens marked as keywords are ignored. It relates to the change in elastic/elasticsearch#59563.
1 parent 87f01e5 commit c9848a1

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

src/Nest/Analysis/TokenFilters/WordDelimiterGraph/WordDelimiterGraphTokenFilter.cs

+19-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ public interface IWordDelimiterGraphTokenFilter : ITokenFilter
5656
[JsonFormatter(typeof(NullableStringBooleanFormatter))]
5757
bool? GenerateWordParts { get; set; }
5858

59+
/// <summary>
60+
/// If true, the filter skips tokens with a keyword attribute of true. Defaults to false.
61+
/// </summary>
62+
[DataMember(Name = "ignore_keywords")]
63+
[JsonFormatter(typeof(NullableStringBooleanFormatter))]
64+
bool? IgnoreKeywords { get; set; }
65+
5966
/// <summary>
6067
/// If true, includes original words in subwords: "500-42" ⇒ "500-42" "500" "42". Defaults to false.
6168
/// </summary>
@@ -133,6 +140,9 @@ public WordDelimiterGraphTokenFilter() : base("word_delimiter_graph") { }
133140
/// <inheritdoc />
134141
public bool? GenerateWordParts { get; set; }
135142

143+
/// <inheritdoc />
144+
public bool? IgnoreKeywords { get; set; }
145+
136146
/// <inheritdoc />
137147
public bool? PreserveOriginal { get; set; }
138148

@@ -169,8 +179,8 @@ public class WordDelimiterGraphTokenFilterDescriptor
169179
bool? IWordDelimiterGraphTokenFilter.CatenateWords { get; set; }
170180
bool? IWordDelimiterGraphTokenFilter.GenerateNumberParts { get; set; }
171181
bool? IWordDelimiterGraphTokenFilter.GenerateWordParts { get; set; }
182+
bool? IWordDelimiterGraphTokenFilter.IgnoreKeywords { get; set; }
172183
bool? IWordDelimiterGraphTokenFilter.PreserveOriginal { get; set; }
173-
174184
IEnumerable<string> IWordDelimiterGraphTokenFilter.ProtectedWords { get; set; }
175185
string IWordDelimiterGraphTokenFilter.ProtectedWordsPath { get; set; }
176186
bool? IWordDelimiterGraphTokenFilter.SplitOnCaseChange { get; set; }
@@ -187,6 +197,14 @@ public WordDelimiterGraphTokenFilterDescriptor GenerateWordParts(bool? generateW
187197
public WordDelimiterGraphTokenFilterDescriptor GenerateNumberParts(bool? generateNumberParts = true) =>
188198
Assign(generateNumberParts, (a, v) => a.GenerateNumberParts = v);
189199

200+
/// <summary>
201+
/// <para>Configure whether the filter will skip tokens with a keyword attribute of true.</para>
202+
/// <para>(Optional) When not configured, this defaults to false in Elasticsearch.</para>
203+
/// </summary>
204+
/// <param name="ignoreKeywords">If true, the filter skips tokens with a keyword attribute of true.</param>
205+
public WordDelimiterGraphTokenFilterDescriptor IgnoreKeywords(bool? ignoreKeywords = true) =>
206+
Assign(ignoreKeywords, (a, v) => a.IgnoreKeywords = v);
207+
190208
/// <inheritdoc />
191209
public WordDelimiterGraphTokenFilterDescriptor CatenateWords(bool? catenateWords = true) => Assign(catenateWords, (a, v) => a.CatenateWords = v);
192210

tests/Tests/Analysis/TokenFilters/TokenFilterTests.cs

+3
Original file line numberDiff line numberDiff line change
@@ -923,6 +923,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
923923
.CatenateWords()
924924
.GenerateNumberParts()
925925
.GenerateWordParts()
926+
.IgnoreKeywords()
926927
.PreserveOriginal()
927928
.ProtectedWords("x", "y", "z")
928929
.SplitOnCaseChange()
@@ -939,6 +940,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
939940
CatenateWords = true,
940941
GenerateNumberParts = true,
941942
GenerateWordParts = true,
943+
IgnoreKeywords = true,
942944
PreserveOriginal = true,
943945
ProtectedWords = new[] { "x", "y", "z" },
944946
SplitOnCaseChange = true,
@@ -952,6 +954,7 @@ public class WordDelimiterGraphTests : TokenFilterAssertionBase<WordDelimiterGra
952954
adjust_offsets = true,
953955
generate_word_parts = true,
954956
generate_number_parts = true,
957+
ignore_keywords = true,
955958
catenate_words = true,
956959
catenate_numbers = true,
957960
catenate_all = true,

tests/Tests/Analysis/TokenFilters/TokenFilterUsageTests.cs

+3
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ public class TokenFilterUsageTests : PromiseUsageTestBase<IIndexSettings, IndexS
141141
.CatenateWords()
142142
.GenerateNumberParts()
143143
.GenerateWordParts()
144+
.IgnoreKeywords()
144145
.PreserveOriginal()
145146
.ProtectedWords("x", "y", "z")
146147
.SplitOnCaseChange()
@@ -301,6 +302,7 @@ public class TokenFilterUsageTests : PromiseUsageTestBase<IIndexSettings, IndexS
301302
CatenateWords = true,
302303
GenerateNumberParts = true,
303304
GenerateWordParts = true,
305+
IgnoreKeywords = true,
304306
PreserveOriginal = true,
305307
ProtectedWords = new[] { "x", "y", "z" },
306308
SplitOnCaseChange = true,
@@ -624,6 +626,7 @@ public class TokenFilterUsageTests : PromiseUsageTestBase<IIndexSettings, IndexS
624626
type = "word_delimiter_graph",
625627
generate_word_parts = true,
626628
generate_number_parts = true,
629+
ignore_keywords = true,
627630
catenate_words = true,
628631
catenate_numbers = true,
629632
catenate_all = true,

0 commit comments

Comments
 (0)