Skip to content

Commit 3e7ec70

Browse files
committed
Merge pull request #1125 from elasticsearch/feature/keep-types-filtering
added support for the keep_types token filter fix #1103
2 parents 1c8c79c + 2bfdee8 commit 3e7ec70

File tree

6 files changed

+78
-24
lines changed

6 files changed

+78
-24
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using System.Collections.Generic;
2+
using Newtonsoft.Json;
3+
4+
namespace Nest
5+
{
6+
/// <summary>
7+
/// A token filter of type keep_types that only keeps tokens whose token type is contained in a predefined set of types.
8+
/// </summary>
9+
public class KeepTypesTokenFilter : TokenFilterBase
10+
{
11+
public KeepTypesTokenFilter()
12+
: base("keep_types")
13+
{
14+
15+
}
16+
17+
/// <summary>
18+
/// A list of token types to keep; serialized as the <c>types</c> setting.
19+
/// </summary>
20+
[JsonProperty("types")]
21+
public IEnumerable<string> Types { get; set; }
22+
23+
}
24+
}

src/Nest/Domain/Analysis/TokenFilter/KeepWordsTokenFilter.cs

+24-24
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,34 @@
33

44
namespace Nest
55
{
6-
/// <summary>
7-
/// A token filter of type keep that only keeps tokens with text contained in a predefined set of words.
8-
/// </summary>
9-
public class KeepWordsTokenFilter : TokenFilterBase
10-
{
11-
public KeepWordsTokenFilter()
12-
: base("keep")
13-
{
6+
/// <summary>
7+
/// A token filter of type keep that only keeps tokens with text contained in a predefined set of words.
8+
/// </summary>
9+
public class KeepWordsTokenFilter : TokenFilterBase
10+
{
11+
public KeepWordsTokenFilter()
12+
: base("keep")
13+
{
1414

15-
}
15+
}
1616

17-
/// <summary>
18-
/// A list of words to keep.
19-
/// </summary>
20-
[JsonProperty("keep_words")]
21-
public IEnumerable<string> KeepWords { get; set; }
17+
/// <summary>
18+
/// A list of words to keep.
19+
/// </summary>
20+
[JsonProperty("keep_words")]
21+
public IEnumerable<string> KeepWords { get; set; }
2222

23-
/// <summary>
24-
/// A path to a words file.
25-
/// </summary>
26-
[JsonProperty("rules_path")]
27-
public string KeepWordsPath { get; set; }
23+
/// <summary>
24+
/// A path to a words file.
25+
/// </summary>
26+
[JsonProperty("rules_path")]
27+
public string KeepWordsPath { get; set; }
2828

29-
/// <summary>
30-
/// A boolean indicating whether to lower case the words.
31-
/// </summary>
32-
[JsonProperty("keep_words_case")]
29+
/// <summary>
30+
/// A boolean indicating whether to lower case the words.
31+
/// </summary>
32+
[JsonProperty("keep_words_case")]
3333
public bool? KeepWordsCase { get; set; }
3434

35-
}
35+
}
3636
}

src/Nest/Nest.csproj

+1
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@
134134
<Compile Include="Domain\Analysis\CharFilter\PatternReplaceCharFilter.cs" />
135135
<Compile Include="Domain\Analysis\TokenFilter\DelimitedPayloadTokenFilter.cs" />
136136
<Compile Include="Domain\Analysis\TokenFilter\CommonGramsTokenFilter.cs" />
137+
<Compile Include="Domain\Analysis\TokenFilter\KeepTypesTokenFilter.cs" />
137138
<Compile Include="Domain\Analysis\TokenFilter\KeywordRepeatTokenFilter.cs" />
138139
<Compile Include="Domain\Analysis\TokenFilter\HunspellTokenFilter.cs" />
139140
<Compile Include="Domain\Analysis\TokenFilter\LimitTokenCountTokenFilter.cs" />

src/Tests/Nest.Tests.Unit/Core/Indices/Analysis/Analyzers/AnalyzerTests.cs

+12
Original file line numberDiff line numberDiff line change
@@ -116,5 +116,17 @@ public void WhitespaceAnalyzerTest()
116116

117117
this.JsonEquals(result.ConnectionStatus.Request, MethodInfo.GetCurrentMethod());
118118
}
119+
120+
[Test] // Checks the index-settings request produced when a keep_types token filter is registered.
121+
public void KeepTypesTokenFilter()
122+
{
123+
var result = this.Analysis(a => a
124+
.TokenFilters(tf => tf
125+
.Add("keep", new KeepTypesTokenFilter {Types = new[] {"<NUM>"}}) // "keep" is only the name the filter is registered under; its type is "keep_types"
126+
)
127+
);
128+
129+
this.JsonEquals(result.ConnectionStatus.Request, MethodInfo.GetCurrentMethod()); // NOTE(review): presumably compares against the JSON fixture named after this method (KeepTypesTokenFilter.json) - confirm in JsonEquals
130+
}
119131
}
120132
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"settings": {
3+
"index": {
4+
"analysis": {
5+
"filter": {
6+
"keep": {
7+
"types" : [ "<NUM>" ],
8+
"type": "keep_types"
9+
}
10+
}
11+
}
12+
}
13+
}
14+
}

src/Tests/Nest.Tests.Unit/Nest.Tests.Unit.csproj

+3
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,9 @@
148148
<None Include="Core\Indices\Analysis\Analyzers\LanguageAnalyzerTest.json">
149149
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
150150
</None>
151+
<None Include="Core\Indices\Analysis\Analyzers\KeepTypesTokenFilter.json">
152+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
153+
</None>
151154
<None Include="Core\Indices\Analysis\Analyzers\WhitespaceAnalyzerTest.json">
152155
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
153156
</None>

0 commit comments

Comments
 (0)