Skip to content

Commit f840112

Browse files
committed
Reset Token position on reuse in scripted analysis (#47424)
Most of the information in AnalysisPredicateScript.Token is pulled directly from its underlying AttributeSource, but the token position is tracked separately, as state held directly on the Token. That position must be reset whenever the containing ScriptFilteringTokenFilter or ScriptedConditionTokenFilter is reused; otherwise it keeps accumulating position increments from the previous token stream. Fixes #47197.
1 parent 8117f8a commit f840112

File tree

5 files changed

+26
-9
lines changed

5 files changed

+26
-9
lines changed

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/AnalysisPredicateScript.java

+4
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ public Token(AttributeSource source) {
6161
this.keywordAtt = source.addAttribute(KeywordAttribute.class);
6262
}
6363

64+
public void reset() {
65+
this.pos = -1;
66+
}
67+
6468
public void updatePosition() {
6569
this.pos = this.pos + posIncAtt.getPositionIncrement();
6670
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PredicateTokenFilterScriptFactory.java

+6
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,11 @@ protected boolean accept() throws IOException {
6969
token.updatePosition();
7070
return script.execute(token);
7171
}
72+
73+
@Override
74+
public void reset() throws IOException {
75+
super.reset();
76+
this.token.reset();
77+
}
7278
}
7379
}

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterFactory.java

+7
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.elasticsearch.script.ScriptService;
3232
import org.elasticsearch.script.ScriptType;
3333

34+
import java.io.IOException;
3435
import java.util.ArrayList;
3536
import java.util.List;
3637
import java.util.function.Function;
@@ -119,6 +120,12 @@ protected boolean shouldFilter() {
119120
token.updatePosition();
120121
return script.execute(token);
121122
}
123+
124+
@Override
125+
public void reset() throws IOException {
126+
super.reset();
127+
token.reset();
128+
}
122129
}
123130

124131
}

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PredicateTokenScriptFilterTests.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public void testSimpleFilter() throws IOException {
4646
Settings indexSettings = Settings.builder()
4747
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
4848
.put("index.analysis.filter.f.type", "predicate_token_filter")
49-
.put("index.analysis.filter.f.script.source", "token.getTerm().length() > 5")
49+
.put("index.analysis.filter.f.script.source", "my_script")
5050
.put("index.analysis.analyzer.myAnalyzer.type", "custom")
5151
.put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
5252
.putList("index.analysis.analyzer.myAnalyzer.filter", "f")
@@ -56,7 +56,7 @@ public void testSimpleFilter() throws IOException {
5656
AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
5757
@Override
5858
public boolean execute(Token token) {
59-
return token.getTerm().length() > 5;
59+
return token.getPosition() < 2 || token.getPosition() > 4;
6060
}
6161
};
6262

@@ -65,7 +65,7 @@ public boolean execute(Token token) {
6565
@Override
6666
public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
6767
assertEquals(context, AnalysisPredicateScript.CONTEXT);
68-
assertEquals(new Script("token.getTerm().length() > 5"), script);
68+
assertEquals(new Script("my_script"), script);
6969
return (FactoryType) factory;
7070
}
7171
};
@@ -79,8 +79,8 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
7979

8080
try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
8181
assertNotNull(analyzer);
82-
assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
83-
"Vorsprung", "Technik"
82+
assertAnalyzesTo(analyzer, "Oh what a wonderful thing to be", new String[]{
83+
"Oh", "what", "to", "be"
8484
});
8585
}
8686

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ScriptedConditionTokenFilterTests.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public void testSimpleCondition() throws Exception {
4545
Settings indexSettings = Settings.builder()
4646
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
4747
.put("index.analysis.filter.cond.type", "condition")
48-
.put("index.analysis.filter.cond.script.source", "token.getTerm().length() > 5")
48+
.put("index.analysis.filter.cond.script.source", "token.getPosition() > 1")
4949
.putList("index.analysis.filter.cond.filter", "uppercase")
5050
.put("index.analysis.analyzer.myAnalyzer.type", "custom")
5151
.put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
@@ -56,7 +56,7 @@ public void testSimpleCondition() throws Exception {
5656
AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
5757
@Override
5858
public boolean execute(Token token) {
59-
return token.getTerm().length() > 5;
59+
return token.getPosition() > 1;
6060
}
6161
};
6262

@@ -65,7 +65,7 @@ public boolean execute(Token token) {
6565
@Override
6666
public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
6767
assertEquals(context, AnalysisPredicateScript.CONTEXT);
68-
assertEquals(new Script("token.getTerm().length() > 5"), script);
68+
assertEquals(new Script("token.getPosition() > 1"), script);
6969
return (FactoryType) factory;
7070
}
7171
};
@@ -80,7 +80,7 @@ public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryTyp
8080
try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
8181
assertNotNull(analyzer);
8282
assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
83-
"VORSPRUNG", "Durch", "TECHNIK"
83+
"Vorsprung", "Durch", "TECHNIK"
8484
});
8585
}
8686

0 commit comments

Comments
 (0)