Apply keyword normalizers in the field retrieval API. (#59260)

jtibshirani · jtibshirani · commit 64143b8357c7 · 2020-07-17T20:12:44.000-07:00
As we discussed in the meta-issue, when returning `keyword` fields in the
fields retrieval API, we'll apply their `normalizer`. This decision is not a
clear-cut one, and we'll validate it with internal users before merging the
feature branch.
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java
@@ -47,6 +47,7 @@
 import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
@@ -361,25 +362,9 @@ protected void parseCreateField(ParseContext context) throws IOException {
             return;
         }
 
-        final NamedAnalyzer normalizer = fieldType().normalizer();
+        NamedAnalyzer normalizer = fieldType().normalizer();
         if (normalizer != null) {
-            try (TokenStream ts = normalizer.tokenStream(name(), value)) {
-                final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
-                ts.reset();
-                if (ts.incrementToken() == false) {
-                  throw new IllegalStateException("The normalization token stream is "
-                      + "expected to produce exactly 1 token, but got 0 for analyzer "
-                      + normalizer + " and input \"" + value + "\"");
-                }
-                final String newValue = termAtt.toString();
-                if (ts.incrementToken()) {
-                  throw new IllegalStateException("The normalization token stream is "
-                      + "expected to produce exactly 1 token, but got 2+ for analyzer "
-                      + normalizer + " and input \"" + value + "\"");
-                }
-                ts.end();
-                value = newValue;
-            }
+            value = normalizeValue(normalizer, value);
         }
 
         // convert to utf8 only once before feeding postings/dv/stored fields
@@ -398,6 +383,26 @@ protected void parseCreateField(ParseContext context) throws IOException {
         }
     }
 
+    private String normalizeValue(NamedAnalyzer normalizer, String value) throws IOException {
+        try (TokenStream ts = normalizer.tokenStream(name(), value)) {
+            final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
+            ts.reset();
+            if (ts.incrementToken() == false) {
+                throw new IllegalStateException("The normalization token stream is "
+                    + "expected to produce exactly 1 token, but got 0 for analyzer "
+                    + normalizer + " and input \"" + value + "\"");
+            }
+            final String newValue = termAtt.toString();
+            if (ts.incrementToken()) {
+                throw new IllegalStateException("The normalization token stream is "
+                    + "expected to produce exactly 1 token, but got 2+ for analyzer "
+                    + normalizer + " and input \"" + value + "\"");
+            }
+            ts.end();
+            return newValue;
+        }
+    }
+
     @Override
     protected String parseSourceValue(Object value, String format) {
         if (format != null) {
@@ -408,7 +413,17 @@ protected String parseSourceValue(Object value, String format) {
         if (keywordValue.length() > ignoreAbove) {
             return null;
         }
-        return keywordValue;
+
+        NamedAnalyzer normalizer = fieldType().normalizer();
+        if (normalizer == null) {
+            return keywordValue;
+        }
+
+        try {
+            return normalizeValue(normalizer, keywordValue);
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
     }
 
     @Override
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java
@@ -650,6 +650,13 @@ public void testParseSourceValue() {
         assertEquals("42", ignoreAboveMapper.parseSourceValue(42L, null));
         assertEquals("true", ignoreAboveMapper.parseSourceValue(true, null));
 
+        KeywordFieldMapper normalizerMapper = new KeywordFieldMapper.Builder("field")
+            .normalizer(indexService.getIndexAnalyzers(), "lowercase")
+            .build(context);
+        assertEquals("value", normalizerMapper.parseSourceValue("VALUE", null));
+        assertEquals("42", normalizerMapper.parseSourceValue(42L, null));
+        assertEquals("value", normalizerMapper.parseSourceValue("value", null));
+
         KeywordFieldMapper nullValueMapper = new KeywordFieldMapper.Builder("field")
             .nullValue("NULL")
             .build(context);