elastic · benwtrent · Dec 18, 2019 · Dec 16, 2019 · Dec 16, 2019 · Dec 17, 2019
diff --git a/.../main/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/FrequencyEncoding.java b/.../main/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/FrequencyEncoding.java
@@ -14,6 +14,7 @@
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.core.ml.utils.MapHelper;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -103,7 +104,7 @@ public String getName() {
 
     @Override
     public void process(Map<String, Object> fields) {
-        Object value = fields.get(field);
+        Object value = MapHelper.dig(field, fields);
         if (value == null) {
             return;
         }

diff --git a/...src/main/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/OneHotEncoding.java b/...src/main/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/OneHotEncoding.java
@@ -14,6 +14,7 @@
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.core.ml.utils.MapHelper;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -86,7 +87,7 @@ public String getName() {
 
     @Override
     public void process(Map<String, Object> fields) {
-        Object value = fields.get(field);
+        Object value = MapHelper.dig(field, fields);
         if (value == null) {
             return;
         }

diff --git a/...main/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/TargetMeanEncoding.java b/...main/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/TargetMeanEncoding.java
@@ -14,6 +14,7 @@
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.core.ml.utils.MapHelper;
 
 import java.io.IOException;
 import java.util.Collections;
@@ -114,7 +115,7 @@ public String getName() {
 
     @Override
     public void process(Map<String, Object> fields) {
-        Object value = fields.get(field);
+        Object value = MapHelper.dig(field, fields);
         if (value == null) {
             return;
         }

diff --git a/.../core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/tree/Tree.java b/.../core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/tree/Tree.java
@@ -28,6 +28,7 @@
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.StrictlyParsedTrainedModel;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TargetType;
 import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
+import org.elasticsearch.xpack.core.ml.utils.MapHelper;
 
 import java.io.IOException;
 import java.util.ArrayDeque;
@@ -129,7 +130,9 @@ public InferenceResults infer(Map<String, Object> fields, InferenceConfig config
                 "Cannot infer using configuration for [{}] when model target_type is [{}]", config.getName(), targetType.toString());
         }
 
-        List<Double> features = featureNames.stream().map(f -> InferenceHelpers.toDouble(fields.get(f))).collect(Collectors.toList());
+        List<Double> features = featureNames.stream()
+            .map(f -> InferenceHelpers.toDouble(MapHelper.dig(f, fields)))
+            .collect(Collectors.toList());
         return infer(features, config);
     }
 

diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MapHelper.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/utils/MapHelper.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.utils;
+
+import org.elasticsearch.common.Nullable;
+
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Stack;
+
+public final class MapHelper {
+
+    private MapHelper() {}
+
+    /**
+     * This eagerly digs (depth first search, longer keys first) through the map by tokenizing the provided path on '.'.
+     *
+     * It is possible for ES _source docs to have "mixed" path formats. So, we should search all potential paths
+     * given the current knowledge of the map.
+     *
+     * Examples:
+     *
+     * The following maps would return `2` given the path "a.b.c.d"
+     *
+     * {
+     *     "a.b.c.d" : 2
+     * }
+     * {
+     *     "a" :{"b": {"c": {"d" : 2}}}
+     * }
+     * {
+     *     "a" :{"b.c": {"d" : 2}}
+     * }
+     * {
+     *     "a" :{"b": {"c": {"d" : 5}}},
+     *     "a.b" :{"c": {"d" : 2}} // we choose the first one found, we go down longer keys first
+     * }
+     * {
+     *     "a" :{"b": {"c": {"NOT_d" : 2, "d": 2}}}
+     * }
+     *
+     * Conceptual "Worst case" 5 potential paths explored for "a.b.c.d" until 2 is finally returned
+     * {
+     *     "a.b.c": {"not_d": 2},
+     *     "a.b": {"c": {"not_d": 2}},
+     *     "a": {"b.c": {"not_d": 2}},
+     *     "a": {"b" :{ "c.not_d": 2}},
+     *     "a" :{"b": {"c": {"not_d" : 2}}},
+     *     "a" :{"b": {"c": {"d" : 2}}},
+     * }
+     *
+     * We don't exhaustively create all potential paths.
+     * If we did, this would result in 2^n-1 total possible paths, where {@code n = path.split("\\.").length}.
+     *
+     * Instead we lazily create potential paths once we know that they are possibilities.
+     *
+     * A key that is present but explicitly mapped to {@code null} ends the search with {@code null}.
+     *
+     * @param path Dot delimited path containing the field desired
+     * @param map The {@link Map} map to dig
+     * @return The found object. Returns {@code null} if not found
+     * @throws IllegalArgumentException if the path is not a top level key of {@code map} and contains an empty
+     *                                  segment (leading, trailing or doubled '.')
+     */
+    @Nullable
+    public static Object dig(String path, Map<String, Object> map) {
+        // short cut before search
+        if (map.containsKey(path)) {
+            return map.get(path);
+        }
+        // The negative limit keeps trailing empty tokens, so "a.b." is rejected exactly like ".a.b" and "a..b"
+        String[] fields = path.split("\\.", -1);
+        if (Arrays.stream(fields).anyMatch(String::isEmpty)) {
+            throw new IllegalArgumentException("Empty path detected. Invalid field name");
+        }
+        Stack<PotentialPath> pathStack = new Stack<>();
+        pathStack.push(new PotentialPath(map, 0));
+        return explore(fields, pathStack);
+    }
+
+    /**
+     * Depth first search over the potential paths. For the map on top of the stack every key that can be built from
+     * the remaining path tokens is tried, shortest first. Nested maps found along the way are pushed, so the map
+     * reached through the longest key is the next one to be explored.
+     *
+     * @param path The tokenized path
+     * @param pathStack The potential paths still to explore, must contain the starting point
+     * @return The first value found for the full path, or {@code null} if there is none
+     */
+    private static Object explore(String[] path, Stack<PotentialPath> pathStack) {
+        while (pathStack.empty() == false) {
+            PotentialPath potentialPath = pathStack.pop();
+            int startPos = potentialPath.pathPosition;
+            Map<String, Object> map = potentialPath.map;
+            String candidateKey = null;
+            for (int endPos = startPos + 1; endPos <= path.length; endPos++) {
+                candidateKey = mergePath(path, startPos, endPos);
+                Object next = map.get(candidateKey);
+                if (endPos == path.length && next != null) { // exit early, we reached the full path and found something
+                    return next;
+                }
+                if (next instanceof Map<?, ?>) { // we found another map, continue exploring down this path
+                    @SuppressWarnings("unchecked")
+                    Map<String, Object> nextMap = (Map<String, Object>) next;
+                    pathStack.push(new PotentialPath(nextMap, endPos));
+                }
+            }
+            // candidateKey now covers all remaining tokens; the key being present here means it is mapped to null
+            if (candidateKey != null && map.containsKey(candidateKey)) { //exit early
+                return map.get(candidateKey);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Joins the tokens {@code path[start]} (inclusive) to {@code path[end]} (exclusive) with '.'.
+     */
+    private static String mergePath(String[] path, int start, int end) {
+        if (start + 1 == end) { // early exit, single token needs no joining
+            return path[start];
+        }
+        return String.join(".", Arrays.copyOfRange(path, start, end));
+    }
+
+    private static class PotentialPath {
+
+        // Pointer to where to start exploring
+        private final Map<String, Object> map;
+        // Where in the requested path are we
+        private final int pathPosition;
+
+        private PotentialPath(Map<String, Object> map, int pathPosition) {
+            this.map = map;
+            this.pathPosition = pathPosition;
+        }
+
+    }
+}
diff --git a/.../java/org/elasticsearch/xpack/core/ml/inference/preprocessing/FrequencyEncodingTests.java b/.../java/org/elasticsearch/xpack/core/ml/inference/preprocessing/FrequencyEncodingTests.java
@@ -65,4 +65,22 @@ public void testProcessWithFieldPresent() {
         testProcess(encoding, fieldValues, matchers);
     }
 
+    /**
+     * The encoded field is addressed with a dotted path while the document stores it as a nested object.
+     */
+    public void testProcessWithNestedField() {
+        List<Object> categories = Arrays.asList("foo", "bar", "foobar", "baz", "farequote", 1.5);
+        // one random frequency per category, keyed by the category's string form
+        Map<String, Double> frequencies = categories.stream()
+            .collect(Collectors.toMap(Object::toString, category -> randomDoubleBetween(0.0, 1.0, false)));
+        FrequencyEncoding encoding = new FrequencyEncoding("categorical.child", "encoded", frequencies);
+
+        // {"categorical": {"child": "farequote"}}
+        Map<String, Object> categorical = new HashMap<>();
+        categorical.put("child", "farequote");
+        Map<String, Object> doc = new HashMap<>();
+        doc.put("categorical", categorical);
+
+        encoding.process(doc);
+
+        Double expectedFrequency = frequencies.get("farequote");
+        assertThat(doc.get("encoded"), equalTo(expectedFrequency));
+    }
+
 }
diff --git a/...est/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/OneHotEncodingTests.java b/...est/java/org/elasticsearch/xpack/core/ml/inference/preprocessing/OneHotEncodingTests.java
@@ -67,4 +67,19 @@ public void testProcessWithFieldPresent() {
         testProcess(encoding, fieldValues, matchers);
     }
 
+    /**
+     * The encoded field is addressed with a dotted path while the document stores it as a nested object.
+     */
+    public void testProcessWithNestedField() {
+        List<Object> categories = Arrays.asList("foo", "bar", "foobar", "baz", "farequote", 1.5);
+        // every category gets its own hot column named "Column_<category>"
+        Map<String, String> hotColumns = categories.stream()
+            .collect(Collectors.toMap(Object::toString, category -> "Column_" + category.toString()));
+        OneHotEncoding encoding = new OneHotEncoding("categorical.child", hotColumns);
+
+        // {"categorical": {"child": "farequote"}}
+        Map<String, Object> categorical = new HashMap<>();
+        categorical.put("child", "farequote");
+        Map<String, Object> doc = new HashMap<>();
+        doc.put("categorical", categorical);
+
+        encoding.process(doc);
+
+        assertThat(doc.get("Column_farequote"), equalTo(1));
+    }
+
 }
diff --git a/...java/org/elasticsearch/xpack/core/ml/inference/preprocessing/TargetMeanEncodingTests.java b/...java/org/elasticsearch/xpack/core/ml/inference/preprocessing/TargetMeanEncodingTests.java
@@ -68,4 +68,24 @@ public void testProcessWithFieldPresent() {
         testProcess(encoding, fieldValues, matchers);
     }
 
+    /**
+     * The encoded field is addressed with a dotted path while the document stores it as a nested object.
+     */
+    public void testProcessWithNestedField() {
+        List<Object> categories = Arrays.asList("foo", "bar", "foobar", "baz", "farequote", 1.5);
+        // one random target mean per category, keyed by the category's string form
+        Map<String, Double> targetMeans = categories.stream()
+            .collect(Collectors.toMap(Object::toString, category -> randomDoubleBetween(0.0, 1.0, false)));
+        Double fallbackMean = randomDouble();
+        TargetMeanEncoding encoding = new TargetMeanEncoding("categorical.child", "encoded", targetMeans, fallbackMean);
+
+        // {"categorical": {"child": "farequote"}}
+        Map<String, Object> categorical = new HashMap<>();
+        categorical.put("child", "farequote");
+        Map<String, Object> doc = new HashMap<>();
+        doc.put("categorical", categorical);
+
+        encoding.process(doc);
+
+        Double expectedMean = targetMeans.get("farequote");
+        assertThat(doc.get("encoded"), equalTo(expectedMean));
+    }
+
 }
diff --git a/...t/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/ensemble/EnsembleTests.java b/...t/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/ensemble/EnsembleTests.java
@@ -445,6 +445,63 @@ public void testRegressionInference() {
             closeTo(((SingleValueInferenceResults)ensemble.infer(featureMap, RegressionConfig.EMPTY_PARAMS)).value(), 0.00001));
     }
 
+    /**
+     * Runs a two tree regression ensemble against documents whose features are stored as nested objects
+     * ({"foo": {"baz": ..}, "bar": {"biz": ..}}) while the model refers to them by dotted path.
+     */
+    public void testInferNestedFields() {
+        List<String> featureNames = Arrays.asList("foo.baz", "bar.biz");
+        // splits on feature 0 first, then on feature 1 below node 2
+        Tree twoSplitTree = Tree.builder()
+            .setFeatureNames(featureNames)
+            .setRoot(TreeNode.builder(0)
+                .setLeftChild(1)
+                .setRightChild(2)
+                .setSplitFeature(0)
+                .setThreshold(0.5))
+            .addNode(TreeNode.builder(1).setLeafValue(0.3))
+            .addNode(TreeNode.builder(2)
+                .setThreshold(0.8)
+                .setSplitFeature(1)
+                .setLeftChild(3)
+                .setRightChild(4))
+            .addNode(TreeNode.builder(3).setLeafValue(0.1))
+            .addNode(TreeNode.builder(4).setLeafValue(0.2)).build();
+        // single split on feature 0
+        Tree oneSplitTree = Tree.builder()
+            .setFeatureNames(featureNames)
+            .setRoot(TreeNode.builder(0)
+                .setLeftChild(1)
+                .setRightChild(2)
+                .setSplitFeature(0)
+                .setThreshold(0.5))
+            .addNode(TreeNode.builder(1).setLeafValue(1.5))
+            .addNode(TreeNode.builder(2).setLeafValue(0.9))
+            .build();
+        Ensemble ensemble = Ensemble.builder()
+            .setTargetType(TargetType.REGRESSION)
+            .setFeatureNames(featureNames)
+            .setTrainedModels(Arrays.asList(twoSplitTree, oneSplitTree))
+            .setOutputAggregator(new WeightedSum(new double[]{0.5, 0.5}))
+            .build();
+
+        // first document: foo.baz = 0.4, bar.biz = 0.0
+        Map<String, Object> firstFoo = new HashMap<>();
+        firstFoo.put("baz", 0.4);
+        Map<String, Object> firstBar = new HashMap<>();
+        firstBar.put("biz", 0.0);
+        Map<String, Object> firstDoc = new HashMap<>();
+        firstDoc.put("foo", firstFoo);
+        firstDoc.put("bar", firstBar);
+        double firstPrediction = ((SingleValueInferenceResults) ensemble.infer(firstDoc, RegressionConfig.EMPTY_PARAMS)).value();
+        assertThat(0.9, closeTo(firstPrediction, 0.00001));
+
+        // second document: foo.baz = 2.0, bar.biz = 0.7
+        Map<String, Object> secondFoo = new HashMap<>();
+        secondFoo.put("baz", 2.0);
+        Map<String, Object> secondBar = new HashMap<>();
+        secondBar.put("biz", 0.7);
+        Map<String, Object> secondDoc = new HashMap<>();
+        secondDoc.put("foo", secondFoo);
+        secondDoc.put("bar", secondBar);
+        double secondPrediction = ((SingleValueInferenceResults) ensemble.infer(secondDoc, RegressionConfig.EMPTY_PARAMS)).value();
+        assertThat(0.5, closeTo(secondPrediction, 0.00001));
+    }
+
     public void testOperationsEstimations() {
         Tree tree1 = TreeTests.buildRandomTree(Arrays.asList("foo", "bar"), 2);
         Tree tree2 = TreeTests.buildRandomTree(Arrays.asList("foo", "bar", "baz"), 5);

diff --git a/.../src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/tree/TreeTests.java b/.../src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/tree/TreeTests.java
@@ -169,6 +169,58 @@ public void testInfer() {
             closeTo(((SingleValueInferenceResults)tree.infer(featureMap, RegressionConfig.EMPTY_PARAMS)).value(), 0.00001));
     }
 
+    /**
+     * Same tree shape as the flat-field inference test, but the documents store the features as nested objects
+     * ({"foo": {"baz": ..}, "bar": {"biz": ..}}) while the tree refers to them by dotted path.
+     */
+    public void testInferNestedFields() {
+        // Build a tree with 2 nodes and 3 leaves using 2 features
+        // The leaves have unique values 0.1, 0.2, 0.3
+        Tree.Builder treeBuilder = Tree.builder().setTargetType(TargetType.REGRESSION);
+        TreeNode.Builder root = treeBuilder.addJunction(0, 0, true, 0.5);
+        treeBuilder.addLeaf(root.getRightChild(), 0.3);
+        TreeNode.Builder leftOfRoot = treeBuilder.addJunction(root.getLeftChild(), 1, true, 0.8);
+        treeBuilder.addLeaf(leftOfRoot.getLeftChild(), 0.1);
+        treeBuilder.addLeaf(leftOfRoot.getRightChild(), 0.2);
+
+        List<String> featureNames = Arrays.asList("foo.baz", "bar.biz");
+        Tree tree = treeBuilder.setFeatureNames(featureNames).build();
+
+        // This feature vector should hit the right child of the root node
+        Map<String, Object> rightFoo = new HashMap<>();
+        rightFoo.put("baz", 0.6);
+        Map<String, Object> rightBar = new HashMap<>();
+        rightBar.put("biz", 0.0);
+        Map<String, Object> rightDoc = new HashMap<>();
+        rightDoc.put("foo", rightFoo);
+        rightDoc.put("bar", rightBar);
+        double rightPrediction = ((SingleValueInferenceResults) tree.infer(rightDoc, RegressionConfig.EMPTY_PARAMS)).value();
+        assertThat(0.3, closeTo(rightPrediction, 0.00001));
+
+        // This should hit the left child of the left child of the root node
+        // i.e. it takes the path left, left
+        Map<String, Object> leftLeftFoo = new HashMap<>();
+        leftLeftFoo.put("baz", 0.3);
+        Map<String, Object> leftLeftBar = new HashMap<>();
+        leftLeftBar.put("biz", 0.7);
+        Map<String, Object> leftLeftDoc = new HashMap<>();
+        leftLeftDoc.put("foo", leftLeftFoo);
+        leftLeftDoc.put("bar", leftLeftBar);
+        double leftLeftPrediction = ((SingleValueInferenceResults) tree.infer(leftLeftDoc, RegressionConfig.EMPTY_PARAMS)).value();
+        assertThat(0.1, closeTo(leftLeftPrediction, 0.00001));
+
+        // This should hit the right child of the left child of the root node
+        // i.e. it takes the path left, right
+        Map<String, Object> leftRightFoo = new HashMap<>();
+        leftRightFoo.put("baz", 0.3);
+        Map<String, Object> leftRightBar = new HashMap<>();
+        leftRightBar.put("biz", 0.9);
+        Map<String, Object> leftRightDoc = new HashMap<>();
+        leftRightDoc.put("foo", leftRightFoo);
+        leftRightDoc.put("bar", leftRightBar);
+        double leftRightPrediction = ((SingleValueInferenceResults) tree.infer(leftRightDoc, RegressionConfig.EMPTY_PARAMS)).value();
+        assertThat(0.2, closeTo(leftRightPrediction, 0.00001));
+    }
+
     public void testTreeClassificationProbability() {
         // Build a tree with 2 nodes and 3 leaves using 2 features
         // The leaves have unique values 0.1, 0.2, 0.3