|
27 | 27 | import org.elasticsearch.xpack.ql.expression.Expression;
|
28 | 28 | import org.elasticsearch.xpack.ql.expression.Expressions;
|
29 | 29 | import org.elasticsearch.xpack.ql.expression.FieldAttribute;
|
| 30 | +import org.elasticsearch.xpack.ql.expression.NamedExpression; |
30 | 31 | import org.elasticsearch.xpack.ql.expression.predicate.Predicates;
|
31 | 32 | import org.elasticsearch.xpack.ql.expression.predicate.logical.BinaryLogic;
|
32 | 33 | import org.elasticsearch.xpack.ql.expression.predicate.operator.comparison.BinaryComparison;
|
@@ -126,32 +127,100 @@ protected PhysicalPlan rule(LocalPlanExec plan) {
|
126 | 127 | * Copy any limit/sort/topN in the local plan (before the exchange) after it so after gathering the data,
|
127 | 128 | * the limit still applies.
|
128 | 129 | */
|
129 |
| - private static class LocalToGlobalLimitAndTopNExec extends Rule<PhysicalPlan, PhysicalPlan> { |
| 130 | + private static class LocalToGlobalLimitAndTopNExec extends OptimizerRule<ExchangeExec> { |
130 | 131 |
|
131 |
| - public PhysicalPlan apply(PhysicalPlan plan) { |
132 |
| - return plan.transformUp(UnaryExec.class, u -> { |
133 |
| - PhysicalPlan pl = u; |
134 |
| - if (u.child()instanceof ExchangeExec exchange) { |
135 |
| - var localLimit = findLocalLimitOrTopN(exchange); |
136 |
| - if (localLimit != null) { |
137 |
| - pl = localLimit.replaceChild(u); |
138 |
| - } |
139 |
| - } |
140 |
| - return pl; |
141 |
| - }); |
| 132 | + private LocalToGlobalLimitAndTopNExec() { |
| 133 | + super(OptimizerRules.TransformDirection.UP); |
| 134 | + } |
| 135 | + |
| 136 | + @Override |
| 137 | + protected PhysicalPlan rule(ExchangeExec exchange) { |
| 138 | + if (exchange.getType() == ExchangeExec.Type.GATHER) { |
| 139 | + return maybeAddGlobalLimitOrTopN(exchange); |
| 140 | + } |
| 141 | + return exchange; |
142 | 142 | }
|
143 | 143 |
|
144 |
| - private UnaryExec findLocalLimitOrTopN(UnaryExec localPlan) { |
145 |
| - for (var plan = localPlan.child();;) { |
146 |
| - if (plan instanceof LimitExec || plan instanceof TopNExec) { |
147 |
| - return (UnaryExec) plan; |
| 144 | + /** |
| 145 | + * This method copies any Limit/Sort/TopN in the local plan (before the exchange) after it, |
| 146 | + * ensuring that all the inputs are available at that point. |
| 147 | + * E.g. if between the exchange and the TopN there is a <code>project</code> that filters out |
| 148 | + * some inputs needed by the topN (i.e. the sorting fields), this method also modifies |
| 149 | + * the existing <code>project</code> to make these inputs available to the global TopN, and then adds |
| 150 | + * another <code>project</code> at the end of the plan, to ensure that the original semantics |
| 151 | + * are preserved. |
| 152 | + * |
| 153 | + * In detail: |
| 154 | + * <ol> |
| 155 | + * <li>Traverse the plan down starting from the exchange, looking for the first Limit/Sort/TopN</li> |
| 156 | + * <li>If a Limit is found, copy it after the Exchange to make it a global limit</li> |
| 157 | + * <li>If a TopN is found, copy it after the Exchange and ensure that it has all the inputs needed: |
| 158 | + * <ol> |
| 159 | + * <li>Starting from the TopN, traverse the plan backwards and check that all the nodes propagate |
| 160 | + * the inputs needed by the TopN</li> |
| 161 | + * <li>If a Project node filters out some of the inputs needed by the TopN, |
| 162 | + * replace it with another one that includes those inputs</li> |
| 163 | + * <li>Copy the TopN after the exchange, to make it global</li> |
| 164 | + * <li>If the outputs of the new global TopN are different from the outputs of the original Exchange, |
| 165 | + * add another Project that filters out the unneeded outputs and preserves the original semantics</li> |
| 166 | + * </ol> |
| 167 | + * </li> |
| 168 | + * </ol> |
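| 169 | + * @param exchange the exchange whose child plan is searched for a Limit/TopN |
| 170 | + * @return a plan with a global Limit/TopN (possibly wrapped in a Project) on top of the exchange, or the exchange itself if no Limit/TopN is found |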
| 171 | + */ |
| 172 | + private PhysicalPlan maybeAddGlobalLimitOrTopN(ExchangeExec exchange) { |
| 173 | + List<UnaryExec> visitedNodes = new ArrayList<>(); |
| 174 | + visitedNodes.add(exchange); |
| 175 | + AttributeSet exchangeOutputSet = exchange.outputSet(); |
| 176 | + // step 1: traverse the plan and find Limit/TopN |
| 177 | + for (var plan = exchange.child();;) { |
| 178 | + if (plan instanceof LimitExec limit) { |
| 179 | + // Step 2: just add a global Limit |
| 180 | + return limit.replaceChild(exchange); |
| 181 | + } |
| 182 | + if (plan instanceof TopNExec topN) { |
| 183 | + // Step 3: copy the TopN after the Exchange and ensure that it has all the inputs needed |
| 184 | + Set<Attribute> requiredAttributes = Expressions.references(topN.order()).combine(topN.inputSet()); |
| 185 | + if (exchangeOutputSet.containsAll(requiredAttributes)) { |
| 186 | + return topN.replaceChild(exchange); |
| 187 | + } |
| 188 | + |
| 189 | + PhysicalPlan subPlan = topN; |
| 190 | + // Step 3.1: Traverse the plan backwards to check inputs available |
| 191 | + for (int i = visitedNodes.size() - 1; i >= 0; i--) { |
| 192 | + UnaryExec node = visitedNodes.get(i); |
| 193 | + if (node instanceof ProjectExec proj && node.outputSet().containsAll(requiredAttributes) == false) { |
| 194 | + // Step 3.2: a Project is filtering out some inputs needed by the global TopN, |
| 195 | + // replace it with another one that preserves these inputs |
| 196 | + List<NamedExpression> newProjections = new ArrayList<>(proj.projections()); |
| 197 | + for (Attribute attr : requiredAttributes) { |
| 198 | + if (newProjections.contains(attr) == false) { |
| 199 | + newProjections.add(attr); |
| 200 | + } |
| 201 | + } |
| 202 | + node = new ProjectExec(proj.source(), proj.child(), newProjections); |
| 203 | + } |
| 204 | + subPlan = node.replaceChild(subPlan); |
| 205 | + } |
| 206 | + |
| 207 | + // Step 3.3: add the global TopN right after the exchange |
| 208 | + topN = topN.replaceChild(subPlan); |
| 209 | + if (exchangeOutputSet.containsAll(topN.output())) { |
| 210 | + return topN; |
| 211 | + } else { |
| 212 | + // Step 3.4: the global TopN outputs attributes that the original exchange did not output, |
| 213 | + // add one more Project on top to restore the original output and preserve the query semantics |
| 214 | + return new ProjectExec(topN.source(), topN, new ArrayList<>(exchangeOutputSet)); |
| 215 | + } |
148 | 216 | }
|
149 |
| - // possible to go deeper |
150 | 217 | if (plan instanceof ProjectExec || plan instanceof EvalExec) {
|
| 218 | + visitedNodes.add((UnaryExec) plan); |
| 219 | + // go deeper with step 1 |
151 | 220 | plan = ((UnaryExec) plan).child();
|
152 | 221 | } else {
|
153 |
| - // no limit specified |
154 |
| - return null; |
| 222 | + // no limit specified, return the original plan |
| 223 | + return exchange; |
155 | 224 | }
|
156 | 225 | }
|
157 | 226 | }
|
|
0 commit comments