Skip to content

Commit 66e1092

Browse files
committed
SQL: Fix ORDER BY on aggregates and GROUPed BY fields (#51894)
Previously, in the in-memory sorting module `LocalAggregationSorterListener` only the aggregate functions where used (grabbed by the `sortingColumns`). As a consequence, if the ORDER BY was also using columns of the GROUP BY clause, (especially in the case of higher priority - before the aggregate functions) wrong results were produced. E.g.: ``` SELECT gender, MAX(salary) AS max FROM test_emp GROUP BY gender ORDER BY gender, max ``` Add all columns of the ORDER BY to the `sortingColumns` so that the `LocalAggregationSorterListener` can use the correct comparators in the underlying PriorityQueue used to implement the in-memory sorting. Fixes: #50355 (cherry picked from commit be680af)
1 parent a53e4aa commit 66e1092

File tree

9 files changed

+281
-101
lines changed

9 files changed

+281
-101
lines changed

x-pack/plugin/sql/qa/src/main/resources/agg-ordering.csv-spec

+47-1
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,50 @@ g:s | gender:s | s3:i | SUM(salary):i | s5:i
2323
M |M |2671054|2671054 |2671054
2424
F |F |1666196|1666196 |1666196
2525
null |null |487605 |487605 |487605
26-
;
26+
;
27+
28+
histogramDateTimeWithCountAndOrder_1
29+
schema::h:ts|c:l
30+
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC, c ASC;
31+
32+
h | c
33+
------------------------+---------------
34+
1965-01-01T00:00:00.000Z|1
35+
1964-01-01T00:00:00.000Z|4
36+
1963-01-01T00:00:00.000Z|7
37+
1962-01-01T00:00:00.000Z|6
38+
1961-01-01T00:00:00.000Z|8
39+
1960-01-01T00:00:00.000Z|8
40+
1959-01-01T00:00:00.000Z|9
41+
1958-01-01T00:00:00.000Z|7
42+
1957-01-01T00:00:00.000Z|4
43+
1956-01-01T00:00:00.000Z|5
44+
1955-01-01T00:00:00.000Z|4
45+
1954-01-01T00:00:00.000Z|8
46+
1953-01-01T00:00:00.000Z|11
47+
1952-01-01T00:00:00.000Z|8
48+
null |10
49+
;
50+
51+
histogramDateTimeWithCountAndOrder_2
52+
schema::h:ts|c:l
53+
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY c DESC, h ASC;
54+
55+
h | c
56+
------------------------+---------------
57+
1953-01-01T00:00:00.000Z|11
58+
null |10
59+
1959-01-01T00:00:00.000Z|9
60+
1952-01-01T00:00:00.000Z|8
61+
1954-01-01T00:00:00.000Z|8
62+
1960-01-01T00:00:00.000Z|8
63+
1961-01-01T00:00:00.000Z|8
64+
1958-01-01T00:00:00.000Z|7
65+
1963-01-01T00:00:00.000Z|7
66+
1962-01-01T00:00:00.000Z|6
67+
1956-01-01T00:00:00.000Z|5
68+
1955-01-01T00:00:00.000Z|4
69+
1957-01-01T00:00:00.000Z|4
70+
1964-01-01T00:00:00.000Z|4
71+
1965-01-01T00:00:00.000Z|1
72+
;

x-pack/plugin/sql/qa/src/main/resources/agg-ordering.sql-spec

+24-3
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ aggNotSpecifiedInTheAggregateAndGroupWithHavingWithLimitAndDirection
8080
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary) ASC, c DESC LIMIT 5;
8181

8282
groupAndAggNotSpecifiedInTheAggregateWithHaving
83-
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender, MAX(salary);
83+
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender NULLS FIRST, MAX(salary);
8484

8585
multipleAggsThatGetRewrittenWithAliasOnAMediumGroupBy
8686
SELECT languages, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY languages ORDER BY max;
@@ -136,5 +136,26 @@ SELECT gender AS g, first_name AS f, last_name AS l FROM test_emp GROUP BY f, ge
136136
multipleGroupingsAndOrderingByGroups_8
137137
SELECT gender AS g, first_name, last_name FROM test_emp GROUP BY g, last_name, first_name ORDER BY gender ASC, first_name DESC, last_name ASC;
138138

139-
multipleGroupingsAndOrderingByGroupsWithFunctions
140-
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender, c DESC, first_name, last_name ASC;
139+
multipleGroupingsAndOrderingByGroupsAndAggs_1
140+
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender ASC NULLS FIRST, MAX(salary) DESC;
141+
142+
multipleGroupingsAndOrderingByGroupsAndAggs_2
143+
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender DESC NULLS LAST, MAX(salary) ASC;
144+
145+
multipleGroupingsAndOrderingByGroupsWithFunctions_1
146+
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender NULLS FIRST, c DESC, first_name, last_name ASC;
147+
148+
multipleGroupingsAndOrderingByGroupsWithFunctions_2
149+
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY c DESC, gender DESC NULLS LAST, first_name, last_name ASC;
150+
151+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_1
152+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 NULLS FIRST, 2, 3;
153+
154+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_2
155+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 DESC NULLS LAST, 2, 3;
156+
157+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_3
158+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 2, 1 NULLS FIRST, 3;
159+
160+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_4
161+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 3 DESC, 1 NULLS FIRST, 2;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java

+31-16
Original file line numberDiff line numberDiff line change
@@ -605,36 +605,51 @@ static class AggSortingQueue extends PriorityQueue<Tuple<List<?>, Integer>> {
605605
this.sortingColumns = sortingColumns;
606606
}
607607

608-
// compare row based on the received attribute sort
609-
// if a sort item is not in the list, it is assumed the sorting happened in ES
610-
// and the results are left as is (by using the row ordering), otherwise it is sorted based on the given criteria.
611-
//
612-
// Take for example ORDER BY a, x, b, y
613-
// a, b - are sorted in ES
614-
// x, y - need to be sorted client-side
615-
// sorting on x kicks in, only if the values for a are equal.
616-
608+
/**
609+
* Compare row based on the received attribute sort
610+
* <ul>
611+
* <li>
612+
* If a tuple in {@code sortingColumns} has a null comparator, it is assumed the sorting
613+
* happened in ES and the results are left as is (by using the row ordering), otherwise it is
614+
* sorted based on the given criteria.
615+
* </li>
616+
* <li>
617+
* If no tuple exists in {@code sortingColumns} for an output column, it means this column
618+
* is not included at all in the ORDER BY
619+
* </li>
620+
*</ul>
621+
*
622+
* Take for example ORDER BY a, x, b, y
623+
* a, b - are sorted in ES
624+
* x, y - need to be sorted client-side
625+
* sorting on x kicks in only if the values for a are equal.
626+
* sorting on y kicks in only if the values for a, x and b are all equal
627+
*
628+
*/
617629
// thanks to @jpountz for the row ordering idea as a way to preserve ordering
618630
@SuppressWarnings("unchecked")
619631
@Override
620632
protected boolean lessThan(Tuple<List<?>, Integer> l, Tuple<List<?>, Integer> r) {
621633
for (Tuple<Integer, Comparator> tuple : sortingColumns) {
622-
int i = tuple.v1().intValue();
634+
int columnIdx = tuple.v1().intValue();
623635
Comparator comparator = tuple.v2();
624636

625-
Object vl = l.v1().get(i);
626-
Object vr = r.v1().get(i);
637+
// Get the values for left and right rows at the current column index
638+
Object vl = l.v1().get(columnIdx);
639+
Object vr = r.v1().get(columnIdx);
627640
if (comparator != null) {
628641
int result = comparator.compare(vl, vr);
629-
// if things are equals, move to the next comparator
642+
// if things are not equal: return the comparison result,
643+
// otherwise: move to the next comparator to solve the tie.
630644
if (result != 0) {
631645
return result > 0;
632646
}
633647
}
634-
// no comparator means the existing order needs to be preserved
648+
// no comparator means the rows are pre-ordered by ES for the column at
649+
// the current index and the existing order needs to be preserved
635650
else {
636-
// check the values - if they are equal move to the next comparator
637-
// otherwise return the row order
651+
// check the values - if they are not equal return the row order
652+
// otherwise: move to the next comparator to solve the tie.
638653
if (Objects.equals(vl, vr) == false) {
639654
return l.v2().compareTo(r.v2()) > 0;
640655
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
109109
source.sort("_doc");
110110
return;
111111
}
112-
for (Sort sortable : container.sort()) {
112+
for (Sort sortable : container.sort().values()) {
113113
SortBuilder<?> sortBuilder = null;
114114

115115
if (sortable instanceof AttributeSort) {

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java

+29-29
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import org.elasticsearch.xpack.sql.querydsl.container.GlobalCountRef;
6565
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef;
6666
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef.Property;
67+
import org.elasticsearch.xpack.sql.querydsl.container.GroupingFunctionSort;
6768
import org.elasticsearch.xpack.sql.querydsl.container.MetricAggRef;
6869
import org.elasticsearch.xpack.sql.querydsl.container.PivotColumnRef;
6970
import org.elasticsearch.xpack.sql.querydsl.container.QueryContainer;
@@ -681,37 +682,36 @@ protected PhysicalPlan rule(OrderExec plan) {
681682

682683
// TODO: might need to validate whether the target field or group actually exist
683684
if (group != null && group != Aggs.IMPLICIT_GROUP_KEY) {
684-
// check whether the lookup matches a group
685-
if (group.id().equals(lookup)) {
686-
qContainer = qContainer.updateGroup(group.with(direction));
687-
}
688-
// else it's a leafAgg
689-
else {
690-
qContainer = qContainer.updateGroup(group.with(direction));
691-
}
685+
qContainer = qContainer.updateGroup(group.with(direction));
686+
}
687+
688+
// field
689+
if (orderExpression instanceof FieldAttribute) {
690+
qContainer = qContainer.addSort(lookup,
691+
new AttributeSort((FieldAttribute) orderExpression, direction, missing));
692+
}
693+
// scalar functions typically require script ordering
694+
else if (orderExpression instanceof ScalarFunction) {
695+
ScalarFunction sf = (ScalarFunction) orderExpression;
696+
// nope, use scripted sorting
697+
qContainer = qContainer.addSort(lookup, new ScriptSort(sf.asScript(), direction, missing));
698+
}
699+
// histogram
700+
else if (orderExpression instanceof Histogram) {
701+
qContainer = qContainer.addSort(lookup, new GroupingFunctionSort(direction, missing));
692702
}
703+
// score
704+
else if (orderExpression instanceof Score) {
705+
qContainer = qContainer.addSort(lookup, new ScoreSort(direction, missing));
706+
}
707+
// agg function
708+
else if (orderExpression instanceof AggregateFunction) {
709+
qContainer = qContainer.addSort(lookup,
710+
new AggregateSort((AggregateFunction) orderExpression, direction, missing));
711+
}
712+
// unknown
693713
else {
694-
// scalar functions typically require script ordering
695-
if (orderExpression instanceof ScalarFunction) {
696-
ScalarFunction sf = (ScalarFunction) orderExpression;
697-
// nope, use scripted sorting
698-
qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing));
699-
}
700-
// score
701-
else if (orderExpression instanceof Score) {
702-
qContainer = qContainer.addSort(new ScoreSort(direction, missing));
703-
}
704-
// field
705-
else if (orderExpression instanceof FieldAttribute) {
706-
qContainer = qContainer.addSort(new AttributeSort((FieldAttribute) orderExpression, direction, missing));
707-
}
708-
// agg function
709-
else if (orderExpression instanceof AggregateFunction) {
710-
qContainer = qContainer.addSort(new AggregateSort((AggregateFunction) orderExpression, direction, missing));
711-
} else {
712-
// unknown
713-
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
714-
}
714+
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
715715
}
716716
}
717717

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.sql.querydsl.container;
7+
8+
import java.util.Objects;
9+
10+
public class GroupingFunctionSort extends Sort {
11+
12+
public GroupingFunctionSort(Direction direction, Missing missing) {
13+
super(direction, missing);
14+
}
15+
16+
@Override
17+
public int hashCode() {
18+
return Objects.hash(direction(), missing());
19+
}
20+
21+
@Override
22+
public boolean equals(Object obj) {
23+
if (this == obj) {
24+
return true;
25+
}
26+
27+
if (obj == null || getClass() != obj.getClass()) {
28+
return false;
29+
}
30+
31+
GroupingFunctionSort other = (GroupingFunctionSort) obj;
32+
return Objects.equals(direction(), other.direction())
33+
&& Objects.equals(missing(), other.missing());
34+
}
35+
}

0 commit comments

Comments
 (0)