Skip to content

Commit be680af

Browse files
authored
SQL: Fix ORDER BY on aggregates and GROUPed BY fields (#51894)
Previously, in the in-memory sorting module `LocalAggregationSorterListener` only the aggregate functions where used (grabbed by the `sortingColumns`). As a consequence, if the ORDER BY was also using columns of the GROUP BY clause, (especially in the case of higher priority - before the aggregate functions) wrong results were produced. E.g.: ``` SELECT gender, MAX(salary) AS max FROM test_emp GROUP BY gender ORDER BY gender, max ``` Add all columns of the ORDER BY to the `sortingColumns` so that the `LocalAggregationSorterListener` can use the correct comparators in the underlying PriorityQueue used to implement the in-memory sorting. Fixes: #50355
1 parent ec1afb9 commit be680af

File tree

9 files changed

+281
-101
lines changed

9 files changed

+281
-101
lines changed

x-pack/plugin/sql/qa/src/main/resources/agg-ordering.csv-spec

+47-1
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,50 @@ g:s | gender:s | s3:i | SUM(salary):i | s5:i
2323
M |M |2671054|2671054 |2671054
2424
F |F |1666196|1666196 |1666196
2525
null |null |487605 |487605 |487605
26-
;
26+
;
27+
28+
histogramDateTimeWithCountAndOrder_1
29+
schema::h:ts|c:l
30+
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC, c ASC;
31+
32+
h | c
33+
------------------------+---------------
34+
1965-01-01T00:00:00.000Z|1
35+
1964-01-01T00:00:00.000Z|4
36+
1963-01-01T00:00:00.000Z|7
37+
1962-01-01T00:00:00.000Z|6
38+
1961-01-01T00:00:00.000Z|8
39+
1960-01-01T00:00:00.000Z|8
40+
1959-01-01T00:00:00.000Z|9
41+
1958-01-01T00:00:00.000Z|7
42+
1957-01-01T00:00:00.000Z|4
43+
1956-01-01T00:00:00.000Z|5
44+
1955-01-01T00:00:00.000Z|4
45+
1954-01-01T00:00:00.000Z|8
46+
1953-01-01T00:00:00.000Z|11
47+
1952-01-01T00:00:00.000Z|8
48+
null |10
49+
;
50+
51+
histogramDateTimeWithCountAndOrder_2
52+
schema::h:ts|c:l
53+
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY c DESC, h ASC;
54+
55+
h | c
56+
------------------------+---------------
57+
1953-01-01T00:00:00.000Z|11
58+
null |10
59+
1959-01-01T00:00:00.000Z|9
60+
1952-01-01T00:00:00.000Z|8
61+
1954-01-01T00:00:00.000Z|8
62+
1960-01-01T00:00:00.000Z|8
63+
1961-01-01T00:00:00.000Z|8
64+
1958-01-01T00:00:00.000Z|7
65+
1963-01-01T00:00:00.000Z|7
66+
1962-01-01T00:00:00.000Z|6
67+
1956-01-01T00:00:00.000Z|5
68+
1955-01-01T00:00:00.000Z|4
69+
1957-01-01T00:00:00.000Z|4
70+
1964-01-01T00:00:00.000Z|4
71+
1965-01-01T00:00:00.000Z|1
72+
;

x-pack/plugin/sql/qa/src/main/resources/agg-ordering.sql-spec

+24-3
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ aggNotSpecifiedInTheAggregateAndGroupWithHavingWithLimitAndDirection
8080
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary) ASC, c DESC LIMIT 5;
8181

8282
groupAndAggNotSpecifiedInTheAggregateWithHaving
83-
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender, MAX(salary);
83+
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender NULLS FIRST, MAX(salary);
8484

8585
multipleAggsThatGetRewrittenWithAliasOnAMediumGroupBy
8686
SELECT languages, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY languages ORDER BY max;
@@ -136,5 +136,26 @@ SELECT gender AS g, first_name AS f, last_name AS l FROM test_emp GROUP BY f, ge
136136
multipleGroupingsAndOrderingByGroups_8
137137
SELECT gender AS g, first_name, last_name FROM test_emp GROUP BY g, last_name, first_name ORDER BY gender ASC, first_name DESC, last_name ASC;
138138

139-
multipleGroupingsAndOrderingByGroupsWithFunctions
140-
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender, c DESC, first_name, last_name ASC;
139+
multipleGroupingsAndOrderingByGroupsAndAggs_1
140+
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender ASC NULLS FIRST, MAX(salary) DESC;
141+
142+
multipleGroupingsAndOrderingByGroupsAndAggs_2
143+
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender DESC NULLS LAST, MAX(salary) ASC;
144+
145+
multipleGroupingsAndOrderingByGroupsWithFunctions_1
146+
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender NULLS FIRST, c DESC, first_name, last_name ASC;
147+
148+
multipleGroupingsAndOrderingByGroupsWithFunctions_2
149+
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY c DESC, gender DESC NULLS LAST, first_name, last_name ASC;
150+
151+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_1
152+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 NULLS FIRST, 2, 3;
153+
154+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_2
155+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 DESC NULLS LAST, 2, 3;
156+
157+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_3
158+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 2, 1 NULLS FIRST, 3;
159+
160+
multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_4
161+
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 3 DESC, 1 NULLS FIRST, 2;

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/Querier.java

+31-16
Original file line numberDiff line numberDiff line change
@@ -594,36 +594,51 @@ static class AggSortingQueue extends PriorityQueue<Tuple<List<?>, Integer>> {
594594
this.sortingColumns = sortingColumns;
595595
}
596596

597-
// compare row based on the received attribute sort
598-
// if a sort item is not in the list, it is assumed the sorting happened in ES
599-
// and the results are left as is (by using the row ordering), otherwise it is sorted based on the given criteria.
600-
//
601-
// Take for example ORDER BY a, x, b, y
602-
// a, b - are sorted in ES
603-
// x, y - need to be sorted client-side
604-
// sorting on x kicks in, only if the values for a are equal.
605-
597+
/**
598+
* Compare row based on the received attribute sort
599+
* <ul>
600+
* <li>
601+
* If a tuple in {@code sortingColumns} has a null comparator, it is assumed the sorting
602+
* happened in ES and the results are left as is (by using the row ordering), otherwise it is
603+
* sorted based on the given criteria.
604+
* </li>
605+
* <li>
606+
* If no tuple exists in {@code sortingColumns} for an output column, it means this column
607+
* is not included at all in the ORDER BY
608+
* </li>
609+
*</ul>
610+
*
611+
* Take for example ORDER BY a, x, b, y
612+
* a, b - are sorted in ES
613+
* x, y - need to be sorted client-side
614+
* sorting on x kicks in only if the values for a are equal.
615+
* sorting on y kicks in only if the values for a, x and b are all equal
616+
*
617+
*/
606618
// thanks to @jpountz for the row ordering idea as a way to preserve ordering
607619
@SuppressWarnings("unchecked")
608620
@Override
609621
protected boolean lessThan(Tuple<List<?>, Integer> l, Tuple<List<?>, Integer> r) {
610622
for (Tuple<Integer, Comparator> tuple : sortingColumns) {
611-
int i = tuple.v1().intValue();
623+
int columnIdx = tuple.v1().intValue();
612624
Comparator comparator = tuple.v2();
613625

614-
Object vl = l.v1().get(i);
615-
Object vr = r.v1().get(i);
626+
// Get the values for left and right rows at the current column index
627+
Object vl = l.v1().get(columnIdx);
628+
Object vr = r.v1().get(columnIdx);
616629
if (comparator != null) {
617630
int result = comparator.compare(vl, vr);
618-
// if things are equals, move to the next comparator
631+
// if things are not equal: return the comparison result,
632+
// otherwise: move to the next comparator to solve the tie.
619633
if (result != 0) {
620634
return result > 0;
621635
}
622636
}
623-
// no comparator means the existing order needs to be preserved
637+
// no comparator means the rows are pre-ordered by ES for the column at
638+
// the current index and the existing order needs to be preserved
624639
else {
625-
// check the values - if they are equal move to the next comparator
626-
// otherwise return the row order
640+
// check the values - if they are not equal return the row order
641+
// otherwise: move to the next comparator to solve the tie.
627642
if (Objects.equals(vl, vr) == false) {
628643
return l.v2().compareTo(r.v2()) > 0;
629644
}

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/execution/search/SourceGenerator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
110110
source.sort("_doc");
111111
return;
112112
}
113-
for (Sort sortable : container.sort()) {
113+
for (Sort sortable : container.sort().values()) {
114114
SortBuilder<?> sortBuilder = null;
115115

116116
if (sortable instanceof AttributeSort) {

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryFolder.java

+29-29
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
import org.elasticsearch.xpack.sql.querydsl.container.GlobalCountRef;
6868
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef;
6969
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef.Property;
70+
import org.elasticsearch.xpack.sql.querydsl.container.GroupingFunctionSort;
7071
import org.elasticsearch.xpack.sql.querydsl.container.MetricAggRef;
7172
import org.elasticsearch.xpack.sql.querydsl.container.PivotColumnRef;
7273
import org.elasticsearch.xpack.sql.querydsl.container.QueryContainer;
@@ -682,37 +683,36 @@ protected PhysicalPlan rule(OrderExec plan) {
682683

683684
// TODO: might need to validate whether the target field or group actually exist
684685
if (group != null && group != Aggs.IMPLICIT_GROUP_KEY) {
685-
// check whether the lookup matches a group
686-
if (group.id().equals(lookup)) {
687-
qContainer = qContainer.updateGroup(group.with(direction));
688-
}
689-
// else it's a leafAgg
690-
else {
691-
qContainer = qContainer.updateGroup(group.with(direction));
692-
}
686+
qContainer = qContainer.updateGroup(group.with(direction));
687+
}
688+
689+
// field
690+
if (orderExpression instanceof FieldAttribute) {
691+
qContainer = qContainer.addSort(lookup,
692+
new AttributeSort((FieldAttribute) orderExpression, direction, missing));
693+
}
694+
// scalar functions typically require script ordering
695+
else if (orderExpression instanceof ScalarFunction) {
696+
ScalarFunction sf = (ScalarFunction) orderExpression;
697+
// nope, use scripted sorting
698+
qContainer = qContainer.addSort(lookup, new ScriptSort(sf.asScript(), direction, missing));
699+
}
700+
// histogram
701+
else if (orderExpression instanceof Histogram) {
702+
qContainer = qContainer.addSort(lookup, new GroupingFunctionSort(direction, missing));
693703
}
704+
// score
705+
else if (orderExpression instanceof Score) {
706+
qContainer = qContainer.addSort(lookup, new ScoreSort(direction, missing));
707+
}
708+
// agg function
709+
else if (orderExpression instanceof AggregateFunction) {
710+
qContainer = qContainer.addSort(lookup,
711+
new AggregateSort((AggregateFunction) orderExpression, direction, missing));
712+
}
713+
// unknown
694714
else {
695-
// scalar functions typically require script ordering
696-
if (orderExpression instanceof ScalarFunction) {
697-
ScalarFunction sf = (ScalarFunction) orderExpression;
698-
// nope, use scripted sorting
699-
qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing));
700-
}
701-
// score
702-
else if (orderExpression instanceof Score) {
703-
qContainer = qContainer.addSort(new ScoreSort(direction, missing));
704-
}
705-
// field
706-
else if (orderExpression instanceof FieldAttribute) {
707-
qContainer = qContainer.addSort(new AttributeSort((FieldAttribute) orderExpression, direction, missing));
708-
}
709-
// agg function
710-
else if (orderExpression instanceof AggregateFunction) {
711-
qContainer = qContainer.addSort(new AggregateSort((AggregateFunction) orderExpression, direction, missing));
712-
} else {
713-
// unknown
714-
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
715-
}
715+
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
716716
}
717717
}
718718

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
package org.elasticsearch.xpack.sql.querydsl.container;
7+
8+
import java.util.Objects;
9+
10+
public class GroupingFunctionSort extends Sort {
11+
12+
public GroupingFunctionSort(Direction direction, Missing missing) {
13+
super(direction, missing);
14+
}
15+
16+
@Override
17+
public int hashCode() {
18+
return Objects.hash(direction(), missing());
19+
}
20+
21+
@Override
22+
public boolean equals(Object obj) {
23+
if (this == obj) {
24+
return true;
25+
}
26+
27+
if (obj == null || getClass() != obj.getClass()) {
28+
return false;
29+
}
30+
31+
GroupingFunctionSort other = (GroupingFunctionSort) obj;
32+
return Objects.equals(direction(), other.direction())
33+
&& Objects.equals(missing(), other.missing());
34+
}
35+
}

0 commit comments

Comments
 (0)