Skip to content

Commit d912637

Browse files
authored
SQL: Add PIVOT support (#46489)
Add initial PIVOT support for transforming a regular table into a statistics table around an arbitrary pivoting column: SELECT * FROM (SELECT languages, country, salary, FROM mp) PIVOT (AVG(salary) FOR countries IN ('NL', 'DE', 'ES', 'RO', 'US')) In the current implementation PIVOT allows only one aggregation however this restriction is likely to be lifted in the future. Also not all aggregations are working, in particular MatrixStats are not yet supported.
1 parent 0851a37 commit d912637

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+3967
-2114
lines changed

x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/FetchSizeTestCase.java

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,4 +149,59 @@ private void assertNestedDocuments(ResultSet rs, int i) throws SQLException {
149149
assertTrue("No more entries left after row " + rs.getRow(), (i+j == 23 || rs.next()));
150150
}
151151
}
152-
}
152+
153+
/**
154+
* Explicit pagination test for PIVOT.
155+
* Checks that the paging properly consumes the necessary amount of aggregations and the
156+
* page size affects the result not the intermediate query.
157+
*/
158+
public void testPivotPaging() throws Exception {
159+
Request request = new Request("PUT", "/test_pivot/_bulk");
160+
request.addParameter("refresh", "true");
161+
StringBuilder bulk = new StringBuilder();
162+
String[] continent = new String[] { "AF", "AS", "EU", "NA", "SA", "AQ", "AU" };
163+
for (int i = 0; i <= 100; i++) {
164+
bulk.append("{\"index\":{}}\n");
165+
bulk.append("{\"item\":").append(i % 10)
166+
.append(", \"entry\":").append(i)
167+
.append(", \"amount\" : ").append(randomInt(999))
168+
.append(", \"location\" : \"").append(continent[i % (continent.length)]).append("\"")
169+
.append("}\n");
170+
}
171+
request.setJsonEntity(bulk.toString());
172+
assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode());
173+
174+
try (Connection c = esJdbc();
175+
Statement s = c.createStatement()) {
176+
177+
String query = "SELECT * FROM "
178+
+ "(SELECT item, amount, location FROM test_pivot)"
179+
+ " PIVOT (AVG(amount) FOR location IN ( 'AF', 'AS', 'EU', 'NA', 'SA', 'AQ', 'AU') )";
180+
// set size smaller than an agg page
181+
s.setFetchSize(3);
182+
try (ResultSet rs = s.executeQuery(query)) {
183+
assertEquals(8, rs.getMetaData().getColumnCount());
184+
for (int i = 0; i < 10; i++) {
185+
assertTrue(rs.next());
186+
// the page was set to a pivot row (since the initial 3 is lower as a pivot page takes number of pivot entries + 1)
187+
assertEquals(1, rs.getFetchSize());
188+
assertEquals(Long.valueOf(i), rs.getObject("item"));
189+
}
190+
assertFalse(rs.next());
191+
}
192+
193+
// now try with a larger fetch size (8 * 2 + something) - should be 2
194+
s.setFetchSize(20);
195+
try (ResultSet rs = s.executeQuery(query)) {
196+
for (int i = 0; i < 10; i++) {
197+
assertTrue(rs.next());
198+
//
199+
assertEquals(2, rs.getFetchSize());
200+
assertEquals(Long.valueOf(i), rs.getObject("item"));
201+
}
202+
assertFalse(rs.next());
203+
}
204+
}
205+
assertNoSearchContexts();
206+
}
207+
}
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
averageWithOneValue
2+
schema::languages:bt|'F':d
3+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('F'));
4+
5+
languages | 'F'
6+
---------------+------------------
7+
null |62140.666666666664
8+
1 |47073.25
9+
2 |50684.4
10+
3 |53660.0
11+
4 |49291.5
12+
5 |46705.555555555555
13+
;
14+
15+
averageWithAliasAndOneValue
16+
schema::languages:bt|'F':d
17+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) AS "AVG" FOR gender IN ('F'));
18+
19+
languages | 'F'
20+
---------------+------------------
21+
null |62140.666666666664
22+
1 |47073.25
23+
2 |50684.4
24+
3 |53660.0
25+
4 |49291.5
26+
5 |46705.555555555555
27+
;
28+
29+
averageWithAliasedValue
30+
schema::languages:bt|XX:d
31+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('F' AS "XX"));
32+
33+
languages | XX
34+
---------------+------------------
35+
null |62140.666666666664
36+
1 |47073.25
37+
2 |50684.4
38+
3 |53660.0
39+
4 |49291.5
40+
5 |46705.555555555555
41+
;
42+
43+
averageWithTwoValues
44+
schema::languages:bt|'M':d|'F':d
45+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F'));
46+
47+
languages | 'M' | 'F'
48+
---------------+-----------------+------------------
49+
null |48396.28571428572|62140.666666666664
50+
1 |49767.22222222222|47073.25
51+
2 |44103.90909090909|50684.4
52+
3 |51741.90909090909|53660.0
53+
4 |47058.90909090909|49291.5
54+
5 |39052.875 |46705.555555555555
55+
;
56+
57+
averageWithTwoValuesAndAlias
58+
schema::languages:bt|XY:d|XX:d
59+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M' AS "XY", 'F' "XX"));
60+
61+
languages | XY | XX
62+
---------------+-----------------+------------------
63+
null |48396.28571428572|62140.666666666664
64+
1 |49767.22222222222|47073.25
65+
2 |44103.90909090909|50684.4
66+
3 |51741.90909090909|53660.0
67+
4 |47058.90909090909|49291.5
68+
5 |39052.875 |46705.555555555555
69+
;
70+
71+
averageWithThreeValuesIncludingNull
72+
schema::languages:bt|'M':d|'F':d
73+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F'));
74+
75+
languages | 'M' | 'F'
76+
---------------+-----------------+------------------
77+
null |48396.28571428572|62140.666666666664
78+
1 |49767.22222222222|47073.25
79+
2 |44103.90909090909|50684.4
80+
3 |51741.90909090909|53660.0
81+
4 |47058.90909090909|49291.5
82+
5 |39052.875 |46705.555555555555
83+
;
84+
85+
86+
averageWithOneValueAndLimit
87+
schema::languages:bt|'F':d
88+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('F')) LIMIT 3;
89+
90+
languages | 'F'
91+
---------------+------------------
92+
null |62140.666666666664
93+
1 |47073.25
94+
2 |50684.4
95+
;
96+
97+
averageWithTwoValuesAndLimit
98+
schema::languages:bt|'M':d|'F':d
99+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F')) LIMIT 3;
100+
101+
languages | 'M' | 'F'
102+
---------------+-----------------+------------------
103+
null |48396.28571428572|62140.666666666664
104+
1 |49767.22222222222|47073.25
105+
2 |44103.90909090909|50684.4
106+
;
107+
108+
109+
averageWithTwoValuesAndTinyLimit
110+
schema::languages:bt|'M':d|'F':d
111+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F')) LIMIT 1;
112+
113+
languages | 'M' | 'F'
114+
---------------+-----------------+------------------
115+
null |48396.28571428572|62140.666666666664
116+
;
117+
118+
119+
averageWithTwoValuesAndSmallLimit
120+
schema::languages:bt|'M':d|'F':d
121+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F')) LIMIT 2;
122+
123+
languages | 'M' | 'F'
124+
---------------+-----------------+------------------
125+
null |48396.28571428572|62140.666666666664
126+
1 |49767.22222222222|47073.25
127+
;
128+
129+
averageWithOneValueAndOrder
130+
schema::languages:bt|'F':d
131+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('F')) ORDER BY languages DESC LIMIT 4;
132+
133+
languages | 'F'
134+
---------------+------------------
135+
5 |46705.555555555555
136+
4 |49291.5
137+
3 |53660.0
138+
2 |50684.4
139+
;
140+
141+
averageWithTwoValuesAndOrderDesc
142+
schema::languages:bt|'M':d|'F':d
143+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F')) ORDER BY languages DESC;
144+
145+
languages | 'M' | 'F'
146+
---------------+-----------------+------------------
147+
5 |39052.875 |46705.555555555555
148+
4 |47058.90909090909|49291.5
149+
3 |51741.90909090909|53660.0
150+
2 |44103.90909090909|50684.4
151+
1 |49767.22222222222|47073.25
152+
null |48396.28571428572|62140.666666666664
153+
;
154+
155+
averageWithTwoValuesAndOrderDescAndLimit
156+
schema::languages:bt|'M':d|'F':d
157+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F')) ORDER BY languages DESC LIMIT 2;
158+
159+
languages | 'M' | 'F'
160+
---------------+-----------------+------------------
161+
5 |39052.875 |46705.555555555555
162+
4 |47058.90909090909|49291.5
163+
;
164+
165+
averageWithTwoValuesAndOrderAsc
166+
schema::languages:bt|'M':d|'F':d
167+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (AVG(salary) FOR gender IN ('M', 'F')) ORDER BY languages ASC;
168+
169+
languages | 'M' | 'F'
170+
---------------+-----------------+------------------
171+
null |48396.28571428572|62140.666666666664
172+
1 |49767.22222222222|47073.25
173+
2 |44103.90909090909|50684.4
174+
3 |51741.90909090909|53660.0
175+
4 |47058.90909090909|49291.5
176+
5 |39052.875 |46705.555555555555
177+
;
178+
179+
180+
sumWithoutSubquery
181+
schema::birth_date:ts|emp_no:i|first_name:s|gender:s|hire_date:ts|last_name:s|1:i|2:i
182+
SELECT * FROM test_emp PIVOT (SUM(salary) FOR languages IN (1, 2)) LIMIT 5;
183+
184+
birth_date | emp_no | first_name | gender | hire_date | last_name | 1 | 2
185+
---------------------+---------------+---------------+---------------+---------------------+---------------+---------------+---------------
186+
null |10041 |Uri |F |1989-11-12 00:00:00.0|Lenart |56415 |null
187+
null |10043 |Yishay |M |1990-10-20 00:00:00.0|Tzvieli |34341 |null
188+
null |10044 |Mingsen |F |1994-05-21 00:00:00.0|Casley |39728 |null
189+
1952-04-19 00:00:00.0|10009 |Sumant |F |1985-02-18 00:00:00.0|Peac |66174 |null
190+
1953-01-07 00:00:00.0|10067 |Claudi |M |1987-03-04 00:00:00.0|Stavenow |null |52044
191+
1953-01-23 00:00:00.0|10019 |Lillian |null |1999-04-30 00:00:00.0|Haddadi |73717 |null
192+
;
193+
194+
averageWithOneValueAndMath
195+
schema::languages:bt|'F':d
196+
SELECT * FROM (SELECT languages, gender, salary FROM test_emp) PIVOT (ROUND(AVG(salary) / 2) FOR gender IN ('F'));
197+
198+
languages | 'F'
199+
---------------+---------------
200+
null |31070.0
201+
1 |23537.0
202+
2 |25342.0
203+
3 |26830.0
204+
4 |24646.0
205+
5 |23353.0
206+
;

x-pack/plugin/sql/src/main/antlr/SqlBase.g4

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,15 @@ orderBy
9090
;
9191

9292
querySpecification
93-
: SELECT setQuantifier? selectItem (',' selectItem)*
93+
: SELECT setQuantifier? selectItems
9494
fromClause?
9595
(WHERE where=booleanExpression)?
9696
(GROUP BY groupBy)?
9797
(HAVING having=booleanExpression)?
9898
;
9999

100100
fromClause
101-
: FROM relation (',' relation)*
101+
: FROM relation (',' relation)* pivotClause?
102102
;
103103

104104
groupBy
@@ -123,6 +123,10 @@ setQuantifier
123123
| ALL
124124
;
125125

126+
selectItems
127+
: selectItem (',' selectItem)*
128+
;
129+
126130
selectItem
127131
: expression (AS? identifier)? #selectExpression
128132
;
@@ -154,6 +158,18 @@ relationPrimary
154158
| '(' relation ')' (AS? qualifiedName)? #aliasedRelation
155159
;
156160

161+
pivotClause
162+
: PIVOT '(' aggs=pivotArgs FOR column=qualifiedName IN '(' vals=pivotArgs ')' ')'
163+
;
164+
165+
pivotArgs
166+
: namedValueExpression (',' namedValueExpression)*
167+
;
168+
169+
namedValueExpression
170+
: valueExpression (AS? identifier)?
171+
;
172+
157173
expression
158174
: booleanExpression
159175
;
@@ -343,6 +359,7 @@ whenClause
343359
;
344360

345361
// http://developer.mimer.se/validator/sql-reserved-words.tml
362+
// https://developer.mimer.com/wp-content/uploads/standard-sql-reserved-words-summary.pdf
346363
nonReserved
347364
: ANALYZE | ANALYZED
348365
| CATALOGS | COLUMNS | CURRENT_DATE | CURRENT_TIME | CURRENT_TIMESTAMP
@@ -355,7 +372,7 @@ nonReserved
355372
| LAST | LIMIT
356373
| MAPPED | MINUTE | MONTH
357374
| OPTIMIZED
358-
| PARSED | PHYSICAL | PLAN
375+
| PARSED | PHYSICAL | PIVOT | PLAN
359376
| QUERY
360377
| RLIKE
361378
| SCHEMAS | SECOND | SHOW | SYS
@@ -397,6 +414,7 @@ EXPLAIN: 'EXPLAIN';
397414
EXTRACT: 'EXTRACT';
398415
FALSE: 'FALSE';
399416
FIRST: 'FIRST';
417+
FOR: 'FOR';
400418
FORMAT: 'FORMAT';
401419
FROM: 'FROM';
402420
FROZEN: 'FROZEN';
@@ -434,6 +452,7 @@ ORDER: 'ORDER';
434452
OUTER: 'OUTER';
435453
PARSED: 'PARSED';
436454
PHYSICAL: 'PHYSICAL';
455+
PIVOT: 'PIVOT';
437456
PLAN: 'PLAN';
438457
RIGHT: 'RIGHT';
439458
RLIKE: 'RLIKE';

0 commit comments

Comments
 (0)