Skip to content

Commit 0c7f19b

Browse files
committed
Add draft EQL grammar and expression tree
1 parent 28dc77f commit 0c7f19b

37 files changed

+6996
-0
lines changed

buildSrc/src/main/resources/checkstyle_suppressions.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
<suppress files="modules[/\\]lang-painless[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]painless[/\\]antlr[/\\]PainlessLexer\.java" checks="." />
1111
<suppress files="modules[/\\]lang-painless[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]painless[/\\]antlr[/\\]PainlessParser(|BaseVisitor|Visitor)\.java" checks="." />
1212
<suppress files="plugin[/\\]sql[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]sql[/\\]parser[/\\]SqlBase(Base(Listener|Visitor)|Lexer|Listener|Parser|Visitor).java" checks="." />
13+
<!-- Generated EQL parser sources (EqlBase*) are excluded from all checks; the dot before "java" is escaped so it matches only a literal '.' -->
<suppress files="plugin[/\\]eql[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]xpack[/\\]eql[/\\]parser[/\\]EqlBase(Base(Listener|Visitor)|Lexer|Listener|Parser|Visitor)\.java" checks="." />
1314

1415
<!-- JNA requires the no-argument constructor on JNAKernel32Library.SizeT to be public-->
1516
<suppress files="server[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]bootstrap[/\\]JNAKernel32Library.java" checks="RedundantModifier" />

x-pack/plugin/eql/build.gradle

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
evaluationDependsOn(xpackModule('core'))
2+
3+
apply plugin: 'elasticsearch.esplugin'
4+
esplugin {
5+
name 'x-pack-eql'
6+
description 'The Elasticsearch plugin that powers EQL for Elasticsearch'
7+
classname 'org.elasticsearch.xpack.eql.plugin.EqlPlugin'
8+
extendedPlugins = ['x-pack-core', 'lang-painless']
9+
}
10+
11+
ext {
12+
// EQL dependency versions
13+
antlrVersion = "4.5.3"
14+
}
15+
16+
archivesBaseName = 'x-pack-eql'
17+
18+
dependencies {
19+
compileOnly project(path: xpackModule('core'), configuration: 'default')
20+
compileOnly(project(':modules:lang-painless')) {
21+
exclude group: "org.ow2.asm"
22+
}
23+
compile "org.antlr:antlr4-runtime:${antlrVersion}" // same version property as the antlr4 tool in the 'regenerate' configuration, so runtime and generator stay in lockstep
24+
testCompile project(':test:framework')
25+
testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')
26+
testCompile project(path: xpackModule('security'), configuration: 'testArtifacts')
27+
testCompile project(path: ':modules:reindex', configuration: 'runtime')
28+
testCompile project(path: ':modules:parent-join', configuration: 'runtime')
29+
testCompile project(path: ':modules:analysis-common', configuration: 'runtime')
30+
}
31+
32+
// disable integration tests for now
33+
integTest.enabled = false
34+
35+
/**********************************************
36+
* EQL Parser regeneration *
37+
**********************************************/
38+
39+
configurations {
40+
regenerate
41+
}
42+
43+
dependencies {
44+
regenerate "org.antlr:antlr4:${antlrVersion}"
45+
}
46+
47+
String grammarPath = 'src/main/antlr'
48+
String outputPath = 'src/main/java/org/elasticsearch/xpack/eql/parser'
49+
50+
task cleanGenerated(type: Delete) {
51+
delete fileTree(grammarPath) {
52+
include '*.tokens'
53+
}
54+
delete fileTree(outputPath) {
55+
include 'EqlBase*.java'
56+
}
57+
}
58+
59+
task regenParser(type: JavaExec) {
60+
dependsOn cleanGenerated
61+
main = 'org.antlr.v4.Tool'
62+
classpath = configurations.regenerate
63+
systemProperty 'file.encoding', 'UTF-8'
64+
systemProperty 'user.language', 'en'
65+
systemProperty 'user.country', 'US'
66+
systemProperty 'user.variant', ''
67+
args '-Werror',
68+
'-package', 'org.elasticsearch.xpack.eql.parser',
69+
'-listener',
70+
'-visitor',
71+
'-o', outputPath,
72+
"${file(grammarPath)}/EqlBase.g4"
73+
}
74+
75+
task regen {
76+
dependsOn regenParser
77+
doLast {
78+
// moves token files to grammar directory for use with IDEs
79+
ant.move(file: "${outputPath}/EqlBase.tokens", toDir: grammarPath)
80+
ant.move(file: "${outputPath}/EqlBaseLexer.tokens", toDir: grammarPath)
81+
// make the generated classes package private
82+
ant.replaceregexp(match: 'public ((interface|class) \\QEqlBase\\E\\w+)',
83+
replace: '\\1',
84+
encoding: 'UTF-8') {
85+
fileset(dir: outputPath, includes: 'EqlBase*.java')
86+
}
87+
// nuke timestamps/filenames in generated files
88+
ant.replaceregexp(match: '\\Q// Generated from \\E.*',
89+
replace: '\\/\\/ ANTLR GENERATED CODE: DO NOT EDIT',
90+
encoding: 'UTF-8') {
91+
fileset(dir: outputPath, includes: 'EqlBase*.java')
92+
}
93+
// remove tabs in antlr generated files
94+
ant.replaceregexp(match: '\t', flags: 'g', replace: ' ', encoding: 'UTF-8') {
95+
fileset(dir: outputPath, includes: 'EqlBase*.java')
96+
}
97+
// fix line endings
98+
ant.fixcrlf(srcdir: outputPath, eol: 'lf') {
99+
patternset(includes: 'EqlBase*.java')
100+
}
101+
}
102+
}
Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License;
4+
* you may not use this file except in compliance with the Elastic License.
5+
*/
6+
7+
grammar EqlBase;
8+
9+
tokens {
10+
DELIMITER
11+
}
12+
13+
singleStatement
14+
: statement EOF
15+
;
16+
17+
singleExpression
18+
: expression EOF
19+
;
20+
21+
statement
22+
: query (PIPE pipe)*
23+
;
24+
25+
query
26+
: sequence
27+
| join
28+
| condition
29+
;
30+
31+
sequence
32+
: SEQUENCE (by=joinKeys)? (span)?
33+
match+
34+
(UNTIL match)?
35+
;
36+
37+
join
38+
: JOIN (by=joinKeys)?
39+
match+
40+
(UNTIL match)?
41+
;
42+
43+
pipe
44+
: kind=IDENTIFIER (booleanExpression (COMMA booleanExpression)*)?
45+
;
46+
47+
joinKeys
48+
: BY qualifiedNames
49+
;
50+
51+
span
52+
: WITH MAXSPAN EQ DIGIT_IDENTIFIER
53+
;
54+
55+
match
56+
: LB condition RB (by=joinKeys)?
57+
;
58+
59+
condition
60+
: event=qualifiedName WHERE expression
61+
;
62+
63+
expression
64+
: booleanExpression
65+
;
66+
67+
booleanExpression
68+
: NOT booleanExpression #logicalNot
69+
| predicated #booleanDefault
70+
| left=booleanExpression operator=AND right=booleanExpression #logicalBinary
71+
| left=booleanExpression operator=OR right=booleanExpression #logicalBinary
72+
;
73+
74+
// workaround for:
75+
// https://github.com/antlr/antlr4/issues/780
76+
// https://github.com/antlr/antlr4/issues/781
77+
predicated
78+
: valueExpression predicate?
79+
;
80+
81+
// the branches below deliberately carry no dedicated labels, so that the NOT handling is shared across them;
82+
// the 'kind' property is used instead to tell the branches apart
83+
predicate
84+
: NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
85+
| NOT? kind=IN LP valueExpression (COMMA valueExpression)* RP
86+
| NOT? kind=IN LP query RP
87+
;
88+
89+
valueExpression
90+
: primaryExpression #valueExpressionDefault
91+
| operator=(MINUS | PLUS) valueExpression #arithmeticUnary
92+
| left=valueExpression operator=(ASTERISK | SLASH | PERCENT) right=valueExpression #arithmeticBinary
93+
| left=valueExpression operator=(PLUS | MINUS) right=valueExpression #arithmeticBinary
94+
| left=valueExpression comparisonOperator right=valueExpression #comparison
95+
;
96+
97+
primaryExpression
98+
: constant #constantDefault
99+
| functionExpression #function
100+
| qualifiedName #dereference
101+
| LP expression RP #parenthesizedExpression
102+
;
103+
104+
functionExpression
105+
: identifier LP (expression (COMMA expression)*)? RP
106+
;
107+
108+
constant
109+
: NULL #nullLiteral
110+
| number #numericLiteral
111+
| booleanValue #booleanLiteral
112+
| STRING+ #stringLiteral
113+
;
114+
115+
comparisonOperator
116+
: EQ | NEQ | LT | LTE | GT | GTE
117+
;
118+
119+
booleanValue
120+
: TRUE | FALSE
121+
;
122+
123+
qualifiedNames
124+
: qualifiedName (COMMA qualifiedName)*
125+
;
126+
127+
qualifiedName
128+
: (identifier DOT)* identifier
129+
;
130+
131+
identifier
132+
: quoteIdentifier
133+
| unquoteIdentifier
134+
;
135+
136+
quoteIdentifier
137+
: QUOTED_IDENTIFIER #quotedIdentifier
138+
;
139+
140+
unquoteIdentifier
141+
: IDENTIFIER #unquotedIdentifier
142+
| DIGIT_IDENTIFIER #digitIdentifier
143+
;
144+
145+
number
146+
: DECIMAL_VALUE #decimalLiteral
147+
| INTEGER_VALUE #integerLiteral
148+
;
149+
150+
string
151+
: STRING
152+
;
153+
154+
AND: 'AND';
155+
ANY: 'ANY';
156+
ASC: 'ASC';
157+
BETWEEN: 'BETWEEN';
158+
BY: 'BY';
159+
CHILD: 'CHILD';
160+
DESCENDANT: 'DESCENDANT';
161+
EVENT: 'EVENT';
162+
FALSE: 'FALSE';
163+
IN: 'IN';
164+
JOIN: 'JOIN';
165+
MAXSPAN: 'MAXSPAN';
166+
NOT: 'NOT';
167+
NULL: 'NULL';
168+
OF: 'OF';
169+
OR: 'OR';
170+
SEQUENCE: 'SEQUENCE';
171+
TRUE: 'TRUE';
172+
UNTIL: 'UNTIL';
173+
WHERE: 'WHERE';
174+
WITH: 'WITH';
175+
176+
// Operators
177+
EQ : '=' | '==';
178+
NEQ : '<>' | '!=';
179+
LT : '<';
180+
LTE : '<=';
181+
GT : '>';
182+
GTE : '>=';
183+
184+
PLUS: '+';
185+
MINUS: '-';
186+
ASTERISK: '*';
187+
SLASH: '/';
188+
PERCENT: '%';
189+
DOT: '.';
190+
COMMA: ',';
191+
LB: '[';
192+
RB: ']';
193+
LP: '(';
194+
RP: ')';
195+
PIPE: '|';
196+
197+
STRING
198+
: '\'' ( ~'\'')* '\''
199+
| '"' ( ~'"' )* '"'
200+
;
201+
202+
INTEGER_VALUE
203+
: DIGIT+
204+
;
205+
206+
DECIMAL_VALUE
207+
: DIGIT+ DOT DIGIT*
208+
| DOT DIGIT+
209+
| DIGIT+ (DOT DIGIT*)? EXPONENT
210+
| DOT DIGIT+ EXPONENT
211+
;
212+
213+
IDENTIFIER
214+
: (LETTER | '_') (LETTER | DIGIT | '_' | '@' )*
215+
;
216+
217+
DIGIT_IDENTIFIER
218+
: DIGIT (LETTER | DIGIT | '_' | '@')+
219+
;
220+
221+
// Identifier wrapped in double quotes; a doubled quote ("") is accepted inside the body.
// NOTE(review): STRING is declared earlier in this grammar and also matches "..." text.
// ANTLR picks the longest match and, on a tie, the rule declared first, so plain "abc"
// lexes as STRING; this token only wins when the text contains "". Confirm this is intended.
QUOTED_IDENTIFIER
222+
: '"' ( ~'"' | '""' )* '"'
223+
;
224+
225+
fragment EXPONENT
226+
: 'E' [+-]? DIGIT+
227+
;
228+
229+
fragment DIGIT
230+
: [0-9]
231+
;
232+
233+
fragment LETTER
234+
: [A-Z]
235+
;
236+
237+
SIMPLE_COMMENT
238+
: '//' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
239+
;
240+
241+
BRACKETED_COMMENT
242+
: '/*' (BRACKETED_COMMENT|.)*? '*/' -> channel(HIDDEN)
243+
;
244+
245+
WS
246+
: [ \r\n\t]+ -> channel(HIDDEN)
247+
;
248+
249+
// Catch-all for anything we can't recognize.
250+
// We use this to be able to ignore and recover all the text
251+
// when splitting statements with DelimiterLexer
252+
UNRECOGNIZED
253+
: .
254+
;

0 commit comments

Comments
 (0)