Skip to content

Commit 8e44cb5

Browse files
wolframhaussig authored and felixbarny committed
SQL parsing improvements (#696)
- support for CALL using JDBC escape syntax
- support for MERGE
- support for DB links

fixes #676
1 parent 8aaaf6a commit 8e44cb5

File tree

5 files changed

+292
-32
lines changed

5 files changed

+292
-32
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*-
2+
* #%L
3+
* Elastic APM Java agent
4+
* %%
5+
* Copyright (C) 2018 - 2019 Elastic and contributors
6+
* %%
7+
* Licensed to Elasticsearch B.V. under one or more contributor
8+
* license agreements. See the NOTICE file distributed with
9+
* this work for additional information regarding copyright
10+
* ownership. Elasticsearch B.V. licenses this file to you under
11+
* the Apache License, Version 2.0 (the "License"); you may
12+
* not use this file except in compliance with the License.
13+
* You may obtain a copy of the License at
14+
*
15+
* http://www.apache.org/licenses/LICENSE-2.0
16+
*
17+
* Unless required by applicable law or agreed to in writing,
18+
* software distributed under the License is distributed on an
19+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20+
* KIND, either express or implied. See the License for the
21+
* specific language governing permissions and limitations
22+
* under the License.
23+
* #L%
24+
*/
25+
package co.elastic.apm.agent.jdbc.signature;
26+
27+
class JdbcFilter {
28+
29+
private boolean inQuote = false;
30+
private boolean inJdbcEscape = false;
31+
private boolean jdbcKeyWord = false;
32+
33+
boolean skip(Scanner s, char c) {
34+
switch (c) {
35+
case '{':
36+
if (!inQuote) {
37+
inJdbcEscape = true;
38+
jdbcKeyWord = true;
39+
return true;
40+
}
41+
break;
42+
case 'o':
43+
case 'O':
44+
if (!inQuote && inJdbcEscape && jdbcKeyWord && s.isNextCharIgnoreCase('j')) {
45+
s.next();
46+
jdbcKeyWord = false;
47+
return true;
48+
}
49+
break;
50+
case '}':
51+
if (!inQuote) {
52+
inJdbcEscape = false;
53+
return true;
54+
}
55+
break;
56+
case '?':
57+
case '=':
58+
if (!inQuote && inJdbcEscape) {
59+
return true;
60+
}
61+
break;
62+
case '\'':
63+
inQuote = !inQuote;
64+
break;
65+
}
66+
jdbcKeyWord = false;
67+
return false;
68+
}
69+
70+
void reset() {
71+
inQuote = false;
72+
inJdbcEscape = false;
73+
jdbcKeyWord = false;
74+
}
75+
}

apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/Scanner.java

+12-4
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,11 @@ public class Scanner {
3131
private int end; // text end char offset
3232
private int pos; // read position char offset
3333
private int inputLength;
34+
private final JdbcFilter filter = new JdbcFilter();
3435

3536
public void setQuery(String sql) {
3637
this.input = sql;
38+
filter.reset();
3739
inputLength = sql.length();
3840
start = 0;
3941
end = 0;
@@ -74,7 +76,7 @@ public Token scan() {
7476
return Token.EOF;
7577
}
7678
char c = next();
77-
while (Character.isSpaceChar(c)) {
79+
while (Character.isSpaceChar(c) || filter.skip(this, c)) {
7880
if (hasNext()) {
7981
c = next();
8082
} else {
@@ -301,7 +303,7 @@ private char peek() {
301303
return input.charAt(pos);
302304
}
303305

304-
private char next() {
306+
char next() {
305307
final char c = peek();
306308
pos++;
307309
end = pos;
@@ -347,6 +349,10 @@ private boolean isNextChar(char c) {
347349
return hasNext() && peek() == c;
348350
}
349351

352+
boolean isNextCharIgnoreCase(char c) {
353+
return hasNext() && Character.toLowerCase(peek()) == Character.toLowerCase(c);
354+
}
355+
350356
public enum Token {
351357

352358
OTHER,
@@ -373,7 +379,9 @@ public enum Token {
373379
SET,
374380
TABLE,
375381
TRUNCATE, // Cassandra/CQL-specific
376-
UPDATE;
382+
UPDATE,
383+
MERGE,
384+
USING;
377385

378386
private static final Token[] EMPTY = {};
379387
private static final Token[][] KEYWORDS_BY_LENGTH = {
@@ -382,7 +390,7 @@ public enum Token {
382390
{AS, OR},
383391
{SET},
384392
{CALL, FROM, INTO},
385-
{TABLE},
393+
{TABLE, MERGE, USING},
386394
{DELETE, INSERT, SELECT, UPDATE},
387395
{REPLACE},
388396
{TRUNCATE}

apm-agent-plugins/apm-jdbc-plugin/src/main/java/co/elastic/apm/agent/jdbc/signature/SignatureParser.java

+72-20
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,14 @@
2424
*/
2525
package co.elastic.apm.agent.jdbc.signature;
2626

27+
import javax.annotation.Nullable;
2728
import java.util.concurrent.ConcurrentHashMap;
2829
import java.util.concurrent.ConcurrentMap;
2930

3031
import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.EOF;
3132
import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.FROM;
3233
import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.IDENT;
34+
import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.INTO;
3335
import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.LPAREN;
3436
import static co.elastic.apm.agent.jdbc.signature.Scanner.Token.RPAREN;
3537

@@ -54,55 +56,61 @@ public class SignatureParser {
5456
* When relying on weak keys, we would not leverage any caching benefits if the query string is collected.
5557
* That means that we are leaking Strings but as the size of the map is limited that should not be an issue.
5658
*/
57-
private final static ConcurrentMap<String, String> signatureCache = new ConcurrentHashMap<String, String>(DISABLE_CACHE_THRESHOLD, 0.5f, Runtime.getRuntime().availableProcessors());
59+
private final static ConcurrentMap<String, String[]> signatureCache = new ConcurrentHashMap<String, String[]>(DISABLE_CACHE_THRESHOLD,
60+
0.5f, Runtime.getRuntime().availableProcessors());
5861

5962
private final Scanner scanner = new Scanner();
6063

6164
public void querySignature(String query, StringBuilder signature, boolean preparedStatement) {
65+
querySignature(query, signature, null, preparedStatement);
66+
}
6267

68+
public void querySignature(String query, StringBuilder signature, @Nullable StringBuilder dbLink, boolean preparedStatement) {
6369
final boolean cacheable = preparedStatement // non-prepared statements are likely to be dynamic strings
6470
&& QUERY_LENGTH_CACHE_LOWER_THRESHOLD < query.length()
6571
&& query.length() < QUERY_LENGTH_CACHE_UPPER_THRESHOLD;
6672
if (cacheable) {
67-
final String cachedSignature = signatureCache.get(query);
73+
final String[] cachedSignature = signatureCache.get(query);
6874
if (cachedSignature != null) {
69-
signature.append(cachedSignature);
75+
signature.append(cachedSignature[0]);
76+
if (dbLink != null) {
77+
dbLink.append(cachedSignature[1]);
78+
}
7079
return;
7180
}
7281
}
7382

7483
scanner.setQuery(query);
75-
parse(query, signature);
84+
parse(query, signature, dbLink);
7685

7786
if (cacheable && signatureCache.size() <= DISABLE_CACHE_THRESHOLD) {
7887
// we don't mind a small overshoot due to race conditions
79-
signatureCache.put(query, signature.toString());
88+
signatureCache.put(query, new String[]{signature.toString(), dbLink != null ? dbLink.toString() : ""});
8089
}
8190
}
8291

83-
private void parse(String query, StringBuilder signature) {
92+
private void parse(String query, StringBuilder signature, @Nullable StringBuilder dbLink) {
8493
final Scanner.Token firstToken = scanner.scanWhile(Scanner.Token.COMMENT);
8594
switch (firstToken) {
8695
case CALL:
8796
signature.append("CALL");
8897
if (scanner.scanUntil(Scanner.Token.IDENT)) {
89-
signature.append(' ');
90-
scanner.appendCurrentTokenText(signature);
98+
appendIdentifiers(signature, dbLink);
9199
}
92100
return;
93101
case DELETE:
94102
signature.append("DELETE");
95103
if (scanner.scanUntil(FROM) && scanner.scanUntil(Scanner.Token.IDENT)) {
96-
signature.append(" FROM ");
97-
appendIdentifiers(signature);
104+
signature.append(" FROM");
105+
appendIdentifiers(signature, dbLink);
98106
}
99107
return;
100108
case INSERT:
101109
case REPLACE:
102110
signature.append(firstToken.name());
103111
if (scanner.scanUntil(Scanner.Token.INTO) && scanner.scanUntil(Scanner.Token.IDENT)) {
104-
signature.append(" INTO ");
105-
appendIdentifiers(signature);
112+
signature.append(" INTO");
113+
appendIdentifiers(signature, dbLink);
106114
}
107115
return;
108116
case SELECT:
@@ -116,8 +124,8 @@ private void parse(String query, StringBuilder signature) {
116124
} else if (t == FROM) {
117125
if (level == 0) {
118126
if (scanner.scanToken(Scanner.Token.IDENT)) {
119-
signature.append(" FROM ");
120-
appendIdentifiers(signature);
127+
signature.append(" FROM");
128+
appendIdentifiers(signature, dbLink);
121129
} else {
122130
return;
123131
}
@@ -128,7 +136,7 @@ private void parse(String query, StringBuilder signature) {
128136
case UPDATE:
129137
signature.append("UPDATE");
130138
// Scan for the table name
131-
boolean hasPeriod = false, hasFirstPeriod = false;
139+
boolean hasPeriod = false, hasFirstPeriod = false, isDbLink = false;
132140
if (scanner.scanToken(IDENT)) {
133141
signature.append(' ');
134142
scanner.appendCurrentTokenText(signature);
@@ -145,6 +153,11 @@ private void parse(String query, StringBuilder signature) {
145153
signature.setLength(0);
146154
signature.append("UPDATE ");
147155
scanner.appendCurrentTokenText(signature);
156+
} else if (isDbLink) {
157+
if (dbLink != null) {
158+
scanner.appendCurrentTokenText(dbLink);
159+
}
160+
isDbLink = false;
148161
}
149162
// Two adjacent identifiers found after the first period.
150163
// Ignore the secondary ones, in case they are unknown keywords.
@@ -155,23 +168,62 @@ private void parse(String query, StringBuilder signature) {
155168
signature.append('.');
156169
break;
157170
default:
158-
return;
171+
if ("@".equals(scanner.text())) {
172+
isDbLink = true;
173+
break;
174+
} else {
175+
return;
176+
}
159177
}
160178
}
161179
}
162180
return;
181+
case MERGE:
182+
signature.append("MERGE");
183+
if (scanner.scanToken(INTO) && scanner.scanUntil(Scanner.Token.IDENT)) {
184+
signature.append(" INTO");
185+
appendIdentifiers(signature, dbLink);
186+
}
187+
return;
163188
default:
164189
query = query.trim();
165190
final int indexOfWhitespace = query.indexOf(' ');
166191
signature.append(query, 0, indexOfWhitespace > 0 ? indexOfWhitespace : query.length());
167192
}
168193
}
169194

170-
private void appendIdentifiers(StringBuilder signature) {
195+
private void appendIdentifiers(StringBuilder signature, @Nullable StringBuilder dbLink) {
196+
signature.append(' ');
171197
scanner.appendCurrentTokenText(signature);
172-
while (scanner.scanToken(Scanner.Token.PERIOD) && scanner.scanToken(Scanner.Token.IDENT)) {
173-
signature.append('.');
174-
scanner.appendCurrentTokenText(signature);
198+
boolean connectedIdents = false, isDbLink = false;
199+
for (Scanner.Token t = scanner.scan(); t != EOF; t = scanner.scan()) {
200+
switch (t) {
201+
case IDENT:
202+
// do not add tokens which are separated by a space
203+
if (connectedIdents) {
204+
scanner.appendCurrentTokenText(signature);
205+
connectedIdents = false;
206+
} else {
207+
if (isDbLink) {
208+
if (dbLink != null) {
209+
scanner.appendCurrentTokenText(dbLink);
210+
}
211+
}
212+
return;
213+
}
214+
break;
215+
case PERIOD:
216+
signature.append('.');
217+
connectedIdents = true;
218+
break;
219+
case USING:
220+
return;
221+
default:
222+
if ("@".equals(scanner.text())) {
223+
isDbLink = true;
224+
}
225+
break;
226+
}
175227
}
176228
}
177229
}

0 commit comments

Comments
 (0)