Skip to content

Commit 204508f

Browse files
committed
jdbc: support for sub-set of JDBC escape syntax
Add driver-side SQL pre-processing that runs before a statement is sent to the server. The driver supports a subset of the scalar functions defined by the spec (Appendix C), outer joins, the escape clause for the SQL LIKE operator, and the limit/offset clause. The processed result can be obtained using the Connection.nativeSQL() method. Closes #79 Closes #76 Closes #81 Closes #83 Closes #84 Affects: #108
1 parent 4ba88fb commit 204508f

13 files changed

+1389
-13
lines changed
+344
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,344 @@
1+
package org.tarantool.jdbc;
2+
3+
import static org.tarantool.jdbc.EscapedFunctions.Expression;
4+
import static org.tarantool.jdbc.EscapedFunctions.FunctionExpression;
5+
import static org.tarantool.jdbc.EscapedFunctions.FunctionSignatureKey;
6+
import static org.tarantool.jdbc.EscapedFunctions.functionMappings;
7+
8+
import org.tarantool.util.SQLStates;
9+
import org.tarantool.util.ThrowingBiFunction;
10+
11+
import java.sql.Connection;
12+
import java.sql.SQLSyntaxErrorException;
13+
import java.util.ArrayList;
14+
import java.util.LinkedList;
15+
import java.util.List;
16+
import java.util.regex.Pattern;
17+
18+
/**
19+
* Set of utils to work with JDBC escape processing.
20+
* <p>
21+
* Supported escape syntax:
22+
* <ol>
23+
* <li>Scalar functions (i.e. {@code {fn random()}}).</li>
24+
* <li>Outer joins (i.e. {@code {oj "dept" left outer join "salary" on "dept_id" = 1412}}).</li>
25+
* <li>Like escape character (i.e. {@code like '_|%_3%' {escape '|'}}).</li>
26+
* <li>Limiting returned rows (i.e. {@code {limit 10 offset 20}}).</li>
27+
* </ol>
28+
*
29+
* <p>
30+
* Most of the supported expressions translates directly omitting escape borders.
31+
* In this way, {@code {fn abs(-5)}} becomes {@code abs(-5)}} or {@code {limit 10 offset 50}}
32+
* becomes {@code limit 10 offset 50} and so on. There are exceptions in case of scalar
33+
* functions where JDBC functions may not match exactly with Tarantool ones (for example,
34+
* JDBC {@code {fn rand()}} function becomes {@code random()} supported by Tarantool.
35+
*
36+
* <p>
37+
* Escape syntax explicitly do not allow or deny SQL comments within an escape expression.
38+
* To avoid undefined behaviours when processing is performed the parser always replaces
39+
* a comment with one whitespace.
40+
*/
41+
public class EscapeSyntaxParser {
42+
43+
/**
44+
* Pattern that covers function names described in JDBC Spec
45+
* Appendix C. Scalar functions.
46+
*/
47+
private static final Pattern IDENTIFIER = Pattern.compile("[_a-zA-Z][_a-zA-Z0-9]+");
48+
49+
private final SQLConnection jdbcContext;
50+
51+
public EscapeSyntaxParser(SQLConnection jdbcContext) {
52+
this.jdbcContext = jdbcContext;
53+
}
54+
55+
/**
56+
* Performs escape processing for SQL queries. It translates
57+
* sql text with optional escape expressions such as {@code {fn abs(-1)}}.
58+
*
59+
* <p>
60+
* Comments inside SQL text can be eliminated as parsing goes using preserveComments
61+
* flag. Hence, Comments inside escape syntax are always omitted regardless of
62+
* the flag, though.
63+
*
64+
* @param sql SQL text to be processed
65+
*
66+
* @return native SQL query
67+
*
68+
* @throws SQLSyntaxErrorException if any syntax error happened
69+
*/
70+
public String translate(String sql, boolean preserveComments) throws SQLSyntaxErrorException {
71+
StringBuilder nativeSql = new StringBuilder(sql.length());
72+
StringBuilder escapeBuffer = new StringBuilder();
73+
StringBuilder activeBuffer = nativeSql;
74+
LinkedList<Integer> escapeStartPositions = new LinkedList<>();
75+
76+
int i = 0;
77+
while (i < sql.length()) {
78+
char currentChar = sql.charAt(i);
79+
switch (currentChar) {
80+
case '\'':
81+
case '"':
82+
int endOfString = seekEndOfRegion(sql, i, "" + currentChar, "" + currentChar);
83+
if (endOfString == -1) {
84+
throw new SQLSyntaxErrorException(
85+
"Not enclosed string literal or quoted identifier at position " + i,
86+
SQLStates.SYNTAX_ERROR.getSqlState()
87+
);
88+
}
89+
activeBuffer.append(sql, i, endOfString + 1);
90+
i = endOfString + 1;
91+
break;
92+
93+
case '/':
94+
case '-':
95+
int endOfComment;
96+
if (currentChar == '/') {
97+
endOfComment = seekEndOfRegion(sql, i, "/*", "*/");
98+
if (endOfComment == -1) {
99+
throw new SQLSyntaxErrorException(
100+
"Open block comment at position " + i, SQLStates.SYNTAX_ERROR.getSqlState()
101+
);
102+
}
103+
} else {
104+
endOfComment = seekEndOfRegion(sql, i, "--", "\n");
105+
if (endOfComment == -1) {
106+
endOfComment = sql.length() - 1;
107+
}
108+
}
109+
if (i == endOfComment) {
110+
activeBuffer.append(currentChar);
111+
i++;
112+
} else {
113+
if (activeBuffer == nativeSql && preserveComments) {
114+
nativeSql.append(sql, i, endOfComment + 1);
115+
} else {
116+
activeBuffer.append(' ');
117+
}
118+
i = endOfComment + 1;
119+
}
120+
break;
121+
122+
case '{':
123+
escapeStartPositions.addFirst(escapeBuffer.length());
124+
escapeBuffer.append(currentChar);
125+
activeBuffer = escapeBuffer;
126+
i++;
127+
break;
128+
129+
case '}':
130+
Integer startPosition = escapeStartPositions.pollFirst();
131+
if (startPosition == null) {
132+
throw new SQLSyntaxErrorException(
133+
"Unexpected '}' at position " + i,
134+
SQLStates.SYNTAX_ERROR.getSqlState()
135+
);
136+
}
137+
escapeBuffer.append(currentChar);
138+
processEscapeExpression(escapeBuffer, startPosition, escapeBuffer.length());
139+
if (escapeStartPositions.isEmpty()) {
140+
nativeSql.append(escapeBuffer);
141+
escapeBuffer.setLength(0);
142+
activeBuffer = nativeSql;
143+
}
144+
i++;
145+
break;
146+
147+
default:
148+
activeBuffer.append(currentChar);
149+
i++;
150+
break;
151+
}
152+
}
153+
154+
if (!escapeStartPositions.isEmpty()) {
155+
throw new SQLSyntaxErrorException(
156+
"Not enclosed escape expression at position " + escapeStartPositions.pollFirst(),
157+
SQLStates.SYNTAX_ERROR.getSqlState()
158+
);
159+
}
160+
return nativeSql.toString();
161+
}
162+
163+
/**
164+
* Parses text like {@code functionName([arg[,args...]])}.
165+
* Arguments are not parsed recursively and saved as-is.
166+
*
167+
* <p>
168+
* In contrast to SQL where function name can be enclosed by double quotes,
169+
* it is not supported within escape syntax.
170+
*
171+
* @param functionString text to be parsed
172+
*
173+
* @return parsed result containing function name and its parameters, if any
174+
*
175+
* @throws SQLSyntaxErrorException if any syntax errors happened
176+
*/
177+
private FunctionExpression parseFunction(String functionString) throws SQLSyntaxErrorException {
178+
int braceNestLevel = 0;
179+
String functionName = null;
180+
List<String> functionParameters = new ArrayList<>();
181+
int parameterStartPosition = 0;
182+
183+
int i = 0;
184+
boolean completed = false;
185+
while (i < functionString.length() && !completed) {
186+
char currentChar = functionString.charAt(i);
187+
switch (currentChar) {
188+
case '\'':
189+
case '"':
190+
i = seekEndOfRegion(functionString, i, "" + currentChar, "" + currentChar) + 1;
191+
break;
192+
193+
case '(':
194+
if (braceNestLevel++ == 0) {
195+
functionName = functionString.substring(0, i).trim().toUpperCase();
196+
if (!IDENTIFIER.matcher(functionName).matches()) {
197+
throw new SQLSyntaxErrorException(
198+
"Invalid function identifier '" + functionName + "'", SQLStates.SYNTAX_ERROR.getSqlState()
199+
);
200+
}
201+
parameterStartPosition = i + 1;
202+
}
203+
i++;
204+
break;
205+
206+
case ')':
207+
if (--braceNestLevel == 0) {
208+
// reach a function closing brace
209+
// parse the last possible function parameter
210+
String param = functionString.substring(parameterStartPosition, i).trim();
211+
if (!param.isEmpty()) {
212+
functionParameters.add(param);
213+
} else if (!functionParameters.isEmpty()) {
214+
throw new SQLSyntaxErrorException(
215+
"Empty function argument at " + (functionParameters.size() + 1) + " position",
216+
SQLStates.SYNTAX_ERROR.getSqlState()
217+
);
218+
}
219+
completed = true;
220+
}
221+
i++;
222+
break;
223+
224+
case ',':
225+
if (braceNestLevel == 1) {
226+
// reach the function argument delimiter
227+
// parse the argument before this comma
228+
String param = functionString.substring(parameterStartPosition, i).trim();
229+
if (param.isEmpty()) {
230+
throw new SQLSyntaxErrorException(
231+
"Empty function argument at " + (functionParameters.size() + 1) + " position",
232+
SQLStates.SYNTAX_ERROR.getSqlState()
233+
);
234+
}
235+
parameterStartPosition = i + 1;
236+
functionParameters.add(param);
237+
}
238+
i++;
239+
break;
240+
241+
default:
242+
i++;
243+
break;
244+
}
245+
}
246+
247+
if (functionName == null || !completed) {
248+
throw new SQLSyntaxErrorException(
249+
"Malformed function expression '" + functionString + "'", SQLStates.SYNTAX_ERROR.getSqlState()
250+
);
251+
}
252+
if (i < functionString.length()) {
253+
String tail = functionString.substring(i).trim();
254+
if (!tail.isEmpty()) {
255+
throw new SQLSyntaxErrorException(
256+
"Unexpected expression '" + tail + "' after a function declaration",
257+
SQLStates.SYNTAX_ERROR.getSqlState()
258+
);
259+
}
260+
}
261+
return new FunctionExpression(functionName, functionParameters);
262+
}
263+
264+
/**
265+
* Handles an escape expression. All expression substitutes are applied to
266+
* the passed {@code buffer} parameter. In case of {@code fn}, the function
267+
* name is case-insensitive.
268+
*
269+
* @param buffer buffer containing current escape expression
270+
* @param start start position of the escape syntax in the buffer, inclusive
271+
* @param end end position of the escape syntax in the buffer, exclusive
272+
*
273+
* @throws SQLSyntaxErrorException if any syntax error happen
274+
*/
275+
private void processEscapeExpression(StringBuilder buffer, int start, int end)
276+
throws SQLSyntaxErrorException {
277+
if (buffer.charAt(start) != '{' || buffer.charAt(end - 1) != '}') {
278+
return;
279+
}
280+
int startExpression = seekFirstNonSpaceSymbol(buffer, start + 1);
281+
int endExpression = seekLastNonSpaceSymbol(buffer, end - 2) + 1;
282+
283+
if (substringMatches(buffer, "fn ", startExpression)) {
284+
FunctionExpression expression = parseFunction(buffer.substring(startExpression + 3, endExpression));
285+
ThrowingBiFunction<FunctionExpression, Connection, Expression, SQLSyntaxErrorException> mapper =
286+
functionMappings.get(FunctionSignatureKey.of(expression.getName(), expression.getParameters().size()));
287+
if (mapper == null) {
288+
throw new SQLSyntaxErrorException(
289+
"Unknown function " + expression.getName(),
290+
SQLStates.SYNTAX_ERROR.getSqlState()
291+
);
292+
}
293+
buffer.replace(start, end, mapper.apply(expression, jdbcContext).toString());
294+
} else if (substringMatches(buffer, "oj ", startExpression)) {
295+
buffer.replace(start, end, buffer.substring(startExpression + 3, endExpression));
296+
} else if (substringMatches(buffer, "escape ", startExpression)) {
297+
buffer.replace(start, end, buffer.substring(startExpression, endExpression));
298+
} else if (substringMatches(buffer, "limit ", startExpression)) {
299+
buffer.replace(start, end, buffer.substring(startExpression, endExpression));
300+
} else {
301+
throw new SQLSyntaxErrorException("Unrecognizable escape expression", SQLStates.SYNTAX_ERROR.getSqlState());
302+
}
303+
}
304+
305+
/**
306+
* Looks for the end of the region defined by its start and end
307+
* substring patterns.
308+
*
309+
* @param text search text
310+
* @param position start position in text to search the region, inclusive
311+
* @param startRegion pattern of the region start
312+
* @param endRegion pattern of the region end
313+
*
314+
* @return found position of the region end, inclusive. Start position if the region start
315+
* pattern does not match the text start position and {@literal -1} if the
316+
* region end is not found.
317+
*/
318+
private int seekEndOfRegion(String text, int position, String startRegion, String endRegion) {
319+
if (!text.regionMatches(position, startRegion, 0, startRegion.length())) {
320+
return position;
321+
}
322+
int end = text.indexOf(endRegion, position + startRegion.length());
323+
return end == -1 ? end : end + endRegion.length() - 1;
324+
}
325+
326+
private boolean substringMatches(StringBuilder text, String substring, int start) {
327+
return text.indexOf(substring, start) == start;
328+
}
329+
330+
private int seekFirstNonSpaceSymbol(StringBuilder text, int position) {
331+
while (position < text.length() && Character.isWhitespace(text.charAt(position))) {
332+
position++;
333+
}
334+
return position;
335+
}
336+
337+
private int seekLastNonSpaceSymbol(StringBuilder text, int position) {
338+
while (position > 0 && Character.isWhitespace(text.charAt(position))) {
339+
position--;
340+
}
341+
return position;
342+
}
343+
344+
}

0 commit comments

Comments
 (0)