Skip to content

Commit b3c259d

Browse files
fix: improve client-side regex statement parser (#1328)
* fix: improve client-side regex statement parser The client-side regex-based statement parser contained multiple minor errors, like: - BEGIN would match any string as BEGIN TRANSACTION (including stuff like `BEGIN foo`) - COMMIT and ROLLBACK had the same problem as BEGIN. - Mismatches were reported as UPDATE. They are now returned as UNKNOWN. - DLL missed the ANALYZE keyword * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 03400c4 commit b3c259d

File tree

7 files changed

+53
-24
lines changed

7 files changed

+53
-24
lines changed

google/cloud/spanner_dbapi/batch_dml_executor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,12 @@ def execute_statement(self, parsed_statement: ParsedStatement):
5454
"""
5555
from google.cloud.spanner_dbapi import ProgrammingError
5656

57+
# Note: Let the server handle it if the client-side parser did not
58+
# recognize the type of statement.
5759
if (
5860
parsed_statement.statement_type != StatementType.UPDATE
5961
and parsed_statement.statement_type != StatementType.INSERT
62+
and parsed_statement.statement_type != StatementType.UNKNOWN
6063
):
6164
raise ProgrammingError("Only DML statements are allowed in batch DML mode.")
6265
self._statements.append(parsed_statement.statement)

google/cloud/spanner_dbapi/client_side_statement_parser.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,18 @@
2121
Statement,
2222
)
2323

24-
RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(TRANSACTION)?", re.IGNORECASE)
25-
RE_COMMIT = re.compile(r"^\s*(COMMIT)(TRANSACTION)?", re.IGNORECASE)
26-
RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(TRANSACTION)?", re.IGNORECASE)
24+
RE_BEGIN = re.compile(r"^\s*(BEGIN|START)(\s+TRANSACTION)?\s*$", re.IGNORECASE)
25+
RE_COMMIT = re.compile(r"^\s*(COMMIT)(\s+TRANSACTION)?\s*$", re.IGNORECASE)
26+
RE_ROLLBACK = re.compile(r"^\s*(ROLLBACK)(\s+TRANSACTION)?\s*$", re.IGNORECASE)
2727
RE_SHOW_COMMIT_TIMESTAMP = re.compile(
28-
r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)", re.IGNORECASE
28+
r"^\s*(SHOW)\s+(VARIABLE)\s+(COMMIT_TIMESTAMP)\s*$", re.IGNORECASE
2929
)
3030
RE_SHOW_READ_TIMESTAMP = re.compile(
31-
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)", re.IGNORECASE
31+
r"^\s*(SHOW)\s+(VARIABLE)\s+(READ_TIMESTAMP)\s*$", re.IGNORECASE
3232
)
33-
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)", re.IGNORECASE)
34-
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)", re.IGNORECASE)
35-
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)", re.IGNORECASE)
33+
RE_START_BATCH_DML = re.compile(r"^\s*(START)\s+(BATCH)\s+(DML)\s*$", re.IGNORECASE)
34+
RE_RUN_BATCH = re.compile(r"^\s*(RUN)\s+(BATCH)\s*$", re.IGNORECASE)
35+
RE_ABORT_BATCH = re.compile(r"^\s*(ABORT)\s+(BATCH)\s*$", re.IGNORECASE)
3636
RE_PARTITION_QUERY = re.compile(r"^\s*(PARTITION)\s+(.+)", re.IGNORECASE)
3737
RE_RUN_PARTITION = re.compile(r"^\s*(RUN)\s+(PARTITION)\s+(.+)", re.IGNORECASE)
3838
RE_RUN_PARTITIONED_QUERY = re.compile(

google/cloud/spanner_dbapi/connection.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,7 @@
2020
from google.cloud import spanner_v1 as spanner
2121
from google.cloud.spanner_dbapi import partition_helper
2222
from google.cloud.spanner_dbapi.batch_dml_executor import BatchMode, BatchDmlExecutor
23-
from google.cloud.spanner_dbapi.parse_utils import _get_statement_type
24-
from google.cloud.spanner_dbapi.parsed_statement import (
25-
StatementType,
26-
AutocommitDmlMode,
27-
)
23+
from google.cloud.spanner_dbapi.parsed_statement import AutocommitDmlMode
2824
from google.cloud.spanner_dbapi.partition_helper import PartitionId
2925
from google.cloud.spanner_dbapi.parsed_statement import ParsedStatement, Statement
3026
from google.cloud.spanner_dbapi.transaction_helper import TransactionRetryHelper
@@ -702,10 +698,6 @@ def set_autocommit_dml_mode(
702698
self._autocommit_dml_mode = autocommit_dml_mode
703699

704700
def _partitioned_query_validation(self, partitioned_query, statement):
705-
if _get_statement_type(Statement(partitioned_query)) is not StatementType.QUERY:
706-
raise ProgrammingError(
707-
"Only queries can be partitioned. Invalid statement: " + statement.sql
708-
)
709701
if self.read_only is not True and self._client_transaction_started is True:
710702
raise ProgrammingError(
711703
"Partitioned query is not supported, because the connection is in a read/write transaction."

google/cloud/spanner_dbapi/cursor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,9 +404,12 @@ def executemany(self, operation, seq_of_params):
404404
# For every operation, we've got to ensure that any prior DDL
405405
# statements were run.
406406
self.connection.run_prior_DDL_statements()
407+
# Treat UNKNOWN statements as if they are DML and let the server
408+
# determine what is wrong with it.
407409
if self._parsed_statement.statement_type in (
408410
StatementType.INSERT,
409411
StatementType.UPDATE,
412+
StatementType.UNKNOWN,
410413
):
411414
statements = []
412415
for params in seq_of_params:

google/cloud/spanner_dbapi/parse_utils.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,25 +155,30 @@
155155
STMT_INSERT = "INSERT"
156156

157157
# Heuristic for identifying statements that don't need to be run as updates.
158+
# TODO: This and the other regexes do not match statements that start with a hint.
158159
RE_NON_UPDATE = re.compile(r"^\W*(SELECT|GRAPH|FROM)", re.IGNORECASE)
159160

160161
RE_WITH = re.compile(r"^\s*(WITH)", re.IGNORECASE)
161162

162163
# DDL statements follow
163164
# https://cloud.google.com/spanner/docs/data-definition-language
164165
RE_DDL = re.compile(
165-
r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME)", re.IGNORECASE | re.DOTALL
166+
r"^\s*(CREATE|ALTER|DROP|GRANT|REVOKE|RENAME|ANALYZE)", re.IGNORECASE | re.DOTALL
166167
)
167168

168-
RE_IS_INSERT = re.compile(r"^\s*(INSERT)", re.IGNORECASE | re.DOTALL)
169+
# TODO: These do not match statements that start with a hint.
170+
RE_IS_INSERT = re.compile(r"^\s*(INSERT\s+)", re.IGNORECASE | re.DOTALL)
171+
RE_IS_UPDATE = re.compile(r"^\s*(UPDATE\s+)", re.IGNORECASE | re.DOTALL)
172+
RE_IS_DELETE = re.compile(r"^\s*(DELETE\s+)", re.IGNORECASE | re.DOTALL)
169173

170174
RE_INSERT = re.compile(
171175
# Only match the `INSERT INTO <table_name> (columns...)
172176
# otherwise the rest of the statement could be a complex
173177
# operation.
174-
r"^\s*INSERT INTO (?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)",
178+
r"^\s*INSERT(?:\s+INTO)?\s+(?P<table_name>[^\s\(\)]+)\s*\((?P<columns>[^\(\)]+)\)",
175179
re.IGNORECASE | re.DOTALL,
176180
)
181+
"""Deprecated: Use the RE_IS_INSERT, RE_IS_UPDATE, and RE_IS_DELETE regexes"""
177182

178183
RE_VALUES_TILL_END = re.compile(r"VALUES\s*\(.+$", re.IGNORECASE | re.DOTALL)
179184

@@ -259,8 +264,13 @@ def _get_statement_type(statement):
259264
# statements and doesn't yet support WITH for DML statements.
260265
return StatementType.QUERY
261266

262-
statement.sql = ensure_where_clause(query)
263-
return StatementType.UPDATE
267+
if RE_IS_UPDATE.match(query) or RE_IS_DELETE.match(query):
268+
# TODO: Remove this? It makes more sense to have this in SQLAlchemy and
269+
# Django than here.
270+
statement.sql = ensure_where_clause(query)
271+
return StatementType.UPDATE
272+
273+
return StatementType.UNKNOWN
264274

265275

266276
def sql_pyformat_args_to_spanner(sql, params):
@@ -355,7 +365,7 @@ def get_param_types(params):
355365
def ensure_where_clause(sql):
356366
"""
357367
Cloud Spanner requires a WHERE clause on UPDATE and DELETE statements.
358-
Add a dummy WHERE clause if non detected.
368+
Add a dummy WHERE clause if not detected.
359369
360370
:type sql: str
361371
:param sql: SQL code to check.

google/cloud/spanner_dbapi/parsed_statement.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818

1919
class StatementType(Enum):
20+
UNKNOWN = 0
2021
CLIENT_SIDE = 1
2122
DDL = 2
2223
QUERY = 3

tests/unit/spanner_dbapi/test_parse_utils.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,31 @@ def test_classify_stmt(self):
7474
("REVOKE SELECT ON TABLE Singers TO ROLE parent", StatementType.DDL),
7575
("GRANT ROLE parent TO ROLE child", StatementType.DDL),
7676
("INSERT INTO table (col1) VALUES (1)", StatementType.INSERT),
77+
("INSERT table (col1) VALUES (1)", StatementType.INSERT),
78+
("INSERT OR UPDATE table (col1) VALUES (1)", StatementType.INSERT),
79+
("INSERT OR IGNORE table (col1) VALUES (1)", StatementType.INSERT),
7780
("UPDATE table SET col1 = 1 WHERE col1 = NULL", StatementType.UPDATE),
81+
("delete from table WHERE col1 = 2", StatementType.UPDATE),
82+
("delete from table WHERE col1 in (select 1)", StatementType.UPDATE),
83+
("dlete from table where col1 = 2", StatementType.UNKNOWN),
84+
("udpate table set col2=1 where col1 = 2", StatementType.UNKNOWN),
85+
("begin foo", StatementType.UNKNOWN),
86+
("begin transaction foo", StatementType.UNKNOWN),
87+
("commit foo", StatementType.UNKNOWN),
88+
("commit transaction foo", StatementType.UNKNOWN),
89+
("rollback foo", StatementType.UNKNOWN),
90+
("rollback transaction foo", StatementType.UNKNOWN),
91+
("show variable", StatementType.UNKNOWN),
92+
("show variable read_timestamp foo", StatementType.UNKNOWN),
93+
("INSERTs INTO table (col1) VALUES (1)", StatementType.UNKNOWN),
94+
("UPDATEs table SET col1 = 1 WHERE col1 = NULL", StatementType.UNKNOWN),
95+
("DELETEs from table WHERE col1 = 2", StatementType.UNKNOWN),
7896
)
7997

8098
for query, want_class in cases:
81-
self.assertEqual(classify_statement(query).statement_type, want_class)
99+
self.assertEqual(
100+
classify_statement(query).statement_type, want_class, query
101+
)
82102

83103
def test_partition_query_classify_stmt(self):
84104
parsed_statement = classify_statement(

0 commit comments

Comments
 (0)