Skip to content

Commit 6624b10

Browse files
author
Jim Fulton
authored
feat: STRUCT and ARRAY support (#318)
1 parent a42283c commit 6624b10

21 files changed

+903
-159
lines changed

docs/alembic.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Supported operations:
4343
<https://alembic.sqlalchemy.org/en/latest/ops.html#alembic.operations.Operations.rename_table>`_
4444

4545
Note that some of the operations above have limited capability, again
46-
do to `BigQuery limitations
46+
due to `BigQuery limitations
4747
<https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language>`_.
4848

4949
The `execute` operation allows access to BigQuery-specific

docs/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
:maxdepth: 2
44

55
README
6+
struct
67
geography
78
alembic
89
reference

docs/struct.rst

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
Working with BigQuery STRUCT data
2+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3+
4+
The BigQuery `STRUCT data type
5+
<https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type>`_
6+
provided data that are collections of named fields.
7+
8+
`sqlalchemy-bigquery` provided a STRUCT type that can be used to
9+
define tables with STRUCT columns:
10+
11+
.. literalinclude:: samples/snippets/STRUCT.py
12+
:language: python
13+
:dedent: 4
14+
:start-after: [START bigquery_sqlalchemy_create_table_with_struct]
15+
:end-before: [END bigquery_sqlalchemy_create_table_with_struct]
16+
17+
`STRUCT` types can be nested, as in this example. Struct fields can
18+
be defined in two ways:
19+
20+
- Fields can be provided as keyword arguments, as in the `cylinder`
21+
and `horsepower` fields in this example.
22+
23+
- Fields can be provided as name-type tuples provided as positional
24+
arguments, as with the `count` and `compression` fields in this example.
25+
26+
STRUCT columns are automatically created when existing database tables
27+
containing STRUCT columns are introspected.
28+
29+
Struct data are represented in Python as Python dictionaries:
30+
31+
.. literalinclude:: samples/snippets/STRUCT.py
32+
:language: python
33+
:dedent: 4
34+
:start-after: [START bigquery_sqlalchemy_insert_struct]
35+
:end-before: [END bigquery_sqlalchemy_insert_struct]
36+
37+
When querying struct fields, you can use attribute access syntax:
38+
39+
.. literalinclude:: samples/snippets/STRUCT.py
40+
:language: python
41+
:dedent: 4
42+
:start-after: [START bigquery_sqlalchemy_query_struct]
43+
:end-before: [END bigquery_sqlalchemy_query_struct]
44+
45+
or mapping access:
46+
47+
.. literalinclude:: samples/snippets/STRUCT.py
48+
:language: python
49+
:dedent: 4
50+
:start-after: [START bigquery_sqlalchemy_query_getitem]
51+
:end-before: [END bigquery_sqlalchemy_query_getitem]
52+
53+
and field names are case insensitive:
54+
55+
.. literalinclude:: samples/snippets/STRUCT.py
56+
:language: python
57+
:dedent: 4
58+
:start-after: [START bigquery_sqlalchemy_query_STRUCT]
59+
:end-before: [END bigquery_sqlalchemy_query_STRUCT]
60+
61+
When using attribute-access syntax, field names may conflict with
62+
column attribute names. For example SQLAlchemy columns have `name`
63+
and `type` attributes, among others. When accessing a field whose name
64+
conflicts with a column attribute name, either use mapping access, or
65+
spell the field name with upper-case letters.
66+
67+
68+
69+

samples/snippets/STRUCT.py

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright (c) 2021 The sqlalchemy-bigquery Authors
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
# this software and associated documentation files (the "Software"), to deal in
5+
# the Software without restriction, including without limitation the rights to
6+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
# the Software, and to permit persons to whom the Software is furnished to do so,
8+
# subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all
11+
# copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19+
20+
21+
def example(engine):
22+
# fmt: off
23+
# [START bigquery_sqlalchemy_create_table_with_struct]
24+
from sqlalchemy.ext.declarative import declarative_base
25+
from sqlalchemy import Column, String, Integer, Float
26+
from sqlalchemy_bigquery import STRUCT
27+
28+
Base = declarative_base()
29+
30+
class Car(Base):
31+
__tablename__ = "Cars"
32+
33+
model = Column(String, primary_key=True)
34+
engine = Column(
35+
STRUCT(
36+
cylinder=STRUCT(("count", Integer),
37+
("compression", Float)),
38+
horsepower=Integer)
39+
)
40+
41+
# [END bigquery_sqlalchemy_create_table_with_struct]
42+
Car.__table__.create(engine)
43+
44+
# [START bigquery_sqlalchemy_insert_struct]
45+
from sqlalchemy.orm import sessionmaker
46+
47+
Session = sessionmaker(bind=engine)
48+
session = Session()
49+
50+
sebring = Car(model="Sebring",
51+
engine=dict(
52+
cylinder=dict(
53+
count=6,
54+
compression=18.0),
55+
horsepower=235))
56+
townc = Car(model="Town and Counttry",
57+
engine=dict(
58+
cylinder=dict(
59+
count=6,
60+
compression=16.0),
61+
horsepower=251))
62+
xj8 = Car(model="XJ8",
63+
engine=dict(
64+
cylinder=dict(
65+
count=8,
66+
compression=10.75),
67+
horsepower=575))
68+
69+
session.add_all((sebring, townc, xj8))
70+
session.commit()
71+
72+
# [END bigquery_sqlalchemy_insert_struct]
73+
74+
# [START bigquery_sqlalchemy_query_struct]
75+
sixes = session.query(Car).filter(Car.engine.cylinder.count == 6)
76+
# [END bigquery_sqlalchemy_query_struct]
77+
sixes1 = list(sixes)
78+
79+
# [START bigquery_sqlalchemy_query_STRUCT]
80+
sixes = session.query(Car).filter(Car.engine.CYLINDER.COUNT == 6)
81+
# [END bigquery_sqlalchemy_query_STRUCT]
82+
sixes2 = list(sixes)
83+
84+
# [START bigquery_sqlalchemy_query_getitem]
85+
sixes = session.query(Car).filter(Car.engine["cylinder"]["count"] == 6)
86+
# [END bigquery_sqlalchemy_query_getitem]
87+
# fmt: on
88+
sixes3 = list(sixes)
89+
90+
return sixes1, sixes2, sixes3

samples/snippets/STRUCT_test.py

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright (c) 2021 The sqlalchemy-bigquery Authors
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
# this software and associated documentation files (the "Software"), to deal in
5+
# the Software without restriction, including without limitation the rights to
6+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
# the Software, and to permit persons to whom the Software is furnished to do so,
8+
# subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all
11+
# copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19+
20+
21+
def test_struct(engine):
22+
from . import STRUCT
23+
24+
sixeses = STRUCT.example(engine)
25+
26+
for sixes in sixeses:
27+
assert sorted(car.model for car in sixes) == ["Sebring", "Town and Counttry"]

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def readme():
8383
# Until this issue is closed
8484
# https://github.com/googleapis/google-cloud-python/issues/10566
8585
"google-auth>=1.25.0,<3.0.0dev", # Work around pip wack.
86-
"google-cloud-bigquery>=2.24.1",
86+
"google-cloud-bigquery>=2.25.2,<3.0.0dev",
8787
"sqlalchemy>=1.2.0,<1.5.0dev",
8888
"future",
8989
],

sqlalchemy_bigquery/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from .version import __version__ # noqa
2424

2525
from .base import BigQueryDialect, dialect # noqa
26-
from .base import (
26+
from ._types import (
2727
ARRAY,
2828
BIGNUMERIC,
2929
BOOL,
@@ -38,6 +38,7 @@
3838
NUMERIC,
3939
RECORD,
4040
STRING,
41+
STRUCT,
4142
TIME,
4243
TIMESTAMP,
4344
)
@@ -58,6 +59,7 @@
5859
"NUMERIC",
5960
"RECORD",
6061
"STRING",
62+
"STRUCT",
6163
"TIME",
6264
"TIMESTAMP",
6365
]

sqlalchemy_bigquery/_struct.py

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# Copyright (c) 2021 The sqlalchemy-bigquery Authors
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
# this software and associated documentation files (the "Software"), to deal in
5+
# the Software without restriction, including without limitation the rights to
6+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
# the Software, and to permit persons to whom the Software is furnished to do so,
8+
# subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all
11+
# copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19+
20+
from typing import Mapping, Tuple
21+
22+
import packaging.version
23+
import sqlalchemy.sql.default_comparator
24+
import sqlalchemy.sql.sqltypes
25+
import sqlalchemy.types
26+
27+
from . import base
28+
29+
sqlalchemy_1_4_or_more = packaging.version.parse(
30+
sqlalchemy.__version__
31+
) >= packaging.version.parse("1.4")
32+
33+
if sqlalchemy_1_4_or_more:
34+
import sqlalchemy.sql.coercions
35+
import sqlalchemy.sql.roles
36+
37+
38+
def _get_subtype_col_spec(type_):
39+
global _get_subtype_col_spec
40+
41+
type_compiler = base.dialect.type_compiler(base.dialect())
42+
_get_subtype_col_spec = type_compiler.process
43+
return _get_subtype_col_spec(type_)
44+
45+
46+
class STRUCT(sqlalchemy.sql.sqltypes.Indexable, sqlalchemy.types.UserDefinedType):
47+
"""
48+
A type for BigQuery STRUCT/RECORD data
49+
50+
See https://googleapis.dev/python/sqlalchemy-bigquery/latest/struct.html
51+
"""
52+
53+
# See https://docs.sqlalchemy.org/en/14/core/custom_types.html#creating-new-types
54+
55+
def __init__(
56+
self,
57+
*fields: Tuple[str, sqlalchemy.types.TypeEngine],
58+
**kwfields: Mapping[str, sqlalchemy.types.TypeEngine],
59+
):
60+
# Note that because:
61+
# https://docs.python.org/3/whatsnew/3.6.html#pep-468-preserving-keyword-argument-order
62+
# We know that `kwfields` preserves order.
63+
self._STRUCT_fields = tuple(
64+
(
65+
name,
66+
type_ if isinstance(type_, sqlalchemy.types.TypeEngine) else type_(),
67+
)
68+
for (name, type_) in (fields + tuple(kwfields.items()))
69+
)
70+
71+
self._STRUCT_byname = {
72+
name.lower(): type_ for (name, type_) in self._STRUCT_fields
73+
}
74+
75+
def __repr__(self):
76+
fields = ", ".join(
77+
f"{name}={repr(type_)}" for name, type_ in self._STRUCT_fields
78+
)
79+
return f"STRUCT({fields})"
80+
81+
def get_col_spec(self, **kw):
82+
fields = ", ".join(
83+
f"{name} {_get_subtype_col_spec(type_)}"
84+
for name, type_ in self._STRUCT_fields
85+
)
86+
return f"STRUCT<{fields}>"
87+
88+
def bind_processor(self, dialect):
89+
return dict
90+
91+
class Comparator(sqlalchemy.sql.sqltypes.Indexable.Comparator):
92+
def _setup_getitem(self, name):
93+
if not isinstance(name, str):
94+
raise TypeError(
95+
f"STRUCT fields can only be accessed with strings field names,"
96+
f" not {repr(name)}."
97+
)
98+
subtype = self.expr.type._STRUCT_byname.get(name.lower())
99+
if subtype is None:
100+
raise KeyError(name)
101+
operator = struct_getitem_op
102+
index = _field_index(self, name, operator)
103+
return operator, index, subtype
104+
105+
def __getattr__(self, name):
106+
if name.lower() in self.expr.type._STRUCT_byname:
107+
return self[name]
108+
109+
comparator_factory = Comparator
110+
111+
112+
# In the implementations of _field_index below, we're stealing from
113+
# the JSON type implementation, but the code to steal changed in
114+
# 1.4. :/
115+
116+
if sqlalchemy_1_4_or_more:
117+
118+
def _field_index(self, name, operator):
119+
return sqlalchemy.sql.coercions.expect(
120+
sqlalchemy.sql.roles.BinaryElementRole,
121+
name,
122+
expr=self.expr,
123+
operator=operator,
124+
bindparam_type=sqlalchemy.types.String(),
125+
)
126+
127+
128+
else:
129+
130+
def _field_index(self, name, operator):
131+
return sqlalchemy.sql.default_comparator._check_literal(
132+
self.expr, operator, name, bindparam_type=sqlalchemy.types.String(),
133+
)
134+
135+
136+
def struct_getitem_op(a, b):
137+
raise NotImplementedError()
138+
139+
140+
sqlalchemy.sql.default_comparator.operator_lookup[
141+
struct_getitem_op.__name__
142+
] = sqlalchemy.sql.default_comparator.operator_lookup["json_getitem_op"]
143+
144+
145+
class SQLCompiler:
146+
def visit_struct_getitem_op_binary(self, binary, operator_, **kw):
147+
left = self.process(binary.left, **kw)
148+
return f"{left}.{binary.right.value}"

0 commit comments

Comments
 (0)