Skip to content

Commit 350499b

Browse files
authored
deps: update ibis to version 8.0.0 and refactor remote_function to use ibis UDF method (#277)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Follow-up to https://togithub.com/googleapis/python-bigquery-dataframes/pull/53#discussion_r1427224630 🦕
1 parent 20de7f4 commit 350499b

File tree

11 files changed

+230
-69
lines changed

11 files changed

+230
-69
lines changed

bigframes/core/compile/aggregate_compiler.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def _(
331331
op: agg_ops.RankOp, column: ibis_types.Column, window=None
332332
) -> ibis_types.IntegerValue:
333333
# Ibis produces 0-based ranks, while pandas creates 1-based ranks
334-
return _apply_window_if_present(column.rank(), window) + 1
334+
return _apply_window_if_present(ibis.rank(), window) + 1
335335

336336

337337
@compile_unary_agg.register

bigframes/core/compile/compiled.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -1099,17 +1099,14 @@ def _to_ibis_expr(
10991099
if not columns:
11001100
return ibis.memtable([])
11011101

1102+
# Make sure we don't have any unbound (deferred) columns.
1103+
table = self._table.select(columns)
1104+
11021105
# Make sure all dtypes are the "canonical" ones for BigFrames. This is
11031106
# important for operations like UNION where the schema must match.
1104-
table = self._table.select(
1105-
bigframes.dtypes.ibis_value_to_canonical_type(
1106-
column.resolve(self._table)
1107-
# TODO(https://github.com/ibis-project/ibis/issues/7613): use
1108-
# public API to refer to Deferred type.
1109-
if isinstance(column, ibis.common.deferred.Deferred)
1110-
else column
1111-
)
1112-
for column in columns
1107+
table = table.select(
1108+
bigframes.dtypes.ibis_value_to_canonical_type(table[column])
1109+
for column in table.columns
11131110
)
11141111
base_table = table
11151112
if self._reduced_predicate is not None:

bigframes/dtypes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import geopandas as gpd # type: ignore
2424
import google.cloud.bigquery as bigquery
2525
import ibis
26-
from ibis.backends.bigquery.datatypes import BigQueryType
2726
import ibis.expr.datatypes as ibis_dtypes
2827
from ibis.expr.datatypes.core import dtype as python_type_to_bigquery_type
2928
import ibis.expr.types as ibis_types
@@ -33,6 +32,7 @@
3332

3433
import bigframes.constants as constants
3534
import third_party.bigframes_vendored.google_cloud_bigquery._pandas_helpers as gcb3p_pandas_helpers
35+
import third_party.bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes
3636
import third_party.bigframes_vendored.ibis.expr.operations as vendored_ibis_ops
3737

3838
# Type hints for Pandas dtypes supported by BigQuery DataFrame
@@ -643,4 +643,4 @@ def ibis_type_from_python_type(t: type) -> ibis_dtypes.DataType:
643643
def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType:
644644
if tk not in SUPPORTED_IO_BIGQUERY_TYPEKINDS:
645645
raise UnsupportedTypeError(tk, SUPPORTED_IO_BIGQUERY_TYPEKINDS)
646-
return BigQueryType.to_ibis(tk)
646+
return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)

bigframes/functions/remote_function.py

+31-40
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from __future__ import annotations
1616

17-
import functools
1817
import hashlib
1918
import inspect
2019
import logging
@@ -28,6 +27,7 @@
2827
import textwrap
2928
from typing import List, NamedTuple, Optional, Sequence, TYPE_CHECKING
3029

30+
import ibis
3131
import requests
3232

3333
if TYPE_CHECKING:
@@ -43,15 +43,12 @@
4343
resourcemanager_v3,
4444
)
4545
import google.iam.v1
46-
from ibis.backends.bigquery.compiler import compiles
47-
from ibis.backends.bigquery.datatypes import BigQueryType
4846
from ibis.expr.datatypes.core import DataType as IbisDataType
49-
import ibis.expr.operations as ops
50-
import ibis.expr.rules as rlz
5147

5248
from bigframes import clients
5349
import bigframes.constants as constants
5450
import bigframes.dtypes
51+
import third_party.bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes
5552

5653
logger = logging.getLogger(__name__)
5754

@@ -173,12 +170,14 @@ def create_bq_remote_function(
173170
# Create BQ function
174171
# https://cloud.google.com/bigquery/docs/reference/standard-sql/remote-functions#create_a_remote_function_2
175172
bq_function_args = []
176-
bq_function_return_type = BigQueryType.from_ibis(output_type)
173+
bq_function_return_type = third_party_ibis_bqtypes.BigQueryType.from_ibis(
174+
output_type
175+
)
177176

178177
# We are expecting the input type annotations to be 1:1 with the input args
179178
for idx, name in enumerate(input_args):
180179
bq_function_args.append(
181-
f"{name} {BigQueryType.from_ibis(input_types[idx])}"
180+
f"{name} {third_party_ibis_bqtypes.BigQueryType.from_ibis(input_types[idx])}"
182181
)
183182
create_function_ddl = f"""
184183
CREATE OR REPLACE FUNCTION `{self._gcp_project_id}.{self._bq_dataset}`.{bq_function_name}({','.join(bq_function_args)})
@@ -515,33 +514,10 @@ def get_remote_function_specs(self, remote_function_name):
515514
return (http_endpoint, bq_connection)
516515

517516

518-
def remote_function_node(
519-
routine_ref: bigquery.RoutineReference, ibis_signature: IbisSignature
520-
):
521-
"""Creates an Ibis node representing a remote function call."""
522-
523-
fields = {
524-
name: rlz.ValueOf(None if type_ == "ANY TYPE" else type_)
525-
for name, type_ in zip(
526-
ibis_signature.parameter_names, ibis_signature.input_types
527-
)
528-
}
529-
530-
fields["dtype"] = ibis_signature.output_type # type: ignore
531-
fields["shape"] = rlz.shape_like("args")
532-
533-
node = type(routine_ref_to_string_for_query(routine_ref), (ops.ValueOp,), fields) # type: ignore
534-
535-
@compiles(node)
536-
def compile_node(t, op):
537-
return "{}({})".format(node.__name__, ", ".join(map(t.translate, op.args)))
538-
539-
def f(*args, **kwargs):
540-
return node(*args, **kwargs).to_expr()
541-
542-
f.bigframes_remote_function = str(routine_ref) # type: ignore
543-
544-
return f
517+
class UnsupportedTypeError(ValueError):
518+
def __init__(self, type_, supported_types):
519+
self.type = type_
520+
self.supported_types = supported_types
545521

546522

547523
def ibis_signature_from_python_signature(
@@ -831,14 +807,16 @@ def wrapper(f):
831807
packages,
832808
)
833809

834-
node = remote_function_node(dataset_ref.routine(rf_name), ibis_signature)
835-
836-
node = functools.wraps(f)(node)
837-
node.__signature__ = signature
810+
node = ibis.udf.scalar.builtin(
811+
f,
812+
name=rf_name,
813+
schema=f"{dataset_ref.project}.{dataset_ref.dataset_id}",
814+
signature=(ibis_signature.input_types, ibis_signature.output_type),
815+
)
838816
node.bigframes_cloud_function = (
839817
remote_function_client.get_cloud_function_fully_qualified_name(cf_name)
840818
)
841-
819+
node.bigframes_remote_function = str(dataset_ref.routine(rf_name)) # type: ignore
842820
return node
843821

844822
return wrapper
@@ -888,4 +866,17 @@ def read_gbq_function(
888866
f"{constants.FEEDBACK_LINK}"
889867
)
890868

891-
return remote_function_node(routine_ref, ibis_signature)
869+
# The name "args" conflicts with the Ibis operator, so we use
870+
# non-standard names for the arguments here.
871+
def node(*ignored_args, **ignored_kwargs):
872+
f"""Remote function {str(routine_ref)}."""
873+
874+
node.__name__ = routine_ref.routine_id
875+
node = ibis.udf.scalar.builtin(
876+
node,
877+
name=routine_ref.routine_id,
878+
schema=f"{routine_ref.project}.{routine_ref.dataset_id}",
879+
signature=(ibis_signature.input_types, ibis_signature.output_type),
880+
)
881+
node.bigframes_remote_function = str(routine_ref) # type: ignore
882+
return node

noxfile.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -565,12 +565,12 @@ def prerelease(session: nox.sessions.Session, tests_path):
565565
# session.install(
566566
# "--upgrade",
567567
# "-e", # Use -e so that py.typed file is included.
568-
# "git+https://github.com/ibis-project/ibis.git@7.x.x#egg=ibis-framework",
568+
# "git+https://github.com/ibis-project/ibis.git#egg=ibis-framework",
569569
# )
570570
session.install(
571571
"--upgrade",
572-
# "--pre",
573-
"ibis-framework>=7.1.0,<7.2.0dev",
572+
"--pre",
573+
"ibis-framework>=8.0.0,<9.0.0dev",
574574
)
575575
already_installed.add("ibis-framework")
576576

setup.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@
4444
"google-cloud-iam >=2.12.1",
4545
"google-cloud-resource-manager >=1.10.3",
4646
"google-cloud-storage >=2.0.0",
47-
# TODO: Relax upper bound once we have fixed unit tests with 7.2.0.
48-
"ibis-framework[bigquery] >=7.1.0,<7.2.0dev",
47+
"ibis-framework[bigquery] >=8.0.0,<9.0.0dev",
4948
# TODO: Relax upper bound once we have fixed `system_prerelease` tests.
5049
"pandas >=1.5.0,<2.1.4",
5150
"pydata-google-auth >=1.8.2",
@@ -55,7 +54,7 @@
5554
# Keep sqlglot versions in sync with ibis-framework. This avoids problems
5655
# where the incorrect version of sqlglot is installed, such as
5756
# https://github.com/googleapis/python-bigquery-dataframes/issues/315
58-
"sqlglot >=19.9.0,<20",
57+
"sqlglot >=20.8.0,<=20.11",
5958
"tabulate >= 0.9",
6059
"ipywidgets >=7.7.1",
6160
"humanize >= 4.6.0",

testing/constraints-3.9.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ google-cloud-bigquery-connection==1.12.0
1010
google-cloud-iam==2.12.1
1111
google-cloud-resource-manager==1.10.3
1212
google-cloud-storage==2.0.0
13-
ibis-framework==7.1.0
13+
ibis-framework==8.0.0
1414
pandas==1.5.0
1515
pydata-google-auth==1.8.2
1616
requests==2.27.1
1717
scikit-learn==1.2.2
1818
sqlalchemy==1.4
19-
sqlglot==19.9.0
19+
sqlglot==20.8.0
2020
tabulate==0.9
2121
ipywidgets==7.7.1
2222
humanize==4.6.0

tests/system/small/test_dataframe.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -157,15 +157,13 @@ def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_inde
157157
],
158158
)
159159
def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep):
160-
bf_result = scalars_df_index.nlargest(
161-
3, ["bool_col", "int64_too"], keep=keep
162-
).to_pandas()
160+
bf_result = scalars_df_index.nlargest(3, ["bool_col", "int64_too"], keep=keep)
163161
pd_result = scalars_pandas_df_index.nlargest(
164162
3, ["bool_col", "int64_too"], keep=keep
165163
)
166164

167165
pd.testing.assert_frame_equal(
168-
bf_result,
166+
bf_result.to_pandas(),
169167
pd_result,
170168
)
171169

@@ -179,11 +177,11 @@ def test_df_nlargest(scalars_df_index, scalars_pandas_df_index, keep):
179177
],
180178
)
181179
def test_df_nsmallest(scalars_df_index, scalars_pandas_df_index, keep):
182-
bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep).to_pandas()
180+
bf_result = scalars_df_index.nsmallest(6, ["bool_col"], keep=keep)
183181
pd_result = scalars_pandas_df_index.nsmallest(6, ["bool_col"], keep=keep)
184182

185183
pd.testing.assert_frame_equal(
186-
bf_result,
184+
bf_result.to_pandas(),
187185
pd_result,
188186
)
189187

tests/unit/test_core.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def test_arrayvalues_to_ibis_expr_with_concat():
118118
total_ordering_columns=["col1"],
119119
)
120120
expr = value.concat([value])
121-
actual = expr._compile_ordered()._to_ibis_expr(ordering_mode="unordered")
121+
actual = expr._compile_unordered()._to_ibis_expr()
122122
assert len(actual.columns) == 3
123123
# TODO(ashleyxu, b/299631930): test out the union expression
124124
assert actual.columns[0] == "column_0"

tests/unit/test_remote_function.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from ibis.backends.bigquery import datatypes as bq_types
1615
from ibis.expr import datatypes as ibis_types
1716

1817
import bigframes.dtypes
18+
import third_party.bigframes_vendored.ibis.backends.bigquery.datatypes as third_party_ibis_bqtypes
1919

2020

2121
def test_supported_types_correspond():
@@ -24,7 +24,7 @@ def test_supported_types_correspond():
2424
ibis_types.dtype(t) for t in bigframes.dtypes.SUPPORTED_IO_PYTHON_TYPES
2525
}
2626
ibis_types_from_bigquery = {
27-
bq_types.BigQueryType.to_ibis(tk)
27+
third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
2828
for tk in bigframes.dtypes.SUPPORTED_IO_BIGQUERY_TYPEKINDS
2929
}
3030

0 commit comments

Comments
 (0)