Skip to content

Commit 421024c

Browse files
feat: allow loading table from dataframe with extra fields, googleapis#1812
1 parent a69d6b7 commit 421024c

File tree

2 files changed

+43
-2
lines changed

2 files changed

+43
-2
lines changed

google/cloud/bigquery/_pandas_helpers.py

+9 -2
Original file line number | Diff line number | Diff line change
@@ -484,6 +484,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
484484
Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
485485
The automatically determined schema. Returns None if the type of
486486
any column cannot be determined.
487+
488+
Note:
489+
- If `bq_schema` contains fields not found in the DataFrame, they will
490+
still be included in the resulting schema, and a warning will be issued.
487491
"""
488492
if pandas_gbq is None:
489493
warnings.warn(
@@ -537,11 +541,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
537541
# Catch any schema mismatch. The developer explicitly asked to serialize a
538542
# column, but it was not found.
539543
if bq_schema_unused:
540-
raise ValueError(
544+
warnings.warn(
541545
"bq_schema contains fields not present in dataframe: {}".format(
542546
bq_schema_unused
543-
)
547+
),
548+
category=UserWarning,
544549
)
550+
for unused_field_name in bq_schema_unused:
551+
bq_schema_out.append(bq_schema_index.get(unused_field_name))
545552

546553
# If schema detection was not successful for all columns, also try with
547554
# pyarrow, if available.

tests/unit/test__pandas_helpers.py

+34
Original file line number | Diff line number | Diff line change
@@ -1385,6 +1385,40 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch):
13851385
assert returned_schema == expected_schema
13861386

13871387

1388+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
1389+
def test_dataframe_to_bq_schema_allows_extra_fields(module_under_test, monkeypatch):
1390+
monkeypatch.setattr(module_under_test, "pandas_gbq", None)
1391+
1392+
df_data = collections.OrderedDict(
1393+
[
1394+
("str_column", ["hello", "world"]),
1395+
("int_column", [42, 8]),
1396+
("bool_column", [True, False]),
1397+
]
1398+
)
1399+
dataframe = pandas.DataFrame(df_data)
1400+
1401+
dict_schema = [
1402+
{"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
1403+
{"name": "int_column", "type": "INTEGER", "mode": "NULLABLE"},
1404+
{"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
1405+
{"name": "extra_column", "type": "STRING", "mode": "NULLABLE"},
1406+
]
1407+
1408+
with pytest.warns(UserWarning, match="bq_schema contains fields not present"):
1409+
returned_schema = module_under_test.dataframe_to_bq_schema(
1410+
dataframe, dict_schema
1411+
)
1412+
1413+
expected_schema = (
1414+
schema.SchemaField("str_column", "STRING", "NULLABLE"),
1415+
schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
1416+
schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
1417+
schema.SchemaField("extra_column", "STRING", "NULLABLE"),
1418+
)
1419+
assert returned_schema == expected_schema
1420+
1421+
13881422
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
13891423
def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
13901424
module_under_test, monkeypatch

0 commit comments

Comments
 (0)