Commit c0e3cd8

extend to_gbq docstring and add tests for table_schema parameter

Parent: 0ebe1e2

3 files changed (+51, -0)

docs/source/changelog.rst (+1)

@@ -6,6 +6,7 @@ Changelog
 
 - Resolve issue where the optional ``--noauth_local_webserver`` command line argument would not be propagated during the authentication process. (:issue:`35`)
 - Drop support for Python 3.4 (:issue:`40`)
+- Add support for users to provide a table schema in the ``to_gbq`` call instead of letting the module infer the schema from ``DataFrame.dtypes`` (:issue:`46`)
 
 0.1.6 / 2017-05-03
 ------------------

pandas_gbq/gbq.py (+7)

@@ -816,6 +816,13 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
         Service account private key in JSON format. Can be file path
         or string contents. This is useful for remote server
         authentication (eg. jupyter iPython notebook on remote host)
+    table_schema : list of dicts
+        .. versionadded:: 0.2.0
+        List of BigQuery table fields to which the DataFrame columns
+        conform, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If a
+        schema is not provided, it will be generated according to the
+        dtypes of the DataFrame columns. See the BigQuery API
+        documentation for the available field names.
     """
 
     if if_exists not in ('fail', 'replace', 'append'):
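
For illustration only, a minimal sketch of calling ``to_gbq`` with the new ``table_schema`` argument; the project id, destination table, and key path below are placeholder values, not part of this commit:

    import pandas as pd
    from pandas_gbq import gbq

    df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1.0, 2.0]})

    # Schema fields follow the format shown in the docstring example above.
    schema = [{'name': 'col1', 'type': 'STRING'},
              {'name': 'col2', 'type': 'FLOAT'}]

    # 'my_dataset.my_table', 'my-project', and the key path are placeholders.
    gbq.to_gbq(df, 'my_dataset.my_table', 'my-project',
               private_key='/path/to/key.json',
               table_schema=schema)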

pandas_gbq/tests/test_gbq.py (+43)

@@ -1258,6 +1258,49 @@ def test_verify_schema_ignores_field_mode(self):
         assert self.sut.verify_schema(
             self.dataset_prefix + "1", TABLE_ID + test_id, test_schema_2)
 
+    # Issue #46; tests for scenarios with user-provided
+    # schemas
+
+    def test_upload_data_with_valid_user_schema(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "15"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'STRING'},
+                       {'name': 'D', 'type': 'TIMESTAMP'}]
+        destination_table = self.destination_table + test_id
+        gbq.to_gbq(df, destination_table, _get_project_id(),
+                   private_key=_get_private_key_path(),
+                   table_schema=test_schema)
+        dataset, table = destination_table.split('.')
+        assert self.table.verify_schema(dataset, table,
+                                        dict(fields=test_schema))
+
+    def test_upload_data_with_invalid_user_schema_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "16"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'},
+                       {'name': 'D', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(),
+                       table_schema=test_schema)
+
+    def test_upload_data_with_missing_schema_fields_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "17"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(),
+                       table_schema=test_schema)
+
     def test_list_dataset(self):
         dataset_id = self.dataset_prefix + "1"
         assert dataset_id in self.dataset.datasets()
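
As a rough illustration of why the last two tests fail, the schema that ``to_gbq`` infers from ``DataFrame.dtypes`` when no ``table_schema`` is given can be approximated with a small hypothetical helper (this is not the library's own function); ``tm.makeMixedDataFrame()`` has float, string, and datetime columns, so an all-FLOAT or incomplete user schema disagrees with the rows being streamed:

    import pandas.util.testing as tm

    # Hypothetical helper: approximate the dtype -> BigQuery type fallback
    # described in the to_gbq docstring.
    def sketch_schema(df):
        type_map = {'i': 'INTEGER', 'b': 'BOOLEAN', 'f': 'FLOAT',
                    'O': 'STRING', 'M': 'TIMESTAMP'}
        fields = [{'name': col, 'type': type_map.get(dtype.kind, 'STRING')}
                  for col, dtype in zip(df.columns, df.dtypes)]
        return {'fields': fields}

    df = tm.makeMixedDataFrame()  # columns: A, B (float), C (object), D (datetime)
    print(sketch_schema(df))
    # -> A: FLOAT, B: FLOAT, C: STRING, D: TIMESTAMP, matching the valid schema above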
