Commit 4e9a6cd

extend to_gbq docstring and add tests for table_schema parameter
1 parent 0ebe1e2 commit 4e9a6cd

3 files changed, +51 -0 lines changed

docs/source/changelog.rst (+1)

@@ -6,6 +6,7 @@ Changelog
 
 - Resolve issue where the optional ``--noauth_local_webserver`` command line argument would not be propagated during the authentication process. (:issue:`35`)
 - Drop support for Python 3.4 (:issue:`40`)
+- Add support for users to provide a table schema in ``to_gbq`` call instead of letting module to infer the schema from ``DataFrame.dtypes`` (:issue:`46`)
 
 0.1.6 / 2017-05-03
 ------------------
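
Note: the new changelog entry above refers to the default path, where a schema is inferred from DataFrame.dtypes when table_schema is not supplied. The following is only a rough, hypothetical sketch of that kind of dtype-to-BigQuery-type mapping, not pandas-gbq's actual schema-generation code.

import pandas as pd

def infer_bq_schema(df):
    # Hypothetical helper: map pandas dtype kinds to BigQuery field types.
    # Illustrative only -- not the mapping pandas-gbq actually ships.
    type_map = {'i': 'INTEGER', 'u': 'INTEGER', 'b': 'BOOLEAN',
                'f': 'FLOAT', 'M': 'TIMESTAMP'}
    return {'fields': [{'name': col, 'type': type_map.get(dtype.kind, 'STRING')}
                       for col, dtype in df.dtypes.items()]}

infer_bq_schema(pd.DataFrame({'col1': ['a', 'b'], 'col2': [1.5, 2.5]}))
# -> {'fields': [{'name': 'col1', 'type': 'STRING'},
#                {'name': 'col2', 'type': 'FLOAT'}]}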

pandas_gbq/gbq.py (+8)

@@ -816,6 +816,14 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
         Service account private key in JSON format. Can be file path
         or string contents. This is useful for remote server
         authentication (eg. jupyter iPython notebook on remote host)
+    table_schema : list of dicts
+        List of BigQuery table fields to which according DataFrame columns
+        conform to, e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If
+        schema is not provided, it will be generated according to dtypes
+        of DataFrame columns. See BigQuery API documentation on available
+        names of a field.
+
+        .. versionadded:: 0.2.0
     """
 
     if if_exists not in ('fail', 'replace', 'append'):
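
For reference, a minimal usage sketch of the table_schema argument documented above; the DataFrame, dataset/table names, project id, and key path are placeholders, not values taken from this commit.

import pandas as pd
from pandas_gbq import gbq

df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1.5, 2.5]})
schema = [{'name': 'col1', 'type': 'STRING'},
          {'name': 'col2', 'type': 'FLOAT'}]

# Upload with an explicit schema instead of the dtype-inferred default.
# 'my_dataset.my_table', 'my-project-id' and the key path are placeholders.
gbq.to_gbq(df, 'my_dataset.my_table', 'my-project-id',
           private_key='path/to/service_account_key.json',
           table_schema=schema)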

pandas_gbq/tests/test_gbq.py (+42)

@@ -1258,6 +1258,48 @@ def test_verify_schema_ignores_field_mode(self):
         assert self.sut.verify_schema(
             self.dataset_prefix + "1", TABLE_ID + test_id, test_schema_2)
 
+    def test_upload_data_with_valid_user_schema(self):
+        # Issue #46; tests test scenarios with user-provided
+        # schemas
+        df = tm.makeMixedDataFrame()
+        test_id = "15"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'STRING'},
+                       {'name': 'D', 'type': 'TIMESTAMP'}]
+        destination_table = self.destination_table + test_id
+        gbq.to_gbq(df, destination_table, _get_project_id(),
+                   private_key=_get_private_key_path(),
+                   table_schema=test_schema)
+        dataset, table = destination_table.split('.')
+        assert self.table.verify_schema(dataset, table,
+                                        dict(fields=test_schema))
+
+    def test_upload_data_with_invalid_user_schema_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "16"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'},
+                       {'name': 'D', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(),
+                       table_schema=test_schema)
+
+    def test_upload_data_with_missing_schema_fields_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "16"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(),
+                       table_schema=test_schema)
+
     def test_list_dataset(self):
         dataset_id = self.dataset_prefix + "1"
         assert dataset_id in self.dataset.datasets()
