diff --git a/bigquery/cloud-client/README.rst b/bigquery/cloud-client/README.rst index a68ff1f7624..1a3b889a302 100644 --- a/bigquery/cloud-client/README.rst +++ b/bigquery/cloud-client/README.rst @@ -70,6 +70,18 @@ Install Dependencies Samples ------------------------------------------------------------------------------- +Simple Application ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + + + +To run this sample: + +.. code-block:: bash + + $ python simple_app.py + + Quickstart +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -82,7 +94,7 @@ To run this sample: $ python quickstart.py -Sync query +Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -91,26 +103,35 @@ To run this sample: .. code-block:: bash - $ python sync_query.py + $ python query.py - usage: sync_query.py [-h] query + usage: query.py [-h] [--use_standard_sql] + [--destination_table DESTINATION_TABLE] + query - Command-line application to perform synchronous queries in BigQuery. + Command-line application to perform queries in BigQuery. For more information, see the README.rst. Example invocation: - $ python sync_query.py \ - 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + $ python query.py '#standardSQL + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus' positional arguments: - query BigQuery SQL Query. + query BigQuery SQL Query. optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit + --use_standard_sql Use standard SQL syntax. + --destination_table DESTINATION_TABLE + Destination table to use for results. Example: + my_dataset.my_table -Async query +Parameterized Query +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -119,23 +140,29 @@ To run this sample: .. code-block:: bash - $ python async_query.py + $ python query_params.py - usage: async_query.py [-h] query + usage: query_params.py [-h] {named,positional,array,timestamp,struct} ... - Command-line application to perform asynchronous queries in BigQuery. + Command-line app to perform queries with parameters in BigQuery. For more information, see the README.rst. Example invocation: - $ python async_query.py \ - 'SELECT corpus FROM `publicdata.samples.shakespeare` GROUP BY corpus' + $ python query_params.py named 'romeoandjuliet' 100 + $ python query_params.py positional 'romeoandjuliet' 100 positional arguments: - query BigQuery SQL Query. + {named,positional,array,timestamp,struct} + samples + named Run a query with named parameters. + positional Run a query with positional parameters. + array Run a query with an array parameter. + timestamp Run a query with a timestamp parameter. + struct Run a query with a struct parameter. optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit Snippets @@ -202,20 +229,21 @@ To run this sample: $ python load_data_from_file.py - usage: load_data_from_file.py [-h] dataset_name table_name source_file_name + usage: load_data_from_file.py [-h] dataset_id table_id source_file_name Loads data into BigQuery from a local file. For more information, see the README.rst. Example invocation: - $ python load_data_from_file.py example_dataset example_table example-data.csv + $ python load_data_from_file.py example_dataset example_table \ + example-data.csv The dataset and table should already exist. 
positional arguments: - dataset_name - table_name + dataset_id + table_id source_file_name Path to a .csv file to upload. optional arguments: @@ -233,25 +261,26 @@ To run this sample: $ python load_data_from_gcs.py - usage: load_data_from_gcs.py [-h] dataset_name table_name source + usage: load_data_from_gcs.py [-h] dataset_id table_id source Loads data into BigQuery from an object in Google Cloud Storage. For more information, see the README.rst. Example invocation: - $ python load_data_from_gcs.py example_dataset example_table gs://example-bucket/example-data.csv + $ python load_data_from_gcs.py example_dataset example_table \ + gs://example-bucket/example-data.csv The dataset and table should already exist. positional arguments: - dataset_name - table_name - source The Google Cloud Storage object to load. Must be in the format - gs://bucket_name/object_name + dataset_id + table_id + source The Google Cloud Storage object to load. Must be in the format + gs://bucket_name/object_name optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit Load streaming data @@ -265,24 +294,25 @@ To run this sample: $ python stream_data.py - usage: stream_data.py [-h] dataset_name table_name json_data + usage: stream_data.py [-h] dataset_id table_id json_data Loads a single row of data directly into BigQuery. For more information, see the README.rst. Example invocation: - $ python stream_data.py example_dataset example_table '["Gandalf", 2000]' + $ python stream_data.py example_dataset example_table \ + '["Gandalf", 2000]' The dataset and table should already exist. positional arguments: - dataset_name - table_name - json_data The row to load into BigQuery as an array in JSON format. + dataset_id + table_id + json_data The row to load into BigQuery as an array in JSON format. optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit Export data to Cloud Storage @@ -296,25 +326,26 @@ To run this sample: $ python export_data_to_gcs.py - usage: export_data_to_gcs.py [-h] dataset_name table_name destination + usage: export_data_to_gcs.py [-h] dataset_id table_id destination Exports data from BigQuery to an object in Google Cloud Storage. For more information, see the README.rst. Example invocation: - $ python export_data_to_gcs.py example_dataset example_table gs://example-bucket/example-data.csv + $ python export_data_to_gcs.py example_dataset example_table \ + gs://example-bucket/example-data.csv The dataset and table should already exist. positional arguments: - dataset_name - table_name - destination The desintation Google Cloud Storage object.Must be in the - format gs://bucket_name/object_name + dataset_id + table_id + destination The destination Google Cloud Storage object. 
Must be in the + format gs://bucket_name/object_name optional arguments: - -h, --help show this help message and exit + -h, --help show this help message and exit diff --git a/bigquery/cloud-client/README.rst.in b/bigquery/cloud-client/README.rst.in index 49143f062e6..61c66ab43c1 100644 --- a/bigquery/cloud-client/README.rst.in +++ b/bigquery/cloud-client/README.rst.in @@ -16,13 +16,15 @@ setup: - install_deps samples: +- name: Simple Application + file: simple_app.py - name: Quickstart file: quickstart.py -- name: Sync query - file: sync_query.py +- name: Query + file: query.py show_help: true -- name: Async query - file: async_query.py +- name: Parameterized Query + file: query_params.py show_help: true - name: Snippets file: snippets.py diff --git a/bigquery/cloud-client/export_data_to_gcs.py b/bigquery/cloud-client/export_data_to_gcs.py index 41b011ca06d..5993ef0f6a2 100644 --- a/bigquery/cloud-client/export_data_to_gcs.py +++ b/bigquery/cloud-client/export_data_to_gcs.py @@ -19,47 +19,43 @@ For more information, see the README.rst. Example invocation: - $ python export_data_to_gcs.py example_dataset example_table \ + $ python export_data_to_gcs.py example_dataset example_table \\ gs://example-bucket/example-data.csv The dataset and table should already exist. """ import argparse -import uuid from google.cloud import bigquery -def export_data_to_gcs(dataset_name, table_name, destination): +def export_data_to_gcs(dataset_id, table_id, destination): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) - job_name = str(uuid.uuid4()) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - job = bigquery_client.extract_table_to_storage( - job_name, table, destination) + job = bigquery_client.extract_table(table_ref, destination) - job.begin() - job.result() # Wait for job to complete + job.result() # Waits for job to complete print('Exported {}:{} to {}'.format( - dataset_name, table_name, destination)) + dataset_id, table_id, destination)) if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( - 'destination', help='The desintation Google Cloud Storage object.' + 'destination', help='The destination Google Cloud Storage object. ' 'Must be in the format gs://bucket_name/object_name') args = parser.parse_args() export_data_to_gcs( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.destination) diff --git a/bigquery/cloud-client/load_data_from_file.py b/bigquery/cloud-client/load_data_from_file.py index 9e0bf9f4d13..e311daa1e62 100644 --- a/bigquery/cloud-client/load_data_from_file.py +++ b/bigquery/cloud-client/load_data_from_file.py @@ -19,7 +19,7 @@ For more information, see the README.rst. Example invocation: - $ python load_data_from_file.py example_dataset example_table \ + $ python load_data_from_file.py example_dataset example_table \\ example-data.csv The dataset and table should already exist. 
@@ -30,38 +30,37 @@ from google.cloud import bigquery -def load_data_from_file(dataset_name, table_name, source_file_name): +def load_data_from_file(dataset_id, table_id, source_file_name): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) - - # Reload the table to get the schema. - table.reload() + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) with open(source_file_name, 'rb') as source_file: # This example uses CSV, but you can use other formats. # See https://cloud.google.com/bigquery/loading-data - job = table.upload_from_file( - source_file, source_format='text/csv') + job_config = bigquery.LoadJobConfig() + job_config.source_format = 'text/csv' + job = bigquery_client.load_table_from_file( + source_file, table_ref, job_config=job_config) - job.result() # Wait for job to complete + job.result() # Waits for job to complete print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_name, table_name)) + job.output_rows, dataset_id, table_id)) if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( 'source_file_name', help='Path to a .csv file to upload.') args = parser.parse_args() load_data_from_file( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.source_file_name) diff --git a/bigquery/cloud-client/load_data_from_gcs.py b/bigquery/cloud-client/load_data_from_gcs.py index b0db3a01139..285e6d1b22a 100644 --- a/bigquery/cloud-client/load_data_from_gcs.py +++ b/bigquery/cloud-client/load_data_from_gcs.py @@ -19,40 +19,36 @@ For more information, see the README.rst. Example invocation: - $ python load_data_from_gcs.py example_dataset example_table \ + $ python load_data_from_gcs.py example_dataset example_table \\ gs://example-bucket/example-data.csv The dataset and table should already exist. """ import argparse -import uuid from google.cloud import bigquery -def load_data_from_gcs(dataset_name, table_name, source): +def load_data_from_gcs(dataset_id, table_id, source): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) - job_name = str(uuid.uuid4()) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - job = bigquery_client.load_table_from_storage( - job_name, table, source) + job = bigquery_client.load_table_from_uri(source, table_ref) - job.begin() - job.result() # Wait for job to complete + job.result() # Waits for job to complete print('Loaded {} rows into {}:{}.'.format( - job.output_rows, dataset_name, table_name)) + job.output_rows, dataset_id, table_id)) if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( 'source', help='The Google Cloud Storage object to load. 
Must be in ' 'the format gs://bucket_name/object_name') @@ -60,6 +56,6 @@ def load_data_from_gcs(dataset_name, table_name, source): args = parser.parse_args() load_data_from_gcs( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.source) diff --git a/bigquery/cloud-client/query.py b/bigquery/cloud-client/query.py index 93b13b84873..19605bbad4d 100755 --- a/bigquery/cloud-client/query.py +++ b/bigquery/cloud-client/query.py @@ -21,69 +21,58 @@ Example invocation: $ python query.py '#standardSQL SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus' """ import argparse -import uuid from google.cloud import bigquery def query(query): client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) - - query_job.begin() - query_job.result() # Wait for job to complete. + query_job = client.query(query) # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + for row in query_job.result(): # Waits for job to complete. print(row) def query_standard_sql(query): client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) - # Set use_legacy_sql to False to use standard SQL syntax. See: - # https://cloud.google.com/bigquery/docs/reference/standard-sql/enabling-standard-sql - query_job.use_legacy_sql = False + job_config = bigquery.QueryJobConfig() - query_job.begin() - query_job.result() # Wait for job to complete. + # Set use_legacy_sql to False to use standard SQL syntax. + # Note that queries are treated as standard SQL by default. + job_config.use_legacy_sql = False + query_job = client.query(query, job_config=job_config) # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + for row in query_job.result(): # Waits for job to complete. print(row) def query_destination_table(query, dest_dataset_id, dest_table_id): client = bigquery.Client() - query_job = client.run_async_query(str(uuid.uuid4()), query) + job_config = bigquery.QueryJobConfig() # Allow for query results larger than the maximum response size. - query_job.allow_large_results = True + job_config.allow_large_results = True # When large results are allowed, a destination table must be set. - dest_dataset = client.dataset(dest_dataset_id) - dest_table = dest_dataset.table(dest_table_id) - query_job.destination = dest_table + dest_dataset_ref = client.dataset(dest_dataset_id) + dest_table_ref = dest_dataset_ref.table(dest_table_id) + job_config.destination = dest_table_ref # Allow the results table to be overwritten. - query_job.write_disposition = 'WRITE_TRUNCATE' + job_config.write_disposition = 'WRITE_TRUNCATE' - query_job.begin() - query_job.result() # Wait for job to complete. + query_job = client.query(query, job_config=job_config) - # Verify that the results were written to the destination table. - dest_table.reload() # Get the table metadata, such as the schema. - for row in dest_table.fetch_data(): + # Print the results. + for row in query_job.result(): # Waits for job to complete. print(row) diff --git a/bigquery/cloud-client/query_params.py b/bigquery/cloud-client/query_params.py index 435af29d3b5..594f51c758d 100644 --- a/bigquery/cloud-client/query_params.py +++ b/bigquery/cloud-client/query_params.py @@ -19,13 +19,12 @@ For more information, see the README.rst. 
Example invocation: - $ python query_params.py --use-named-params 'romeoandjuliet' 100 - $ python query_params.py --use-positional-params 'romeoandjuliet' 100 + $ python query_params.py named 'romeoandjuliet' 100 + $ python query_params.py positional 'romeoandjuliet' 100 """ import argparse import datetime -import uuid from google.cloud import bigquery import pytz @@ -40,28 +39,23 @@ def query_positional_params(corpus, min_word_count): AND word_count >= ? ORDER BY word_count DESC; """ - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=( - bigquery.ScalarQueryParameter( - # Set the name to None to use positional parameters (? symbol - # in the query). Note that you cannot mix named and positional - # parameters. - None, 'STRING', corpus), - bigquery.ScalarQueryParameter(None, 'INT64', min_word_count))) - - # Only standard SQL syntax supports parameters in queries. - # See: https://cloud.google.com/bigquery/sql-reference/ - query_job.use_legacy_sql = False - - query_job.begin() + # Set the name to None to use positional parameters (? symbol in the + # query). Note that you cannot mix named and positional parameters. + # See: https://cloud.google.com/bigquery/docs/parameterized-queries/ + query_params = [ + bigquery.ScalarQueryParameter(None, 'STRING', corpus), + bigquery.ScalarQueryParameter(None, 'INT64', min_word_count) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + query_job.result() # Wait for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) @@ -74,22 +68,21 @@ def query_named_params(corpus, min_word_count): AND word_count >= @min_word_count ORDER BY word_count DESC; """ - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=( - bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), - bigquery.ScalarQueryParameter( - 'min_word_count', 'INT64', min_word_count))) - query_job.use_legacy_sql = False - - query_job.begin() + query_params = [ + bigquery.ScalarQueryParameter('corpus', 'STRING', corpus), + bigquery.ScalarQueryParameter( + 'min_word_count', 'INT64', min_word_count) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + query_job.result() # Wait for job to complete # Print the results. 
- destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) @@ -104,68 +97,65 @@ def query_array_params(gender, states): ORDER BY count DESC LIMIT 10; """ - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=( - bigquery.ScalarQueryParameter('gender', 'STRING', gender), - bigquery.ArrayQueryParameter('states', 'STRING', states))) - query_job.use_legacy_sql = False - - query_job.begin() + query_params = [ + bigquery.ScalarQueryParameter('gender', 'STRING', gender), + bigquery.ArrayQueryParameter('states', 'STRING', states) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + query_job.result() # Wait for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) def query_timestamp_params(year, month, day, hour, minute): client = bigquery.Client() query = 'SELECT TIMESTAMP_ADD(@ts_value, INTERVAL 1 HOUR);' - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=[ - bigquery.ScalarQueryParameter( - 'ts_value', - 'TIMESTAMP', - datetime.datetime( - year, month, day, hour, minute, tzinfo=pytz.UTC))]) - query_job.use_legacy_sql = False - - query_job.begin() - query_job.result() # Wait for job to complete + query_params = [ + bigquery.ScalarQueryParameter( + 'ts_value', + 'TIMESTAMP', + datetime.datetime(year, month, day, hour, minute, tzinfo=pytz.UTC)) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + + query_job.result() # Waits for job to complete # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) def query_struct_params(x, y): client = bigquery.Client() query = 'SELECT @struct_value AS s;' - query_job = client.run_async_query( - str(uuid.uuid4()), - query, - query_parameters=[ - bigquery.StructQueryParameter( - 'struct_value', - bigquery.ScalarQueryParameter('x', 'INT64', x), - bigquery.ScalarQueryParameter('y', 'STRING', y))]) - query_job.use_legacy_sql = False - - query_job.begin() - query_job.result() # Wait for job to complete + query_params = [ + bigquery.StructQueryParameter( + 'struct_value', + bigquery.ScalarQueryParameter('x', 'INT64', x), + bigquery.ScalarQueryParameter('y', 'STRING', y) + ) + ] + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = query_params + query_job = client.query(query, job_config=job_config) + + query_job.result() # Waits for job to complete # Print the results. 
- destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + destination_table_ref = query_job.destination + table = client.get_table(destination_table_ref) + for row in client.list_rows(table): print(row) diff --git a/bigquery/cloud-client/query_test.py b/bigquery/cloud-client/query_test.py index 9d6c912b4e5..3d456cb5967 100644 --- a/bigquery/cloud-client/query_test.py +++ b/bigquery/cloud-client/query_test.py @@ -23,7 +23,7 @@ def test_query(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''#standardSQL SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus LIMIT 10;''' @@ -38,7 +38,7 @@ def test_query(capsys): def test_query_standard_sql(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus LIMIT 10;''' @@ -54,7 +54,7 @@ def test_query_destination_table(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''#standardSQL SELECT corpus - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY corpus ORDER BY corpus LIMIT 10;''' diff --git a/bigquery/cloud-client/quickstart.py b/bigquery/cloud-client/quickstart.py index 2c9923f6eb8..10ae58e84ca 100644 --- a/bigquery/cloud-client/quickstart.py +++ b/bigquery/cloud-client/quickstart.py @@ -24,15 +24,16 @@ def run_quickstart(): bigquery_client = bigquery.Client() # The name for the new dataset - dataset_name = 'my_new_dataset' + dataset_id = 'my_new_dataset' - # Prepares the new dataset - dataset = bigquery_client.dataset(dataset_name) + # Prepares a reference to the new dataset + dataset_ref = bigquery_client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) # Creates the new dataset - dataset.create() + dataset = bigquery_client.create_dataset(dataset) - print('Dataset {} created.'.format(dataset.name)) + print('Dataset {} created.'.format(dataset.dataset_id)) # [END bigquery_quickstart] diff --git a/bigquery/cloud-client/quickstart_test.py b/bigquery/cloud-client/quickstart_test.py index f5842960ce6..02931086a11 100644 --- a/bigquery/cloud-client/quickstart_test.py +++ b/bigquery/cloud-client/quickstart_test.py @@ -13,6 +13,7 @@ # limitations under the License. 
from google.cloud import bigquery +from google.cloud.exceptions import NotFound import pytest import quickstart @@ -28,15 +29,23 @@ def temporary_dataset(): """Fixture that ensures the test dataset does not exist before or after a test.""" bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(DATASET_ID) + dataset_ref = bigquery_client.dataset(DATASET_ID) - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) yield - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) + + +def dataset_exists(dataset, client): + try: + client.get_dataset(dataset) + return True + except NotFound: + return False def test_quickstart(capsys, temporary_dataset): diff --git a/bigquery/cloud-client/requirements.txt b/bigquery/cloud-client/requirements.txt index e90122cb65f..0401c422148 100644 --- a/bigquery/cloud-client/requirements.txt +++ b/bigquery/cloud-client/requirements.txt @@ -1,3 +1,3 @@ -google-cloud-bigquery==0.27.0 +google-cloud-bigquery==0.28.0 google-auth-oauthlib==0.1.1 pytz==2017.2 diff --git a/bigquery/cloud-client/simple_app.py b/bigquery/cloud-client/simple_app.py index 9bca432ef4a..5d0d04e666d 100644 --- a/bigquery/cloud-client/simple_app.py +++ b/bigquery/cloud-client/simple_app.py @@ -17,8 +17,6 @@ """Simple application that performs a query with BigQuery.""" # [START all] # [START create_client] -import uuid - from google.cloud import bigquery @@ -26,23 +24,20 @@ def query_shakespeare(): client = bigquery.Client() # [END create_client] # [START run_query] - query_job = client.run_async_query(str(uuid.uuid4()), """ + query_job = client.query(""" #standardSQL SELECT corpus AS title, COUNT(*) AS unique_words - FROM `publicdata.samples.shakespeare` + FROM `bigquery-public-data.samples.shakespeare` GROUP BY title ORDER BY unique_words DESC LIMIT 10""") - query_job.begin() - query_job.result() # Wait for job to complete. + results = query_job.result() # Waits for job to complete. # [END run_query] # [START print_results] - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): - print(row) + for row in results: + print("{}: {}".format(row.title, row.unique_words)) # [END print_results] diff --git a/bigquery/cloud-client/snippets.py b/bigquery/cloud-client/snippets.py index 401d18bbdfb..ee75f7fc269 100644 --- a/bigquery/cloud-client/snippets.py +++ b/bigquery/cloud-client/snippets.py @@ -25,11 +25,8 @@ """ import argparse -import itertools -import uuid from google.cloud import bigquery -import google.cloud.bigquery.job def list_projects(): @@ -47,52 +44,45 @@ def list_datasets(project=None): bigquery_client = bigquery.Client(project=project) for dataset in bigquery_client.list_datasets(): - print(dataset.name) + print(dataset.dataset_id) -def create_dataset(dataset_name, project=None): +def create_dataset(dataset_id, project=None): """Craetes a dataset in a given project. If no project is specified, then the currently active project is used. 
""" bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) + dataset_ref = bigquery_client.dataset(dataset_id) - dataset.create() + dataset = bigquery_client.create_dataset(bigquery.Dataset(dataset_ref)) - print('Created dataset {}.'.format(dataset_name)) + print('Created dataset {}.'.format(dataset.dataset_id)) -def list_tables(dataset_name, project=None): +def list_tables(dataset_id, project=None): """Lists all of the tables in a given dataset. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) + dataset_ref = bigquery_client.dataset(dataset_id) - if not dataset.exists(): - print('Dataset {} does not exist.'.format(dataset_name)) - return + for table in bigquery_client.list_dataset_tables(dataset_ref): + print(table.table_id) - for table in dataset.list_tables(): - print(table.name) - -def create_table(dataset_name, table_name, project=None): +def create_table(dataset_id, table_id, project=None): """Creates a simple table in the given dataset. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - - if not dataset.exists(): - print('Dataset {} does not exist.'.format(dataset_name)) - return + dataset_ref = bigquery_client.dataset(dataset_id) - table = dataset.table(table_name) + table_ref = dataset_ref.table(table_id) + table = bigquery.Table(table_ref) # Set the table schema table.schema = ( @@ -101,12 +91,12 @@ def create_table(dataset_name, table_name, project=None): bigquery.SchemaField('Weight', 'FLOAT'), ) - table.create() + table = bigquery_client.create_table(table) - print('Created table {} in dataset {}.'.format(table_name, dataset_name)) + print('Created table {} in dataset {}.'.format(table_id, dataset_id)) -def list_rows(dataset_name, table_name, project=None): +def list_rows(dataset_id, table_id, project=None): """Prints rows in the given table. Will print 25 rows at most for brevity as tables can contain large amounts @@ -115,18 +105,14 @@ def list_rows(dataset_name, table_name, project=None): If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - if not table.exists(): - print('Table {}:{} does not exist.'.format(dataset_name, table_name)) - return - - # Reload the table so that the schema is available. - table.reload() + # Get the table from the API so that the schema is available. + table = bigquery_client.get_table(table_ref) # Load at most 25 results. - rows = itertools.islice(table.fetch_data(), 25) + rows = bigquery_client.list_rows(table, max_results=25) # Use format to create a simple table. format_string = '{!s:<16} ' * len(table.schema) @@ -139,49 +125,50 @@ def list_rows(dataset_name, table_name, project=None): print(format_string.format(*row)) -def copy_table(dataset_name, table_name, new_table_name, project=None): +def copy_table(dataset_id, table_id, new_table_id, project=None): """Copies a table. If no project is specified, then the currently active project is used. 
""" bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) # This sample shows the destination table in the same dataset and project, # however, it's possible to copy across datasets and projects. You can - # also copy muliple source tables into a single destination table by + # also copy multiple source tables into a single destination table by # providing addtional arguments to `copy_table`. - destination_table = dataset.table(new_table_name) + destination_table_ref = dataset_ref.table(new_table_id) # Create a job to copy the table to the destination table. - job_id = str(uuid.uuid4()) - job = bigquery_client.copy_table( - job_id, destination_table, table) + # Start by creating a job configuration + job_config = bigquery.CopyJobConfig() + + # Configure the job to create the table if it doesn't exist. + job_config.create_disposition = ( + bigquery.job.CreateDisposition.CREATE_IF_NEEDED) - # Create the table if it doesn't exist. - job.create_disposition = ( - google.cloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) + copy_job = bigquery_client.copy_table( + table_ref, destination_table_ref, job_config=job_config) - job.begin() # Start the job. print('Waiting for job to finish...') - job.result() + copy_job.result() - print('Table {} copied to {}.'.format(table_name, new_table_name)) + print('Table {} copied to {}.'.format(table_id, new_table_id)) -def delete_table(dataset_name, table_name, project=None): +def delete_table(dataset_id, table_id, project=None): """Deletes a table in a given dataset. If no project is specified, then the currently active project is used. """ bigquery_client = bigquery.Client(project=project) - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) - table.delete() + bigquery_client.delete_table(table_ref) - print('Table {}:{} deleted.'.format(dataset_name, table_name)) + print('Table {}:{} deleted.'.format(dataset_id, table_id)) if __name__ == '__main__': @@ -200,32 +187,32 @@ def delete_table(dataset_name, table_name, project=None): create_dataset_parser = subparsers.add_parser( 'list-datasets', help=list_datasets.__doc__) - create_dataset_parser.add_argument('dataset_name') + create_dataset_parser.add_argument('dataset_id') list_tables_parser = subparsers.add_parser( 'list-tables', help=list_tables.__doc__) - list_tables_parser.add_argument('dataset_name') + list_tables_parser.add_argument('dataset_id') create_table_parser = subparsers.add_parser( 'create-table', help=create_table.__doc__) - create_table_parser.add_argument('dataset_name') - create_table_parser.add_argument('table_name') + create_table_parser.add_argument('dataset_id') + create_table_parser.add_argument('table_id') list_rows_parser = subparsers.add_parser( 'list-rows', help=list_rows.__doc__) - list_rows_parser.add_argument('dataset_name') - list_rows_parser.add_argument('table_name') + list_rows_parser.add_argument('dataset_id') + list_rows_parser.add_argument('table_id') copy_table_parser = subparsers.add_parser( 'copy-table', help=copy_table.__doc__) - copy_table_parser.add_argument('dataset_name') - copy_table_parser.add_argument('table_name') - copy_table_parser.add_argument('new_table_name') + copy_table_parser.add_argument('dataset_id') + copy_table_parser.add_argument('table_id') + 
copy_table_parser.add_argument('new_table_id') delete_table_parser = subparsers.add_parser( 'delete-table', help=delete_table.__doc__) - delete_table_parser.add_argument('dataset_name') - delete_table_parser.add_argument('table_name') + delete_table_parser.add_argument('dataset_id') + delete_table_parser.add_argument('table_id') args = parser.parse_args() @@ -234,14 +221,14 @@ def delete_table(dataset_name, table_name, project=None): elif args.command == 'list-datasets': list_datasets(args.project) elif args.command == 'create-dataset': - create_dataset(args.dataset_name, args.project) + create_dataset(args.dataset_id, args.project) elif args.command == 'list-tables': - list_tables(args.dataset_name, args.project) + list_tables(args.dataset_id, args.project) elif args.command == 'create-table': - create_table(args.dataset_name, args.table_name, args.project) + create_table(args.dataset_id, args.table_id, args.project) elif args.command == 'list-rows': - list_rows(args.dataset_name, args.table_name, args.project) + list_rows(args.dataset_id, args.table_id, args.project) elif args.command == 'copy-table': - copy_table(args.dataset_name, args.table_name, args.new_table_name) + copy_table(args.dataset_id, args.table_id, args.new_table_id) elif args.command == 'delete-table': - delete_table(args.dataset_name, args.table_name, args.project) + delete_table(args.dataset_id, args.table_id, args.project) diff --git a/bigquery/cloud-client/snippets_test.py b/bigquery/cloud-client/snippets_test.py index af368d9a4a2..5f666ccc6c8 100644 --- a/bigquery/cloud-client/snippets_test.py +++ b/bigquery/cloud-client/snippets_test.py @@ -13,6 +13,7 @@ # limitations under the License. from google.cloud import bigquery +from google.cloud.exceptions import NotFound import pytest import snippets @@ -38,17 +39,25 @@ def test_list_datasets(capsys): @pytest.fixture def cleanup_dataset(): - dataset_name = 'test_temporary_dataset' + dataset_id = 'test_temporary_dataset' bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) + dataset_ref = bigquery_client.dataset(dataset_id) - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) - yield dataset_name + yield dataset_id - if dataset.exists(): - dataset.delete() + if dataset_exists(dataset_ref, bigquery_client): + bigquery_client.delete_dataset(dataset_ref) + + +def dataset_exists(dataset, client): + try: + client.get_dataset(dataset) + return True + except NotFound: + return False def test_create_dataset(capsys, cleanup_dataset): @@ -87,46 +96,57 @@ def temporary_table(): """Fixture that returns a factory for tables that do not yet exist and will be automatically deleted after the test.""" bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(DATASET_ID) + dataset_ref = bigquery_client.dataset(DATASET_ID) tables = [] - def factory(table_name): - new_table = dataset.table(table_name) - if new_table.exists(): - new_table.delete() - tables.append(new_table) - return new_table + def factory(table_id): + new_table_ref = dataset_ref.table(table_id) + if table_exists(new_table_ref, bigquery_client): + bigquery_client.delete_table(new_table_ref) + tables.append(new_table_ref) + return new_table_ref yield factory for table in tables: - if table.exists(): - table.delete() + if table_exists(table, bigquery_client): + bigquery_client.delete_table(table) + + +def table_exists(table, client): + try: + client.get_table(table) + return True + except 
NotFound: + return False def test_create_table(temporary_table): + bigquery_client = bigquery.Client() new_table = temporary_table('test_create_table') - snippets.create_table(DATASET_ID, new_table.name) - assert new_table.exists() + snippets.create_table(DATASET_ID, new_table.table_id) + assert table_exists(new_table, bigquery_client) @pytest.mark.slow def test_copy_table(temporary_table): + bigquery_client = bigquery.Client() new_table = temporary_table('test_copy_table') - snippets.copy_table(DATASET_ID, TABLE_ID, new_table.name) - assert new_table.exists() + snippets.copy_table(DATASET_ID, TABLE_ID, new_table.table_id) + assert table_exists(new_table, bigquery_client) def test_delete_table(): # Create a table to delete bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(DATASET_ID) - table = dataset.table('test_delete_table') + dataset_ref = bigquery_client.dataset(DATASET_ID) + table_ref = dataset_ref.table('test_delete_table') + table = bigquery.Table(table_ref) - if not table.exists(): + if not table_exists(table, bigquery_client): table.schema = [bigquery.SchemaField('id', 'INTEGER')] - table.create() + table = bigquery_client.create_table(table) - snippets.delete_table(DATASET_ID, table.name) + snippets.delete_table(DATASET_ID, table.table_id) - assert not table.exists() + assert not table_exists(table, bigquery_client) diff --git a/bigquery/cloud-client/stream_data.py b/bigquery/cloud-client/stream_data.py index 7d9970c3aa3..c5496004820 100644 --- a/bigquery/cloud-client/stream_data.py +++ b/bigquery/cloud-client/stream_data.py @@ -19,7 +19,7 @@ For more information, see the README.rst. Example invocation: - $ python stream_data.py example_dataset example_table \ + $ python stream_data.py example_dataset example_table \\ '["Gandalf", 2000]' The dataset and table should already exist. @@ -32,20 +32,20 @@ from google.cloud import bigquery -def stream_data(dataset_name, table_name, json_data): +def stream_data(dataset_id, table_id, json_data): bigquery_client = bigquery.Client() - dataset = bigquery_client.dataset(dataset_name) - table = dataset.table(table_name) + dataset_ref = bigquery_client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) data = json.loads(json_data) - # Reload the table to get the schema. - table.reload() + # Get the table from the API so that the schema is available. 
+ table = bigquery_client.get_table(table_ref) rows = [data] - errors = table.insert_data(rows) + errors = bigquery_client.create_rows(table, rows) if not errors: - print('Loaded 1 row into {}:{}'.format(dataset_name, table_name)) + print('Loaded 1 row into {}:{}'.format(dataset_id, table_id)) else: print('Errors:') pprint(errors) @@ -55,8 +55,8 @@ def stream_data(dataset_name, table_name, json_data): parser = argparse.ArgumentParser( description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - parser.add_argument('dataset_name') - parser.add_argument('table_name') + parser.add_argument('dataset_id') + parser.add_argument('table_id') parser.add_argument( 'json_data', help='The row to load into BigQuery as an array in JSON format.') @@ -64,6 +64,6 @@ def stream_data(dataset_name, table_name, json_data): args = parser.parse_args() stream_data( - args.dataset_name, - args.table_name, + args.dataset_id, + args.table_id, args.json_data) diff --git a/bigquery/cloud-client/user_credentials.py b/bigquery/cloud-client/user_credentials.py index ca585c0a14e..a170b66291e 100644 --- a/bigquery/cloud-client/user_credentials.py +++ b/bigquery/cloud-client/user_credentials.py @@ -21,7 +21,6 @@ """ import argparse -import uuid from google.cloud import bigquery from google_auth_oauthlib import flow @@ -29,15 +28,10 @@ def run_query(credentials, project, query): client = bigquery.Client(project=project, credentials=credentials) - query_job = client.run_async_query(str(uuid.uuid4()), query) - - query_job.begin() - query_job.result() # Wait for the job to complete. + query_job = client.query(query) # Print the results. - destination_table = query_job.destination - destination_table.reload() - for row in destination_table.fetch_data(): + for row in query_job.result(): # Wait for the job to complete. print(row)
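Taken together, the samples now follow one google-cloud-bigquery 0.28 pattern: build a job config object (``QueryJobConfig``, ``LoadJobConfig``, or ``CopyJobConfig``), pass it to the corresponding ``Client`` method, and call ``job.result()`` to wait for completion. A minimal, self-contained sketch of that pattern for a parameterized query, assuming application default credentials and using the public Shakespeare sample table from the samples above:

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()

    query = """
        SELECT word, word_count
        FROM `bigquery-public-data.samples.shakespeare`
        WHERE corpus = @corpus
        AND word_count >= @min_word_count
        ORDER BY word_count DESC;
    """

    # Parameters and other options are set on the config, not on the job.
    job_config = bigquery.QueryJobConfig()
    job_config.use_legacy_sql = False  # Standard SQL is required for parameters.
    job_config.query_parameters = [
        bigquery.ScalarQueryParameter('corpus', 'STRING', 'romeoandjuliet'),
        bigquery.ScalarQueryParameter('min_word_count', 'INT64', 100),
    ]

    # client.query() starts the job; there is no begin() or run_async_query().
    query_job = client.query(query, job_config=job_config)

    for row in query_job.result():  # Waits for the job to complete.
        print('{}: {}'.format(row.word, row.word_count))

The same result-iteration shortcut, ``for row in query_job.result()``, replaces the old ``destination_table.reload()`` and ``fetch_data()`` sequence throughout ``query.py``, ``simple_app.py``, and ``user_credentials.py``.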