BigQuery: Updates samples for BigQuery Beta 2 (do not merge until release) #1178

Merged 15 commits on Oct 31, 2017
Changes from 13 commits
24 changes: 10 additions & 14 deletions bigquery/cloud-client/export_data_to_gcs.py
@@ -26,40 +26,36 @@
 """
 
 import argparse
-import uuid
 
 from google.cloud import bigquery
 
 
-def export_data_to_gcs(dataset_name, table_name, destination):
+def export_data_to_gcs(dataset_id, table_id, destination):
     bigquery_client = bigquery.Client()
-    dataset = bigquery_client.dataset(dataset_name)
-    table = dataset.table(table_name)
-    job_name = str(uuid.uuid4())
+    dataset_ref = bigquery_client.dataset(dataset_id)
+    table_ref = dataset_ref.table(table_id)
 
-    job = bigquery_client.extract_table_to_storage(
-        job_name, table, destination)
+    job = bigquery_client.extract_table(table_ref, destination)
 
-    job.begin()
-    job.result()  # Wait for job to complete
+    job.result()  # Waits for job to complete
 
     print('Exported {}:{} to {}'.format(
-        dataset_name, table_name, destination))
+        dataset_id, table_id, destination))
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('dataset_name')
-    parser.add_argument('table_name')
+    parser.add_argument('dataset_id')
+    parser.add_argument('table_id')
     parser.add_argument(
         'destination', help='The desintation Google Cloud Storage object.'
         'Must be in the format gs://bucket_name/object_name')
 
     args = parser.parse_args()
 
     export_data_to_gcs(
-        args.dataset_name,
-        args.table_name,
+        args.dataset_id,
+        args.table_id,
         args.destination)
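
For orientation, here is a minimal standalone sketch of the reworked export flow under the Beta 2 client; the dataset, table, and bucket names are placeholders, not part of the sample above.

from google.cloud import bigquery

client = bigquery.Client()
# Placeholder IDs, for illustration only.
table_ref = client.dataset('my_dataset').table('my_table')

# extract_table() starts the job right away; there is no separate begin() step.
job = client.extract_table(table_ref, 'gs://my-bucket/my_table.csv')
job.result()  # Waits for the export to finish.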
27 changes: 13 additions & 14 deletions bigquery/cloud-client/load_data_from_file.py
@@ -30,38 +30,37 @@
 from google.cloud import bigquery
 
 
-def load_data_from_file(dataset_name, table_name, source_file_name):
+def load_data_from_file(dataset_id, table_id, source_file_name):
     bigquery_client = bigquery.Client()
-    dataset = bigquery_client.dataset(dataset_name)
-    table = dataset.table(table_name)
-
-    # Reload the table to get the schema.
-    table.reload()
+    dataset_ref = bigquery_client.dataset(dataset_id)
+    table_ref = dataset_ref.table(table_id)
 
     with open(source_file_name, 'rb') as source_file:
         # This example uses CSV, but you can use other formats.
         # See https://cloud.google.com/bigquery/loading-data
-        job = table.upload_from_file(
-            source_file, source_format='text/csv')
+        job_config = bigquery.LoadJobConfig()
+        job_config.source_format = 'text/csv'
+        job = bigquery_client.load_table_from_file(
+            source_file, table_ref, job_config=job_config)
 
-    job.result()  # Wait for job to complete
+    job.result()  # Waits for job to complete
 
     print('Loaded {} rows into {}:{}.'.format(
-        job.output_rows, dataset_name, table_name))
+        job.output_rows, dataset_id, table_id))
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('dataset_name')
-    parser.add_argument('table_name')
+    parser.add_argument('dataset_id')
+    parser.add_argument('table_id')
     parser.add_argument(
         'source_file_name', help='Path to a .csv file to upload.')
 
     args = parser.parse_args()
 
     load_data_from_file(
-        args.dataset_name,
-        args.table_name,
+        args.dataset_id,
+        args.table_id,
         args.source_file_name)
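
A similar minimal sketch of the new load-from-file pattern, assuming placeholder IDs and a local data.csv; the enum-style 'CSV' value is assumed here in place of the 'text/csv' media type used in the sample above.

from google.cloud import bigquery

client = bigquery.Client()
table_ref = client.dataset('my_dataset').table('my_table')  # placeholder IDs

job_config = bigquery.LoadJobConfig()
job_config.source_format = 'CSV'  # assumed enum-style value; the sample above uses 'text/csv'

with open('data.csv', 'rb') as source_file:  # assumed local file
    job = client.load_table_from_file(
        source_file, table_ref, job_config=job_config)

job.result()  # Waits for the load to finish.
print('Loaded {} rows.'.format(job.output_rows))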
24 changes: 10 additions & 14 deletions bigquery/cloud-client/load_data_from_gcs.py
@@ -26,40 +26,36 @@
 """
 
 import argparse
-import uuid
 
 from google.cloud import bigquery
 
 
-def load_data_from_gcs(dataset_name, table_name, source):
+def load_data_from_gcs(dataset_id, table_id, source):
     bigquery_client = bigquery.Client()
-    dataset = bigquery_client.dataset(dataset_name)
-    table = dataset.table(table_name)
-    job_name = str(uuid.uuid4())
+    dataset_ref = bigquery_client.dataset(dataset_id)
+    table_ref = dataset_ref.table(table_id)
 
-    job = bigquery_client.load_table_from_storage(
-        job_name, table, source)
+    job = bigquery_client.load_table_from_uri(source, table_ref)
 
-    job.begin()
-    job.result()  # Wait for job to complete
+    job.result()  # Waits for job to complete
 
     print('Loaded {} rows into {}:{}.'.format(
-        job.output_rows, dataset_name, table_name))
+        job.output_rows, dataset_id, table_id))
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
         description=__doc__,
         formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument('dataset_name')
-    parser.add_argument('table_name')
+    parser.add_argument('dataset_id')
+    parser.add_argument('table_id')
    parser.add_argument(
        'source', help='The Google Cloud Storage object to load. Must be in '
        'the format gs://bucket_name/object_name')
 
    args = parser.parse_args()
 
    load_data_from_gcs(
-        args.dataset_name,
-        args.table_name,
+        args.dataset_id,
+        args.table_id,
        args.source)
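
Likewise, a minimal sketch of the new GCS load path, again with placeholder names:

from google.cloud import bigquery

client = bigquery.Client()
table_ref = client.dataset('my_dataset').table('my_table')  # placeholder IDs

# load_table_from_uri() takes the source URI first and no longer needs an explicit job name.
job = client.load_table_from_uri('gs://my-bucket/data.csv', table_ref)  # placeholder URI
job.result()  # Waits for the load to finish.
print('Loaded {} rows.'.format(job.output_rows))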
45 changes: 17 additions & 28 deletions bigquery/cloud-client/query.py
@@ -27,63 +27,52 @@
 """
 
 import argparse
-import uuid
 
 from google.cloud import bigquery
 
 
 def query(query):
     client = bigquery.Client()
-    query_job = client.run_async_query(str(uuid.uuid4()), query)
-
-    query_job.begin()
-    query_job.result()  # Wait for job to complete.
+    query_job = client.query(query)
 
     # Print the results.
-    destination_table = query_job.destination
-    destination_table.reload()
-    for row in destination_table.fetch_data():
+    for row in query_job.result():  # Waits for job to complete.
         print(row)
 
 
 def query_standard_sql(query):
     client = bigquery.Client()
-    query_job = client.run_async_query(str(uuid.uuid4()), query)
-    # Set use_legacy_sql to False to use standard SQL syntax. See:
-    # https://cloud.google.com/bigquery/docs/reference/standard-sql/enabling-standard-sql
-    query_job.use_legacy_sql = False
+    job_config = bigquery.QueryJobConfig()
 
-    query_job.begin()
-    query_job.result()  # Wait for job to complete.
+    # Set use_legacy_sql to False to use standard SQL syntax.
+    # Note that queries are treated as standard SQL by default.
+    job_config.use_legacy_sql = False
+    query_job = client.query(query, job_config=job_config)
 
     # Print the results.
-    destination_table = query_job.destination
-    destination_table.reload()
-    for row in destination_table.fetch_data():
+    for row in query_job.result():  # Waits for job to complete.
         print(row)
 
 
 def query_destination_table(query, dest_dataset_id, dest_table_id):
     client = bigquery.Client()
-    query_job = client.run_async_query(str(uuid.uuid4()), query)
+    job_config = bigquery.QueryJobConfig()
 
     # Allow for query results larger than the maximum response size.
-    query_job.allow_large_results = True
+    job_config.allow_large_results = True
 
     # When large results are allowed, a destination table must be set.
-    dest_dataset = client.dataset(dest_dataset_id)
-    dest_table = dest_dataset.table(dest_table_id)
-    query_job.destination = dest_table
+    dest_dataset_ref = client.dataset(dest_dataset_id)
+    dest_table_ref = dest_dataset_ref.table(dest_table_id)
+    job_config.destination = dest_table_ref
 
     # Allow the results table to be overwritten.
-    query_job.write_disposition = 'WRITE_TRUNCATE'
+    job_config.write_disposition = 'WRITE_TRUNCATE'
 
-    query_job.begin()
-    query_job.result()  # Wait for job to complete.
+    query_job = client.query(query, job_config=job_config)
 
-    # Verify that the results were written to the destination table.
-    dest_table.reload()  # Get the table metadata, such as the schema.
-    for row in dest_table.fetch_data():
+    # Print the results.
+    for row in query_job.result():  # Waits for job to complete.
         print(row)


[remainder of the query.py diff truncated]
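
Finally, a minimal sketch of the new query pattern with QueryJobConfig; the public table referenced below is chosen only for illustration.

from google.cloud import bigquery

client = bigquery.Client()

job_config = bigquery.QueryJobConfig()
job_config.use_legacy_sql = False  # standard SQL is already the default; set here for clarity

query_job = client.query(
    'SELECT name FROM `bigquery-public-data.usa_names.usa_1910_2013` LIMIT 10',
    job_config=job_config)

for row in query_job.result():  # Waits for the query to finish.
    print(row)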