From b714c5097e2c1ac9a948e6e159ff35c933204824 Mon Sep 17 00:00:00 2001
From: Eli Bixby
Date: Fri, 19 Jun 2015 10:44:59 -0700
Subject: [PATCH 1/2] Added doc strings to functions with ambiguous args

---
 bigquery/samples/export_data_to_cloud_storage.py |  9 +++++++++
 bigquery/samples/load_data_by_post.py            |  7 +++++++
 bigquery/samples/load_data_from_csv.py           | 11 +++++++++++
 bigquery/samples/utils.py                        |  7 +++++++
 4 files changed, 34 insertions(+)

diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index 7b361e68d40..ea2742f3389 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -21,6 +21,15 @@
 def export_table(service, cloud_storage_path,
                  projectId, datasetId, tableId,
                  num_retries=5):
+    """
+    service: initialized and authorized bigquery
+    google-api-client object,
+    cloud_storage_path: fully qualified
+    path to a Google Cloud Storage location,
+    e.g. gs://mybucket/myfolder/
+    returns: an extract job resource representing the
+        job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
+    """
     # Generate a unique job_id so retries
     # don't accidentally duplicate export
     job_data = {
diff --git a/bigquery/samples/load_data_by_post.py b/bigquery/samples/load_data_by_post.py
index 6c03885c4db..8c9c1b94816 100644
--- a/bigquery/samples/load_data_by_post.py
+++ b/bigquery/samples/load_data_by_post.py
@@ -22,6 +22,13 @@
 
 # [START make_post]
 def make_post(http, schema, data, projectId, datasetId, tableId):
+    """
+    http: an authorized httplib2 client,
+    schema: a valid bigquery schema,
+        see https://cloud.google.com/bigquery/docs/reference/v2/tables,
+    data: valid JSON to insert into the table
+    returns: an http.request object
+    """
     url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' +
            projectId + '/jobs')
     # Create the body of the request, separated by a boundary of xxx
diff --git a/bigquery/samples/load_data_from_csv.py b/bigquery/samples/load_data_from_csv.py
index 73a108d3eee..8a6512b2dbc 100644
--- a/bigquery/samples/load_data_from_csv.py
+++ b/bigquery/samples/load_data_from_csv.py
@@ -20,6 +20,17 @@
 # [START load_table]
 def load_table(service, source_schema, source_csv,
                projectId, datasetId, tableId, num_retries=5):
+    """
+    service: an initialized and authorized bigquery
+    google-api-client object
+    source_schema: a valid bigquery schema,
+        see https://cloud.google.com/bigquery/docs/reference/v2/tables
+    source_csv: the fully qualified Google Cloud Storage location of
+        the data to load into your table
+    returns: a bigquery load job, see
+        https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load
+    """
+
     # Generate a unique job_id so retries
     # don't accidentally duplicate query
     job_data = {
diff --git a/bigquery/samples/utils.py b/bigquery/samples/utils.py
index dd7a3900cbc..0b375cefc03 100644
--- a/bigquery/samples/utils.py
+++ b/bigquery/samples/utils.py
@@ -15,8 +15,11 @@
 
 # [START get_service]
 def get_service():
+    """returns an initialized and authorized bigquery client"""
+
     from googleapiclient.discovery import build
     from oauth2client.client import GoogleCredentials
+
     credentials = GoogleCredentials.get_application_default()
     if credentials.create_scoped_required():
         credentials = credentials.create_scoped(
@@ -27,6 +30,8 @@
 def poll_job(service, projectId, jobId, interval=5, num_retries=5):
+    """checks the status of a job every *interval* seconds"""
+
     import time
     job_get = service.jobs().get(projectId=projectId, jobId=jobId)
@@ -44,6 +49,8 @@ def poll_job(service, projectId, jobId, interval=5, num_retries=5):
 
 # [START paging]
 def paging(service, request_func, num_retries=5, **kwargs):
+    """pages through the results of an asynchronous job"""
+
     has_next = True
     while has_next:
         response = request_func(**kwargs).execute(num_retries=num_retries)

From a5b3bd8547dae29bae1838d664b79e307f21cfff Mon Sep 17 00:00:00 2001
From: Eli Bixby
Date: Fri, 19 Jun 2015 11:06:25 -0700
Subject: [PATCH 2/2] Fixed doc strings

---
 .../samples/export_data_to_cloud_storage.py | 16 ++++++++++------
 bigquery/samples/load_data_by_post.py       | 15 ++++++++++-----
 bigquery/samples/load_data_from_csv.py      | 18 +++++++++++-------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index ea2742f3389..62675d66d20 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -22,12 +22,16 @@ def export_table(service, cloud_storage_path,
                  projectId, datasetId, tableId,
                  num_retries=5):
     """
-    service: initialized and authorized bigquery
-    google-api-client object,
-    cloud_storage_path: fully qualified
-    path to a Google Cloud Storage location,
-    e.g. gs://mybucket/myfolder/
-    returns: an extract job resource representing the
+    Starts an export job
+
+    Args:
+        service: initialized and authorized bigquery
+            google-api-client object,
+        cloud_storage_path: fully qualified
+            path to a Google Cloud Storage location,
+            e.g. gs://mybucket/myfolder/
+
+    Returns: an extract job resource representing the
         job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
     """
     # Generate a unique job_id so retries
diff --git a/bigquery/samples/load_data_by_post.py b/bigquery/samples/load_data_by_post.py
index 8c9c1b94816..8ba4b883827 100644
--- a/bigquery/samples/load_data_by_post.py
+++ b/bigquery/samples/load_data_by_post.py
@@ -23,11 +23,16 @@
 # [START make_post]
 def make_post(http, schema, data, projectId, datasetId, tableId):
     """
-    http: an authorized httplib2 client,
-    schema: a valid bigquery schema,
-        see https://cloud.google.com/bigquery/docs/reference/v2/tables,
-    data: valid JSON to insert into the table
-    returns: an http.request object
+    Creates an http POST request for loading data into
+    a bigquery table
+
+    Args:
+        http: an authorized httplib2 client,
+        schema: a valid bigquery schema,
+            see https://cloud.google.com/bigquery/docs/reference/v2/tables,
+        data: valid JSON to insert into the table
+
+    Returns: an http.request object
     """
     url = ('https://www.googleapis.com/upload/bigquery/v2/projects/' +
            projectId + '/jobs')
diff --git a/bigquery/samples/load_data_from_csv.py b/bigquery/samples/load_data_from_csv.py
index 8a6512b2dbc..a58fc32d988 100644
--- a/bigquery/samples/load_data_from_csv.py
+++ b/bigquery/samples/load_data_from_csv.py
@@ -21,13 +21,17 @@
 def load_table(service, source_schema, source_csv,
                projectId, datasetId, tableId, num_retries=5):
     """
-    service: an initialized and authorized bigquery
-    google-api-client object
-    source_schema: a valid bigquery schema,
-        see https://cloud.google.com/bigquery/docs/reference/v2/tables
-    source_csv: the fully qualified Google Cloud Storage location of
-        the data to load into your table
-    returns: a bigquery load job, see
+    Starts a job to load a bigquery table from CSV
+
+    Args:
+        service: an initialized and authorized bigquery
+            google-api-client object
+        source_schema: a valid bigquery schema,
+            see https://cloud.google.com/bigquery/docs/reference/v2/tables
+        source_csv: the fully qualified Google Cloud Storage location of
+            the data to load into your table
+
+    Returns: a bigquery load job, see
         https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load
     """
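
Usage sketch (not part of the patches above): the documented functions compose
into a small load pipeline. get_service() builds the authorized client,
load_table() starts the load job, and poll_job() blocks until it finishes.
This assumes the samples are importable as bigquery.samples.*; the project,
dataset, table, schema, and gs:// location below are placeholders, and reading
job['jobReference']['jobId'] assumes the standard BigQuery v2 job resource.

    # hypothetical_driver.py -- illustrative only, not part of this patch
    from bigquery.samples.load_data_from_csv import load_table
    from bigquery.samples.utils import get_service, poll_job

    # An example bigquery schema, per
    # https://cloud.google.com/bigquery/docs/reference/v2/tables
    SCHEMA = [
        {'name': 'name', 'type': 'STRING'},
        {'name': 'age', 'type': 'INTEGER'},
    ]

    def main():
        service = get_service()  # authorized bigquery client
        job = load_table(service, SCHEMA,
                         'gs://mybucket/data.csv',  # placeholder CSV location
                         'my-project', 'my_dataset', 'my_table')
        # Block until the load job completes, checking every 5 seconds.
        poll_job(service, 'my-project',
                 job['jobReference']['jobId'], interval=5)

    if __name__ == '__main__':
        main()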
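The paging() helper takes any list-style API method as request_func, plus that
method's keyword arguments. A sketch with placeholder IDs, assuming paging()
yields one response per page (its has_next loop suggests a generator) and
using tabledata().list to page through a table's rows:

    # Illustrative only; the IDs are placeholders.
    from bigquery.samples.utils import get_service, paging

    service = get_service()
    # tabledata().list returns one page of rows per request; paging()
    # re-issues the request until no pages remain.
    for page in paging(service,
                       service.tabledata().list,
                       projectId='my-project',
                       datasetId='my_dataset',
                       tableId='my_table'):
        print(page)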