Skip to content

Commit 85176d4

Browse files
authored
Merge pull request #467 from GoogleCloudPlatform/bigquery-cloud-client-samples
2 parents 5d1f8c8 + a2d91fd commit 85176d4

13 files changed

+681
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Exports data from BigQuery to an object in Google Cloud Storage.
18+
19+
For more information, see the README.md under /bigquery.
20+
21+
Example invocation:
22+
$ python export_data_to_gcs.py example_dataset example_table \
23+
gs://example-bucket/example-data.csv
24+
25+
The dataset and table should already exist.
26+
"""
27+
28+
import argparse
29+
import time
30+
import uuid
31+
32+
from gcloud import bigquery
33+
34+
35+
def export_data_to_gcs(dataset_name, table_name, destination):
    """Export an existing BigQuery table to a Google Cloud Storage object.

    Args:
        dataset_name: Name of an existing BigQuery dataset.
        table_name: Name of an existing table within that dataset.
        destination: Target object as a ``gs://bucket_name/object_name`` URI.
    """
    client = bigquery.Client()
    table = client.dataset(dataset_name).table(table_name)

    # Extract jobs require a caller-supplied unique name; a random UUID
    # avoids collisions between runs.
    extract_job = client.extract_table_to_storage(
        str(uuid.uuid4()), table, destination)
    extract_job.begin()

    # Block until the asynchronous job has finished (raises on job error).
    wait_for_job(extract_job)

    print('Exported {}:{} to {}'.format(
        dataset_name, table_name, destination))
50+
51+
52+
def wait_for_job(job):
    """Poll *job* once per second until it reaches the DONE state.

    Raises:
        RuntimeError: if the finished job carries an error result.
    """
    while True:
        # Refresh the job's server-side state.
        job.reload()
        if job.state != 'DONE':
            time.sleep(1)
            continue
        if job.error_result:
            raise RuntimeError(job.error_result)
        return
60+
61+
62+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('dataset_name')
    parser.add_argument('table_name')
    # Fixed: "desintation" typo and the missing space between the two
    # implicitly-concatenated help-string fragments (rendered as
    # "object.Must be").
    parser.add_argument(
        'destination',
        help='The destination Google Cloud Storage object. '
             'Must be in the format gs://bucket_name/object_name')

    args = parser.parse_args()

    export_data_to_gcs(
        args.dataset_name,
        args.table_name,
        args.destination)
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import export_data_to_gcs
15+
16+
17+
# Fixture dataset/table assumed to already exist in the test project.
DATASET_ID = 'test_dataset'
TABLE_ID = 'test_import_table'


def test_export_data_to_gcs(cloud_config, capsys):
    """Export the fixture table to GCS and check the success message."""
    destination_uri = 'gs://{}/test-export-data-to-gcs.csv'.format(
        cloud_config.storage_bucket)

    export_data_to_gcs.export_data_to_gcs(
        DATASET_ID, TABLE_ID, destination_uri)

    stdout, _ = capsys.readouterr()

    assert 'Exported' in stdout
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Loads data into BigQuery from a local file.
18+
19+
For more information, see the README.md under /bigquery.
20+
21+
Example invocation:
22+
$ python load_data_from_file.py example_dataset example_table \
23+
example-data.csv
24+
25+
The dataset and table should already exist.
26+
"""
27+
28+
import argparse
29+
import time
30+
from gcloud import bigquery
31+
32+
33+
def load_data_from_file(dataset_name, table_name, source_file_name):
    """Upload a local file's rows into an existing BigQuery table.

    Args:
        dataset_name: Name of an existing BigQuery dataset.
        table_name: Name of an existing table within that dataset.
        source_file_name: Path to a local CSV file to upload.
    """
    client = bigquery.Client()
    table = client.dataset(dataset_name).table(table_name)

    # The upload needs the table's schema, so fetch the table metadata first.
    table.reload()

    with open(source_file_name, 'rb') as source_file:
        # This example uses CSV, but you can use other formats.
        # See https://cloud.google.com/bigquery/loading-data
        upload_job = table.upload_from_file(
            source_file, source_format='text/csv')

    upload_job.begin()

    # Block until the asynchronous job has finished (raises on job error).
    wait_for_job(upload_job)

    print('Loaded {} rows into {}:{}.'.format(
        upload_job.output_rows, dataset_name, table_name))
53+
54+
55+
def wait_for_job(job):
    """Poll *job* once per second until it reaches the DONE state.

    Raises:
        RuntimeError: if the finished job carries an error result.
    """
    while True:
        # Refresh the job's server-side state.
        job.reload()
        if job.state != 'DONE':
            time.sleep(1)
            continue
        if job.error_result:
            raise RuntimeError(job.error_result)
        return
63+
64+
65+
if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('dataset_name')
    arg_parser.add_argument('table_name')
    arg_parser.add_argument(
        'source_file_name', help='Path to a .csv file to upload.')

    cli_args = arg_parser.parse_args()

    load_data_from_file(
        cli_args.dataset_name,
        cli_args.table_name,
        cli_args.source_file_name)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import pytest
15+
16+
import load_data_from_file
17+
18+
# Fixture dataset/table assumed to already exist in the test project.
DATASET_ID = 'test_dataset'
TABLE_ID = 'test_import_table'


@pytest.mark.xfail(
    strict=True,
    reason='https://github.com/GoogleCloudPlatform/gcloud-python/issues/2133')
def test_load_table(resource, capsys):
    """Load the fixture CSV from disk and check the reported row count."""
    csv_path = resource('data.csv')

    load_data_from_file.load_data_from_file(
        DATASET_ID, TABLE_ID, csv_path)

    stdout, _ = capsys.readouterr()

    assert 'Loaded 1 rows' in stdout
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Loads data into BigQuery from an object in Google Cloud Storage.
18+
19+
For more information, see the README.md under /bigquery.
20+
21+
Example invocation:
22+
$ python load_data_from_gcs.py example_dataset example_table \
23+
gs://example-bucket/example-data.csv
24+
25+
The dataset and table should already exist.
26+
"""
27+
28+
import argparse
29+
import time
30+
import uuid
31+
32+
from gcloud import bigquery
33+
34+
35+
def load_data_from_gcs(dataset_name, table_name, source):
    """Load a Google Cloud Storage object into an existing BigQuery table.

    Args:
        dataset_name: Name of an existing BigQuery dataset.
        table_name: Name of an existing table within that dataset.
        source: Source object as a ``gs://bucket_name/object_name`` URI.
    """
    client = bigquery.Client()
    table = client.dataset(dataset_name).table(table_name)

    # Load jobs require a caller-supplied unique name; a random UUID
    # avoids collisions between runs.
    load_job = client.load_table_from_storage(
        str(uuid.uuid4()), table, source)
    load_job.begin()

    # Block until the asynchronous job has finished (raises on job error).
    wait_for_job(load_job)

    print('Loaded {} rows into {}:{}.'.format(
        load_job.output_rows, dataset_name, table_name))
50+
51+
52+
def wait_for_job(job):
    """Poll *job* once per second until it reaches the DONE state.

    Raises:
        RuntimeError: if the finished job carries an error result.
    """
    while True:
        # Refresh the job's server-side state.
        job.reload()
        if job.state != 'DONE':
            time.sleep(1)
            continue
        if job.error_result:
            raise RuntimeError(job.error_result)
        return
60+
61+
62+
if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('dataset_name')
    arg_parser.add_argument('table_name')
    arg_parser.add_argument(
        'source', help='The Google Cloud Storage object to load. Must be in '
        'the format gs://bucket_name/object_name')

    cli_args = arg_parser.parse_args()

    load_data_from_gcs(
        cli_args.dataset_name,
        cli_args.table_name,
        cli_args.source)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2015, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import load_data_from_gcs
15+
16+
# Fixture dataset/table assumed to already exist in the test project.
DATASET_ID = 'test_dataset'
TABLE_ID = 'test_import_table'


def test_load_table(cloud_config, capsys):
    """Load the fixture CSV from GCS and check the reported row count."""
    source_uri = 'gs://{}/data.csv'.format(cloud_config.storage_bucket)

    load_data_from_gcs.load_data_from_gcs(
        DATASET_ID, TABLE_ID, source_uri)

    stdout, _ = capsys.readouterr()

    assert 'Loaded 1 rows' in stdout
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Gandalf, 2000, 140.0, 1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"Name": "Gandalf", "Age": 2000, "Weight": 140.0, "IsMagic": true}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[{"type": "STRING", "name": "Name"}, {"type": "INTEGER", "name": "Age"}, {"type": "FLOAT", "name": "Weight"}, {"type": "BOOLEAN", "name": "IsMagic"}]

0 commit comments

Comments
 (0)